diff --git a/[refs] b/[refs] index 928085155501..9b7a6b1e1be7 100644 --- a/[refs] +++ b/[refs] @@ -1,2 +1,2 @@ --- -refs/heads/master: a4c12d6c5dde48c69464baf7c703e425ee511433 +refs/heads/master: 1c3c07e9f6cc50dab2aeb8051325e317d4f6c70e diff --git a/trunk/CREDITS b/trunk/CREDITS index cc3453a55fb9..0fe904ebb7c7 100644 --- a/trunk/CREDITS +++ b/trunk/CREDITS @@ -2384,13 +2384,6 @@ N: Thomas Molina E: tmolina@cablespeed.com D: bug fixes, documentation, minor hackery -N: Paul Moore -E: paul.moore@hp.com -D: NetLabel author -S: Hewlett-Packard -S: 110 Spit Brook Road -S: Nashua, NH 03062 - N: James Morris E: jmorris@namei.org W: http://namei.org/ diff --git a/trunk/Documentation/00-INDEX b/trunk/Documentation/00-INDEX index 02457ec9c94f..5f7f7d7f77d2 100644 --- a/trunk/Documentation/00-INDEX +++ b/trunk/Documentation/00-INDEX @@ -184,8 +184,6 @@ mtrr.txt - how to use PPro Memory Type Range Registers to increase performance. nbd.txt - info on a TCP implementation of a network block device. -netlabel/ - - directory with information on the NetLabel subsystem. networking/ - directory with info on various aspects of networking with Linux. nfsroot.txt diff --git a/trunk/Documentation/netlabel/00-INDEX b/trunk/Documentation/netlabel/00-INDEX deleted file mode 100644 index 837bf35990e2..000000000000 --- a/trunk/Documentation/netlabel/00-INDEX +++ /dev/null @@ -1,10 +0,0 @@ -00-INDEX - - this file. -cipso_ipv4.txt - - documentation on the IPv4 CIPSO protocol engine. -draft-ietf-cipso-ipsecurity-01.txt - - IETF draft of the CIPSO protocol, dated 16 July 1992. -introduction.txt - - NetLabel introduction, READ THIS FIRST. -lsm_interface.txt - - documentation on the NetLabel kernel security module API. 
diff --git a/trunk/Documentation/netlabel/cipso_ipv4.txt b/trunk/Documentation/netlabel/cipso_ipv4.txt deleted file mode 100644 index 93dacb132c3c..000000000000 --- a/trunk/Documentation/netlabel/cipso_ipv4.txt +++ /dev/null @@ -1,48 +0,0 @@ -NetLabel CIPSO/IPv4 Protocol Engine -============================================================================== -Paul Moore, paul.moore@hp.com - -May 17, 2006 - - * Overview - -The NetLabel CIPSO/IPv4 protocol engine is based on the IETF Commercial IP -Security Option (CIPSO) draft from July 16, 1992. A copy of this draft can be -found in this directory, consult '00-INDEX' for the filename. While the IETF -draft never made it to an RFC standard it has become a de-facto standard for -labeled networking and is used in many trusted operating systems. - - * Outbound Packet Processing - -The CIPSO/IPv4 protocol engine applies the CIPSO IP option to packets by -adding the CIPSO label to the socket. This causes all packets leaving the -system through the socket to have the CIPSO IP option applied. The socket's -CIPSO label can be changed at any point in time, however, it is recommended -that it is set upon the socket's creation. The LSM can set the socket's CIPSO -label by using the NetLabel security module API; if the NetLabel "domain" is -configured to use CIPSO for packet labeling then a CIPSO IP option will be -generated and attached to the socket. - - * Inbound Packet Processing - -The CIPSO/IPv4 protocol engine validates every CIPSO IP option it finds at the -IP layer without any special handling required by the LSM. However, in order -to decode and translate the CIPSO label on the packet the LSM must use the -NetLabel security module API to extract the security attributes of the packet. -This is typically done at the socket layer using the 'socket_sock_rcv_skb()' -LSM hook. 
- - * Label Translation - -The CIPSO/IPv4 protocol engine contains a mechanism to translate CIPSO security -attributes such as sensitivity level and category to values which are -appropriate for the host. These mappings are defined as part of a CIPSO -Domain Of Interpretation (DOI) definition and are configured through the -NetLabel user space communication layer. Each DOI definition can have a -different security attribute mapping table. - - * Label Translation Cache - -The NetLabel system provides a framework for caching security attribute -mappings from the network labels to the corresponding LSM identifiers. The -CIPSO/IPv4 protocol engine supports this caching mechanism. diff --git a/trunk/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt b/trunk/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt deleted file mode 100644 index 256c2c9d4f50..000000000000 --- a/trunk/Documentation/netlabel/draft-ietf-cipso-ipsecurity-01.txt +++ /dev/null @@ -1,791 +0,0 @@ -IETF CIPSO Working Group -16 July, 1992 - - - - COMMERCIAL IP SECURITY OPTION (CIPSO 2.2) - - - -1. Status - -This Internet Draft provides the high level specification for a Commercial -IP Security Option (CIPSO). This draft reflects the version as approved by -the CIPSO IETF Working Group. Distribution of this memo is unlimited. - -This document is an Internet Draft. Internet Drafts are working documents -of the Internet Engineering Task Force (IETF), its Areas, and its Working -Groups. Note that other groups may also distribute working documents as -Internet Drafts. - -Internet Drafts are draft documents valid for a maximum of six months. -Internet Drafts may be updated, replaced, or obsoleted by other documents -at any time. It is not appropriate to use Internet Drafts as reference -material or to cite them other than as a "working draft" or "work in -progress." 
- -Please check the I-D abstract listing contained in each Internet Draft -directory to learn the current status of this or any other Internet Draft. - - - - -2. Background - -Currently the Internet Protocol includes two security options. One of -these options is the DoD Basic Security Option (BSO) (Type 130) which allows -IP datagrams to be labeled with security classifications. This option -provides sixteen security classifications and a variable number of handling -restrictions. To handle additional security information, such as security -categories or compartments, another security option (Type 133) exists and -is referred to as the DoD Extended Security Option (ESO). The values for -the fixed fields within these two options are administered by the Defense -Information Systems Agency (DISA). - -Computer vendors are now building commercial operating systems with -mandatory access controls and multi-level security. These systems are -no longer built specifically for a particular group in the defense or -intelligence communities. They are generally available commercial systems -for use in a variety of government and civil sector environments. - -The small number of ESO format codes can not support all the possible -applications of a commercial security option. The BSO and ESO were -designed to only support the United States DoD. CIPSO has been designed -to support multiple security policies. This Internet Draft provides the -format and procedures required to support a Mandatory Access Control -security policy. Support for additional security policies shall be -defined in future RFCs. - - - - -Internet Draft, Expires 15 Jan 93 [PAGE 1] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - - -3. CIPSO Format - -Option type: 134 (Class 0, Number 6, Copy on Fragmentation) -Option length: Variable - -This option permits security related information to be passed between -systems within a single Domain of Interpretation (DOI). 
A DOI is a -collection of systems which agree on the meaning of particular values -in the security option. An authority that has been assigned a DOI -identifier will define a mapping between appropriate CIPSO field values -and their human readable equivalent. This authority will distribute that -mapping to hosts within the authority's domain. These mappings may be -sensitive, therefore a DOI authority is not required to make these -mappings available to anyone other than the systems that are included in -the DOI. - -This option MUST be copied on fragmentation. This option appears at most -once in a datagram. All multi-octet fields in the option are defined to be -transmitted in network byte order. The format of this option is as follows: - -+----------+----------+------//------+-----------//---------+ -| 10000110 | LLLLLLLL | DDDDDDDDDDDD | TTTTTTTTTTTTTTTTTTTT | -+----------+----------+------//------+-----------//---------+ - - TYPE=134 OPTION DOMAIN OF TAGS - LENGTH INTERPRETATION - - - Figure 1. CIPSO Format - - -3.1 Type - -This field is 1 octet in length. Its value is 134. - - -3.2 Length - -This field is 1 octet in length. It is the total length of the option -including the type and length fields. With the current IP header length -restriction of 40 octets the value of this field MUST not exceed 40. - - -3.3 Domain of Interpretation Identifier - -This field is an unsigned 32 bit integer. The value 0 is reserved and MUST -not appear as the DOI identifier in any CIPSO option. Implementations -should assume that the DOI identifier field is not aligned on any particular -byte boundary. - -To conserve space in the protocol, security levels and categories are -represented by numbers rather than their ASCII equivalent. This requires -a mapping table within CIPSO hosts to map these numbers to their -corresponding ASCII representations. 
Non-related groups of systems may - - - -Internet Draft, Expires 15 Jan 93 [PAGE 2] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -have their own unique mappings. For example, one group of systems may -use the number 5 to represent Unclassified while another group may use the -number 1 to represent that same security level. The DOI identifier is used -to identify which mapping was used for the values within the option. - - -3.4 Tag Types - -A common format for passing security related information is necessary -for interoperability. CIPSO uses sets of "tags" to contain the security -information relevant to the data in the IP packet. Each tag begins with -a tag type identifier followed by the length of the tag and ends with the -actual security information to be passed. All multi-octet fields in a tag -are defined to be transmitted in network byte order. Like the DOI -identifier field in the CIPSO header, implementations should assume that -all tags, as well as fields within a tag, are not aligned on any particular -octet boundary. The tag types defined in this document contain alignment -bytes to assist alignment of some information, however alignment can not -be guaranteed if CIPSO is not the first IP option. - -CIPSO tag types 0 through 127 are reserved for defining standard tag -formats. Their definitions will be published in RFCs. Tag types whose -identifiers are greater than 127 are defined by the DOI authority and may -only be meaningful in certain Domains of Interpretation. For these tag -types, implementations will require the DOI identifier as well as the tag -number to determine the security policy and the format associated with the -tag. Use of tag types above 127 are restricted to closed networks where -interoperability with other networks will not be an issue. Implementations -that support a tag type greater than 127 MUST support at least one DOI that -requires only tag types 1 to 127. - -Tag type 0 is reserved. 
Tag types 1, 2, and 5 are defined in this -Internet Draft. Types 3 and 4 are reserved for work in progress. -The standard format for all current and future CIPSO tags is shown below: - -+----------+----------+--------//--------+ -| TTTTTTTT | LLLLLLLL | IIIIIIIIIIIIIIII | -+----------+----------+--------//--------+ - TAG TAG TAG - TYPE LENGTH INFORMATION - - Figure 2: Standard Tag Format - -In the three tag types described in this document, the length and count -restrictions are based on the current IP limitation of 40 octets for all -IP options. If the IP header is later expanded, then the length and count -restrictions specified in this document may increase to use the full area -provided for IP options. - - -3.4.1 Tag Type Classes - -Tag classes consist of tag types that have common processing requirements -and support the same security policy. The three tags defined in this -Internet Draft belong to the Mandatory Access Control (MAC) Sensitivity - - - -Internet Draft, Expires 15 Jan 93 [PAGE 3] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -class and support the MAC Sensitivity security policy. - - -3.4.2 Tag Type 1 - -This is referred to as the "bit-mapped" tag type. Tag type 1 is included -in the MAC Sensitivity tag type class. The format of this tag type is as -follows: - -+----------+----------+----------+----------+--------//---------+ -| 00000001 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCC | -+----------+----------+----------+----------+--------//---------+ - - TAG TAG ALIGNMENT SENSITIVITY BIT MAP OF - TYPE LENGTH OCTET LEVEL CATEGORIES - - Figure 3. Tag Type 1 Format - - -3.4.2.1 Tag Type - -This field is 1 octet in length and has a value of 1. - - -3.4.2.2 Tag Length - -This field is 1 octet in length. It is the total length of the tag type -including the type and length fields. With the current IP header length -restriction of 40 bytes the value within this field is between 4 and 34. 
- - -3.4.2.3 Alignment Octet - -This field is 1 octet in length and always has the value of 0. Its purpose -is to align the category bitmap field on an even octet boundary. This will -speed many implementations including router implementations. - - -3.4.2.4 Sensitivity Level - -This field is 1 octet in length. Its value is from 0 to 255. The values -are ordered with 0 being the minimum value and 255 representing the maximum -value. - - -3.4.2.5 Bit Map of Categories - -The length of this field is variable and ranges from 0 to 30 octets. This -provides representation of categories 0 to 239. The ordering of the bits -is left to right or MSB to LSB. For example category 0 is represented by -the most significant bit of the first byte and category 15 is represented -by the least significant bit of the second byte. Figure 4 graphically -shows this ordering. Bit N is binary 1 if category N is part of the label -for the datagram, and bit N is binary 0 if category N is not part of the -label. Except for the optimized tag 1 format described in the next section, - - - -Internet Draft, Expires 15 Jan 93 [PAGE 4] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -minimal encoding SHOULD be used resulting in no trailing zero octets in the -category bitmap. - - octet 0 octet 1 octet 2 octet 3 octet 4 octet 5 - XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX XXXXXXXX . . . -bit 01234567 89111111 11112222 22222233 33333333 44444444 -number 012345 67890123 45678901 23456789 01234567 - - Figure 4. Ordering of Bits in Tag 1 Bit Map - - -3.4.2.6 Optimized Tag 1 Format - -Routers work most efficiently when processing fixed length fields. To -support these routers there is an optimized form of tag type 1. The format -does not change. The only change is to the category bitmap which is set to -a constant length of 10 octets. Trailing octets required to fill out the 10 -octets are zero filled. 
Ten octets, allowing for 80 categories, was chosen -because it makes the total length of the CIPSO option 20 octets. If CIPSO -is the only option then the option will be full word aligned and additional -filler octets will not be required. - - -3.4.3 Tag Type 2 - -This is referred to as the "enumerated" tag type. It is used to describe -large but sparsely populated sets of categories. Tag type 2 is in the MAC -Sensitivity tag type class. The format of this tag type is as follows: - -+----------+----------+----------+----------+-------------//-------------+ -| 00000010 | LLLLLLLL | 00000000 | LLLLLLLL | CCCCCCCCCCCCCCCCCCCCCCCCCC | -+----------+----------+----------+----------+-------------//-------------+ - - TAG TAG ALIGNMENT SENSITIVITY ENUMERATED - TYPE LENGTH OCTET LEVEL CATEGORIES - - Figure 5. Tag Type 2 Format - - -3.4.3.1 Tag Type - -This field is one octet in length and has a value of 2. - - -3.4.3.2 Tag Length - -This field is 1 octet in length. It is the total length of the tag type -including the type and length fields. With the current IP header length -restriction of 40 bytes the value within this field is between 4 and 34. - - -3.4.3.3 Alignment Octet - -This field is 1 octet in length and always has the value of 0. Its purpose -is to align the category field on an even octet boundary. This will - - - -Internet Draft, Expires 15 Jan 93 [PAGE 5] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -speed many implementations including router implementations. - - -3.4.3.4 Sensitivity Level - -This field is 1 octet in length. Its value is from 0 to 255. The values -are ordered with 0 being the minimum value and 255 representing the -maximum value. - - -3.4.3.5 Enumerated Categories - -In this tag, categories are represented by their actual value rather than -by their position within a bit field. The length of each category is 2 -octets. Up to 15 categories may be represented by this tag. Valid values -for categories are 0 to 65534. 
Category 65535 is not a valid category -value. The categories MUST be listed in ascending order within the tag. - - -3.4.4 Tag Type 5 - -This is referred to as the "range" tag type. It is used to represent -labels where all categories in a range, or set of ranges, are included -in the sensitivity label. Tag type 5 is in the MAC Sensitivity tag type -class. The format of this tag type is as follows: - -+----------+----------+----------+----------+------------//-------------+ -| 00000101 | LLLLLLLL | 00000000 | LLLLLLLL | Top/Bottom | Top/Bottom | -+----------+----------+----------+----------+------------//-------------+ - - TAG TAG ALIGNMENT SENSITIVITY CATEGORY RANGES - TYPE LENGTH OCTET LEVEL - - Figure 6. Tag Type 5 Format - - -3.4.4.1 Tag Type - -This field is one octet in length and has a value of 5. - - -3.4.4.2 Tag Length - -This field is 1 octet in length. It is the total length of the tag type -including the type and length fields. With the current IP header length -restriction of 40 bytes the value within this field is between 4 and 34. - - -3.4.4.3 Alignment Octet - -This field is 1 octet in length and always has the value of 0. Its purpose -is to align the category range field on an even octet boundary. This will -speed many implementations including router implementations. - - - - - -Internet Draft, Expires 15 Jan 93 [PAGE 6] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -3.4.4.4 Sensitivity Level - -This field is 1 octet in length. Its value is from 0 to 255. The values -are ordered with 0 being the minimum value and 255 representing the maximum -value. - - -3.4.4.5 Category Ranges - -A category range is a 4 octet field comprised of the 2 octet index of the -highest numbered category followed by the 2 octet index of the lowest -numbered category. These range endpoints are inclusive within the range of -categories. All categories within a range are included in the sensitivity -label. This tag may contain a maximum of 7 category pairs. 
The bottom -category endpoint for the last pair in the tag MAY be omitted and SHOULD be -assumed to be 0. The ranges MUST be non-overlapping and be listed in -descending order. Valid values for categories are 0 to 65534. Category -65535 is not a valid category value. - - -3.4.5 Minimum Requirements - -A CIPSO implementation MUST be capable of generating at least tag type 1 in -the non-optimized form. In addition, a CIPSO implementation MUST be able -to receive any valid tag type 1 even those using the optimized tag type 1 -format. - - -4. Configuration Parameters - -The configuration parameters defined below are required for all CIPSO hosts, -gateways, and routers that support multiple sensitivity labels. A CIPSO -host is defined to be the origination or destination system for an IP -datagram. A CIPSO gateway provides IP routing services between two or more -IP networks and may be required to perform label translations between -networks. A CIPSO gateway may be an enhanced CIPSO host or it may just -provide gateway services with no end system CIPSO capabilities. A CIPSO -router is a dedicated IP router that routes IP datagrams between two or more -IP networks. - -An implementation of CIPSO on a host MUST have the capability to reject a -datagram for reasons that the information contained can not be adequately -protected by the receiving host or if acceptance may result in violation of -the host or network security policy. In addition, a CIPSO gateway or router -MUST be able to reject datagrams going to networks that can not provide -adequate protection or may violate the network's security policy. To -provide this capability the following minimal set of configuration -parameters are required for CIPSO implementations: - -HOST_LABEL_MAX - This parameter contains the maximum sensitivity label that -a CIPSO host is authorized to handle. All datagrams that have a label -greater than this maximum MUST be rejected by the CIPSO host. 
This -parameter does not apply to CIPSO gateways or routers. This parameter need -not be defined explicitly as it can be implicitly derived from the -PORT_LABEL_MAX parameters for the associated interfaces. - - - -Internet Draft, Expires 15 Jan 93 [PAGE 7] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - - -HOST_LABEL_MIN - This parameter contains the minimum sensitivity label that -a CIPSO host is authorized to handle. All datagrams that have a label less -than this minimum MUST be rejected by the CIPSO host. This parameter does -not apply to CIPSO gateways or routers. This parameter need not be defined -explicitly as it can be implicitly derived from the PORT_LABEL_MIN -parameters for the associated interfaces. - -PORT_LABEL_MAX - This parameter contains the maximum sensitivity label for -all datagrams that may exit a particular network interface port. All -outgoing datagrams that have a label greater than this maximum MUST be -rejected by the CIPSO system. The label within this parameter MUST be -less than or equal to the label within the HOST_LABEL_MAX parameter. This -parameter does not apply to CIPSO hosts that support only one network port. - -PORT_LABEL_MIN - This parameter contains the minimum sensitivity label for -all datagrams that may exit a particular network interface port. All -outgoing datagrams that have a label less than this minimum MUST be -rejected by the CIPSO system. The label within this parameter MUST be -greater than or equal to the label within the HOST_LABEL_MIN parameter. -This parameter does not apply to CIPSO hosts that support only one network -port. - -PORT_DOI - This parameter is used to assign a DOI identifier value to a -particular network interface port. All CIPSO labels within datagrams -going out this port MUST use the specified DOI identifier. All CIPSO -hosts and gateways MUST support either this parameter, the NET_DOI -parameter, or the HOST_DOI parameter. 
- -NET_DOI - This parameter is used to assign a DOI identifier value to a -particular IP network address. All CIPSO labels within datagrams destined -for the particular IP network MUST use the specified DOI identifier. All -CIPSO hosts and gateways MUST support either this parameter, the PORT_DOI -parameter, or the HOST_DOI parameter. - -HOST_DOI - This parameter is used to assign a DOI identifier value to a -particular IP host address. All CIPSO labels within datagrams destined for -the particular IP host will use the specified DOI identifier. All CIPSO -hosts and gateways MUST support either this parameter, the PORT_DOI -parameter, or the NET_DOI parameter. - -This list represents the minimal set of configuration parameters required -to be compliant. Implementors are encouraged to add to this list to -provide enhanced functionality and control. For example, many security -policies may require both incoming and outgoing datagrams be checked against -the port and host label ranges. - - -4.1 Port Range Parameters - -The labels represented by the PORT_LABEL_MAX and PORT_LABEL_MIN parameters -MAY be in CIPSO or local format. Some CIPSO systems, such as routers, may -want to have the range parameters expressed in CIPSO format so that incoming -labels do not have to be converted to a local format before being compared -against the range. If multiple DOIs are supported by one of these CIPSO - - - -Internet Draft, Expires 15 Jan 93 [PAGE 8] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -systems then multiple port range parameters would be needed, one set for -each DOI supported on a particular port. - -The port range will usually represent the total set of labels that may -exist on the logical network accessed through the corresponding network -interface. It may, however, represent a subset of these labels that are -allowed to enter the CIPSO system. 
- - -4.2 Single Label CIPSO Hosts - -CIPSO implementations that support only one label are not required to -support the parameters described above. These limited implementations are -only required to support a NET_LABEL parameter. This parameter contains -the CIPSO label that may be inserted in datagrams that exit the host. In -addition, the host MUST reject any incoming datagram that has a label which -is not equivalent to the NET_LABEL parameter. - - -5. Handling Procedures - -This section describes the processing requirements for incoming and -outgoing IP datagrams. Just providing the correct CIPSO label format -is not enough. Assumptions will be made by one system on how a -receiving system will handle the CIPSO label. Wrong assumptions may -lead to non-interoperability or even a security incident. The -requirements described below represent the minimal set needed for -interoperability and that provide users some level of confidence. -Many other requirements could be added to increase user confidence, -however at the risk of restricting creativity and limiting vendor -participation. - - -5.1 Input Procedures - -All datagrams received through a network port MUST have a security label -associated with them, either contained in the datagram or assigned to the -receiving port. Without this label the host, gateway, or router will not -have the information it needs to make security decisions. This security -label will be obtained from the CIPSO if the option is present in the -datagram. See section 4.1.2 for handling procedures for unlabeled -datagrams. This label will be compared against the PORT (if appropriate) -and HOST configuration parameters defined in section 3. - -If any field within the CIPSO option, such as the DOI identifier, is not -recognized the IP datagram is discarded and an ICMP "parameter problem" -(type 12) is generated and returned. 
The ICMP code field is set to "bad -parameter" (code 0) and the pointer is set to the start of the CIPSO field -that is unrecognized. - -If the contents of the CIPSO are valid but the security label is -outside of the configured host or port label range, the datagram is -discarded and an ICMP "destination unreachable" (type 3) is generated -and returned. The code field of the ICMP is set to "communication with -destination network administratively prohibited" (code 9) or to - - - -Internet Draft, Expires 15 Jan 93 [PAGE 9] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -"communication with destination host administratively prohibited" -(code 10). The value of the code field used is dependent upon whether -the originator of the ICMP message is acting as a CIPSO host or a CIPSO -gateway. The recipient of the ICMP message MUST be able to handle either -value. The same procedure is performed if a CIPSO can not be added to an -IP packet because it is too large to fit in the IP options area. - -If the error is triggered by receipt of an ICMP message, the message -is discarded and no response is permitted (consistent with general ICMP -processing rules). - - -5.1.1 Unrecognized tag types - -The default condition for any CIPSO implementation is that an -unrecognized tag type MUST be treated as a "parameter problem" and -handled as described in section 4.1. A CIPSO implementation MAY allow -the system administrator to identify tag types that may safely be -ignored. This capability is an allowable enhancement, not a -requirement. - - -5.1.2 Unlabeled Packets - -A network port may be configured to not require a CIPSO label for all -incoming datagrams. For this configuration a CIPSO label must be -assigned to that network port and associated with all unlabeled IP -datagrams. This capability might be used for single level networks or -networks that have CIPSO and non-CIPSO hosts and the non-CIPSO hosts -all operate at the same label. 
- -If a CIPSO option is required and none is found, the datagram is -discarded and an ICMP "parameter problem" (type 12) is generated and -returned to the originator of the datagram. The code field of the ICMP -is set to "option missing" (code 1) and the ICMP pointer is set to 134 -(the value of the option type for the missing CIPSO option). - - -5.2 Output Procedures - -A CIPSO option MUST appear only once in a datagram. Only one tag type -from the MAC Sensitivity class MAY be included in a CIPSO option. Given -the current set of defined tag types, this means that CIPSO labels at -first will contain only one tag. - -All datagrams leaving a CIPSO system MUST meet the following condition: - - PORT_LABEL_MIN <= CIPSO label <= PORT_LABEL_MAX - -If this condition is not satisfied the datagram MUST be discarded. -If the CIPSO system only supports one port, the HOST_LABEL_MIN and the -HOST_LABEL_MAX parameters MAY be substituted for the PORT parameters in -the above condition. - -The DOI identifier to be used for all outgoing datagrams is configured by - - - -Internet Draft, Expires 15 Jan 93 [PAGE 10] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - -the administrator. If port level DOI identifier assignment is used, then -the PORT_DOI configuration parameter MUST contain the DOI identifier to -use. If network level DOI assignment is used, then the NET_DOI parameter -MUST contain the DOI identifier to use. And if host level DOI assignment -is employed, then the HOST_DOI parameter MUST contain the DOI identifier -to use. A CIPSO implementation need only support one level of DOI -assignment. - - -5.3 DOI Processing Requirements - -A CIPSO implementation MUST support at least one DOI and SHOULD support -multiple DOIs. System and network administrators are cautioned to -ensure that at least one DOI is common within an IP network to allow for -broadcasting of IP datagrams. 
- -CIPSO gateways MUST be capable of translating a CIPSO option from one -DOI to another when forwarding datagrams between networks. For -efficiency purposes this capability is only a desired feature for CIPSO -routers. - - -5.4 Label of ICMP Messages - -The CIPSO label to be used on all outgoing ICMP messages MUST be equivalent -to the label of the datagram that caused the ICMP message. If the ICMP was -generated due to a problem associated with the original CIPSO label then the -following responses are allowed: - - a. Use the CIPSO label of the original IP datagram - b. Drop the original datagram with no return message generated - -In most cases these options will have the same effect. If you can not -interpret the label or if it is outside the label range of your host or -interface then an ICMP message with the same label will probably not be -able to exit the system. - - -6. Assignment of DOI Identifier Numbers = - -Requests for assignment of a DOI identifier number should be addressed to -the Internet Assigned Numbers Authority (IANA). - - -7. Acknowledgements - -Much of the material in this RFC is based on (and copied from) work -done by Gary Winiger of Sun Microsystems and published as Commercial -IP Security Option at the INTEROP 89, Commercial IPSO Workshop. - - -8. Author's Address - -To submit mail for distribution to members of the IETF CIPSO Working -Group, send mail to: cipso@wdl1.wdl.loral.com. - - - -Internet Draft, Expires 15 Jan 93 [PAGE 11] - - - -CIPSO INTERNET DRAFT 16 July, 1992 - - - - -To be added to or deleted from this distribution, send mail to: -cipso-request@wdl1.wdl.loral.com. - - -9. References - -RFC 1038, "Draft Revised IP Security Option", M. St. Johns, IETF, January -1988. - -RFC 1108, "U.S. Department of Defense Security Options -for the Internet Protocol", Stephen Kent, IAB, 1 March, 1991. 
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -Internet Draft, Expires 15 Jan 93 [PAGE 12] - - - diff --git a/trunk/Documentation/netlabel/introduction.txt b/trunk/Documentation/netlabel/introduction.txt deleted file mode 100644 index a4ffba1694c8..000000000000 --- a/trunk/Documentation/netlabel/introduction.txt +++ /dev/null @@ -1,46 +0,0 @@ -NetLabel Introduction -============================================================================== -Paul Moore, paul.moore@hp.com - -August 2, 2006 - - * Overview - -NetLabel is a mechanism which can be used by kernel security modules to attach -security attributes to outgoing network packets generated from user space -applications and read security attributes from incoming network packets. It -is composed of three main components, the protocol engines, the communication -layer, and the kernel security module API. - - * Protocol Engines - -The protocol engines are responsible for both applying and retrieving the -network packet's security attributes. If any translation between the network -security attributes and those on the host are required then the protocol -engine will handle those tasks as well. Other kernel subsystems should -refrain from calling the protocol engines directly, instead they should use -the NetLabel kernel security module API described below. - -Detailed information about each NetLabel protocol engine can be found in this -directory, consult '00-INDEX' for filenames. - - * Communication Layer - -The communication layer exists to allow NetLabel configuration and monitoring -from user space. The NetLabel communication layer uses a message based -protocol built on top of the Generic NETLINK transport mechanism. The exact -formatting of these NetLabel messages as well as the Generic NETLINK family -names can be found in the the 'net/netlabel/' directory as comments in the -header files as well as in 'include/net/netlabel.h'. 
- - * Security Module API - -The purpose of the NetLabel security module API is to provide a protocol -independent interface to the underlying NetLabel protocol engines. In addition -to protocol independence, the security module API is designed to be completely -LSM independent which should allow multiple LSMs to leverage the same code -base. - -Detailed information about the NetLabel security module API can be found in the -'include/net/netlabel.h' header file as well as the 'lsm_interface.txt' file -found in this directory. diff --git a/trunk/Documentation/netlabel/lsm_interface.txt b/trunk/Documentation/netlabel/lsm_interface.txt deleted file mode 100644 index 98dd9f7430f2..000000000000 --- a/trunk/Documentation/netlabel/lsm_interface.txt +++ /dev/null @@ -1,47 +0,0 @@ -NetLabel Linux Security Module Interface -============================================================================== -Paul Moore, paul.moore@hp.com - -May 17, 2006 - - * Overview - -NetLabel is a mechanism which can set and retrieve security attributes from -network packets. It is intended to be used by LSM developers who want to make -use of a common code base for several different packet labeling protocols. -The NetLabel security module API is defined in 'include/net/netlabel.h' but a -brief overview is given below. - - * NetLabel Security Attributes - -Since NetLabel supports multiple different packet labeling protocols and LSMs -it uses the concept of security attributes to refer to the packet's security -labels. The NetLabel security attributes are defined by the -'netlbl_lsm_secattr' structure in the NetLabel header file. Internally the -NetLabel subsystem converts the security attributes to and from the correct -low-level packet label depending on the NetLabel build time and run time -configuration. It is up to the LSM developer to translate the NetLabel -security attributes into whatever security identifiers are in use for their -particular LSM. 
- - * NetLabel LSM Protocol Operations - -These are the functions which allow the LSM developer to manipulate the labels -on outgoing packets as well as read the labels on incoming packets. Functions -exist to operate both on sockets as well as the sk_buffs directly. These high -level functions are translated into low level protocol operations based on how -the administrator has configured the NetLabel subsystem. - - * NetLabel Label Mapping Cache Operations - -Depending on the exact configuration, translation between the network packet -label and the internal LSM security identifier can be time consuming. The -NetLabel label mapping cache is a caching mechanism which can be used to -sidestep much of this overhead once a mapping has been established. Once the -LSM has received a packet, used NetLabel to decode it's security attributes, -and translated the security attributes into a LSM internal identifier the LSM -can use the NetLabel caching functions to associate the LSM internal -identifier with the network packet's label. This means that in the future -when a incoming packet matches a cached value not only are the internal -NetLabel translation mechanisms bypassed but the LSM translation mechanisms are -bypassed as well which should result in a significant reduction in overhead. diff --git a/trunk/Documentation/networking/ip-sysctl.txt b/trunk/Documentation/networking/ip-sysctl.txt index 935e298f674a..90ed78110fd4 100644 --- a/trunk/Documentation/networking/ip-sysctl.txt +++ b/trunk/Documentation/networking/ip-sysctl.txt @@ -375,41 +375,6 @@ tcp_slow_start_after_idle - BOOLEAN be timed out after an idle period. Default: 1 -CIPSOv4 Variables: - -cipso_cache_enable - BOOLEAN - If set, enable additions to and lookups from the CIPSO label mapping - cache. If unset, additions are ignored and lookups always result in a - miss. 
However, regardless of the setting the cache is still - invalidated when required when means you can safely toggle this on and - off and the cache will always be "safe". - Default: 1 - -cipso_cache_bucket_size - INTEGER - The CIPSO label cache consists of a fixed size hash table with each - hash bucket containing a number of cache entries. This variable limits - the number of entries in each hash bucket; the larger the value the - more CIPSO label mappings that can be cached. When the number of - entries in a given hash bucket reaches this limit adding new entries - causes the oldest entry in the bucket to be removed to make room. - Default: 10 - -cipso_rbm_optfmt - BOOLEAN - Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of - the CIPSO draft specification (see Documentation/netlabel for details). - This means that when set the CIPSO tag will be padded with empty - categories in order to make the packet data 32-bit aligned. - Default: 0 - -cipso_rbm_structvalid - BOOLEAN - If set, do a very strict check of the CIPSO option when - ip_options_compile() is called. If unset, relax the checks done during - ip_options_compile(). Either way is "safe" as errors are caught else - where in the CIPSO processing code but setting this to 0 (False) should - result in less work (i.e. it should be faster) but could cause problems - with other implementations that require strict checking. - Default: 0 - IP Variables: ip_local_port_range - 2 INTEGERS @@ -765,9 +730,6 @@ conf/all/forwarding - BOOLEAN This referred to as global forwarding. -proxy_ndp - BOOLEAN - Do proxy ndp. - conf/interface/*: Change special settings per interface. diff --git a/trunk/Documentation/networking/secid.txt b/trunk/Documentation/networking/secid.txt deleted file mode 100644 index 95ea06784333..000000000000 --- a/trunk/Documentation/networking/secid.txt +++ /dev/null @@ -1,14 +0,0 @@ -flowi structure: - -The secid member in the flow structure is used in LSMs (e.g. 
SELinux) to indicate -the label of the flow. This label of the flow is currently used in selecting -matching labeled xfrm(s). - -If this is an outbound flow, the label is derived from the socket, if any, or -the incoming packet this flow is being generated as a response to (e.g. tcp -resets, timewait ack, etc.). It is also conceivable that the label could be -derived from other sources such as process context, device, etc., in special -cases, as may be appropriate. - -If this is an inbound flow, the label is derived from the IPSec security -associations, if any, used by the packet. diff --git a/trunk/arch/powerpc/platforms/powermac/feature.c b/trunk/arch/powerpc/platforms/powermac/feature.c index e49621be6640..13fcaf5b1796 100644 --- a/trunk/arch/powerpc/platforms/powermac/feature.c +++ b/trunk/arch/powerpc/platforms/powermac/feature.c @@ -1058,8 +1058,8 @@ core99_reset_cpu(struct device_node *node, long param, long value) if (np == NULL) return -ENODEV; for (np = np->child; np != NULL; np = np->sibling) { - const u32 *num = get_property(np, "reg", NULL); - const u32 *rst = get_property(np, "soft-reset", NULL); + u32 *num = get_property(np, "reg", NULL); + u32 *rst = get_property(np, "soft-reset", NULL); if (num == NULL || rst == NULL) continue; if (param == *num) { diff --git a/trunk/arch/powerpc/platforms/powermac/smp.c b/trunk/arch/powerpc/platforms/powermac/smp.c index 1949b657b092..653eeb64d1e2 100644 --- a/trunk/arch/powerpc/platforms/powermac/smp.c +++ b/trunk/arch/powerpc/platforms/powermac/smp.c @@ -702,7 +702,7 @@ static void __init smp_core99_setup(int ncpus) /* GPIO based HW sync on ppc32 Core99 */ if (pmac_tb_freeze == NULL && !machine_is_compatible("MacRISC4")) { struct device_node *cpu; - const u32 *tbprop = NULL; + u32 *tbprop = NULL; core99_tb_gpio = KL_GPIO_TB_ENABLE; /* default value */ cpu = of_find_node_by_type(NULL, "cpu"); diff --git a/trunk/crypto/hmac.c b/trunk/crypto/hmac.c index b521bcd2b2c6..f403b6946047 100644 --- a/trunk/crypto/hmac.c 
+++ b/trunk/crypto/hmac.c @@ -92,17 +92,13 @@ static int hmac_init(struct hash_desc *pdesc) struct hmac_ctx *ctx = align_ptr(ipad + bs * 2 + ds, sizeof(void *)); struct hash_desc desc; struct scatterlist tmp; - int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; sg_set_buf(&tmp, ipad, bs); - err = crypto_hash_init(&desc); - if (unlikely(err)) - return err; - - return crypto_hash_update(&desc, &tmp, bs); + return unlikely(crypto_hash_init(&desc)) ?: + crypto_hash_update(&desc, &tmp, 1); } static int hmac_update(struct hash_desc *pdesc, @@ -127,17 +123,13 @@ static int hmac_final(struct hash_desc *pdesc, u8 *out) struct hmac_ctx *ctx = align_ptr(digest + ds, sizeof(void *)); struct hash_desc desc; struct scatterlist tmp; - int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; sg_set_buf(&tmp, opad, bs + ds); - err = crypto_hash_final(&desc, digest); - if (unlikely(err)) - return err; - - return crypto_hash_digest(&desc, &tmp, bs + ds, out); + return unlikely(crypto_hash_final(&desc, digest)) ?: + crypto_hash_digest(&desc, &tmp, bs + ds, out); } static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, @@ -153,7 +145,6 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, struct hash_desc desc; struct scatterlist sg1[2]; struct scatterlist sg2[1]; - int err; desc.tfm = ctx->child; desc.flags = pdesc->flags & CRYPTO_TFM_REQ_MAY_SLEEP; @@ -163,11 +154,8 @@ static int hmac_digest(struct hash_desc *pdesc, struct scatterlist *sg, sg1[1].length = 0; sg_set_buf(sg2, opad, bs + ds); - err = crypto_hash_digest(&desc, sg1, nbytes + bs, digest); - if (unlikely(err)) - return err; - - return crypto_hash_digest(&desc, sg2, bs + ds, out); + return unlikely(crypto_hash_digest(&desc, sg1, nbytes + bs, digest)) ?: + crypto_hash_digest(&desc, sg2, bs + ds, out); } static int hmac_init_tfm(struct crypto_tfm *tfm) diff --git a/trunk/drivers/atm/he.c b/trunk/drivers/atm/he.c index 
41e052fecd7f..ffcb9fd31c38 100644 --- a/trunk/drivers/atm/he.c +++ b/trunk/drivers/atm/he.c @@ -1912,7 +1912,7 @@ he_service_rbrq(struct he_dev *he_dev, int group) skb->tail = skb->data + skb->len; #ifdef USE_CHECKSUM_HW if (vcc->vpi == 0 && vcc->vci >= ATM_NOT_RSV_VCI) { - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; skb->csum = TCP_CKSUM(skb->data, he_vcc->pdu_len); } diff --git a/trunk/drivers/char/briq_panel.c b/trunk/drivers/char/briq_panel.c index b8c22255f6ad..a0e5eac5f33a 100644 --- a/trunk/drivers/char/briq_panel.c +++ b/trunk/drivers/char/briq_panel.c @@ -87,7 +87,7 @@ static int briq_panel_release(struct inode *ino, struct file *filep) return 0; } -static ssize_t briq_panel_read(struct file *file, char __user *buf, size_t count, +static ssize_t briq_panel_read(struct file *file, char *buf, size_t count, loff_t *ppos) { unsigned short c; @@ -135,7 +135,7 @@ static void scroll_vfd( void ) vfd_cursor = 20; } -static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_t len, +static ssize_t briq_panel_write(struct file *file, const char *buf, size_t len, loff_t *ppos) { size_t indx = len; @@ -150,22 +150,19 @@ static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_ return -EBUSY; for (;;) { - char c; if (!indx) break; - if (get_user(c, buf)) - return -EFAULT; if (esc) { - set_led(c); + set_led(*buf); esc = 0; - } else if (c == 27) { + } else if (*buf == 27) { esc = 1; - } else if (c == 12) { + } else if (*buf == 12) { /* do a form feed */ for (i=0; i<40; i++) vfd[i] = ' '; vfd_cursor = 0; - } else if (c == 10) { + } else if (*buf == 10) { if (vfd_cursor < 20) vfd_cursor = 20; else if (vfd_cursor < 40) @@ -178,7 +175,7 @@ static ssize_t briq_panel_write(struct file *file, const char __user *buf, size_ /* just a character */ if (vfd_cursor > 39) scroll_vfd(); - vfd[vfd_cursor++] = c; + vfd[vfd_cursor++] = *buf; } indx--; buf++; @@ -205,7 +202,7 @@ static struct miscdevice briq_panel_miscdev = 
{ static int __init briq_panel_init(void) { struct device_node *root = find_path_device("/"); - const char *machine; + char *machine; int i; machine = get_property(root, "model", NULL); diff --git a/trunk/drivers/infiniband/core/mad_priv.h b/trunk/drivers/infiniband/core/mad_priv.h index d06b59083f6e..1da9adbccaec 100644 --- a/trunk/drivers/infiniband/core/mad_priv.h +++ b/trunk/drivers/infiniband/core/mad_priv.h @@ -38,7 +38,6 @@ #define __IB_MAD_PRIV_H__ #include -#include #include #include #include diff --git a/trunk/drivers/infiniband/hw/amso1100/c2_provider.c b/trunk/drivers/infiniband/hw/amso1100/c2_provider.c index dd6af551108b..8fddc8cccdf3 100644 --- a/trunk/drivers/infiniband/hw/amso1100/c2_provider.c +++ b/trunk/drivers/infiniband/hw/amso1100/c2_provider.c @@ -49,7 +49,6 @@ #include #include #include -#include #include #include diff --git a/trunk/drivers/infiniband/hw/amso1100/c2_rnic.c b/trunk/drivers/infiniband/hw/amso1100/c2_rnic.c index f49a32b7a8f6..1c3c9d65ecea 100644 --- a/trunk/drivers/infiniband/hw/amso1100/c2_rnic.c +++ b/trunk/drivers/infiniband/hw/amso1100/c2_rnic.c @@ -50,7 +50,6 @@ #include #include #include -#include #include diff --git a/trunk/drivers/infiniband/hw/ipath/ipath_diag.c b/trunk/drivers/infiniband/hw/ipath/ipath_diag.c index 29958b6e0214..28b6b46c106a 100644 --- a/trunk/drivers/infiniband/hw/ipath/ipath_diag.c +++ b/trunk/drivers/infiniband/hw/ipath/ipath_diag.c @@ -43,7 +43,6 @@ #include #include -#include #include #include "ipath_kernel.h" diff --git a/trunk/drivers/net/3c59x.c b/trunk/drivers/net/3c59x.c index 29dede2eaa85..80e8ca013e44 100644 --- a/trunk/drivers/net/3c59x.c +++ b/trunk/drivers/net/3c59x.c @@ -2077,7 +2077,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) vp->tx_ring[entry].next = 0; #if DO_ZEROCOPY - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_HW) vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded); else vp->tx_ring[entry].status = 
cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); diff --git a/trunk/drivers/net/8139cp.c b/trunk/drivers/net/8139cp.c index a48b211c489d..1428bb7715af 100644 --- a/trunk/drivers/net/8139cp.c +++ b/trunk/drivers/net/8139cp.c @@ -813,7 +813,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) flags |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { + else if (skb->ip_summed == CHECKSUM_HW) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) flags |= IPCS | TCPCS; @@ -867,7 +867,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) if (mss) ctrl |= LargeSend | ((mss & MSSMask) << MSSShift); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { + else if (skb->ip_summed == CHECKSUM_HW) { if (ip->protocol == IPPROTO_TCP) ctrl |= IPCS | TCPCS; else if (ip->protocol == IPPROTO_UDP) @@ -898,7 +898,7 @@ static int cp_start_xmit (struct sk_buff *skb, struct net_device *dev) txd->addr = cpu_to_le64(first_mapping); wmb(); - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { if (ip->protocol == IPPROTO_TCP) txd->opts1 = cpu_to_le32(first_eor | first_len | FirstFrag | DescOwn | diff --git a/trunk/drivers/net/acenic.c b/trunk/drivers/net/acenic.c index 826548644d7b..1c01e9b3d07c 100644 --- a/trunk/drivers/net/acenic.c +++ b/trunk/drivers/net/acenic.c @@ -2040,7 +2040,7 @@ static void ace_rx_int(struct net_device *dev, u32 rxretprd, u32 rxretcsm) */ if (bd_flags & BD_FLG_TCP_UDP_SUM) { skb->csum = htons(csum); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } else { skb->ip_summed = CHECKSUM_NONE; } @@ -2511,7 +2511,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev) mapping = ace_map_tx_skb(ap, skb, skb, idx); flagsize = (skb->len << 16) | (BD_FLG_END); - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) flagsize |= BD_FLG_TCP_UDP_SUM; #if 
ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2534,7 +2534,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev) mapping = ace_map_tx_skb(ap, skb, NULL, idx); flagsize = (skb_headlen(skb) << 16); - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) flagsize |= BD_FLG_TCP_UDP_SUM; #if ACENIC_DO_VLAN if (vlan_tx_tag_present(skb)) { @@ -2560,7 +2560,7 @@ static int ace_start_xmit(struct sk_buff *skb, struct net_device *dev) PCI_DMA_TODEVICE); flagsize = (frag->size << 16); - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) flagsize |= BD_FLG_TCP_UDP_SUM; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); diff --git a/trunk/drivers/net/arcnet/com20020-pci.c b/trunk/drivers/net/arcnet/com20020-pci.c index 96d8a694d433..979a33df0a8c 100644 --- a/trunk/drivers/net/arcnet/com20020-pci.c +++ b/trunk/drivers/net/arcnet/com20020-pci.c @@ -161,7 +161,6 @@ static struct pci_device_id com20020pci_id_table[] = { { 0x1571, 0xa204, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x1571, 0xa205, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x1571, 0xa206, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, - { 0x10B5, 0x9030, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, { 0x10B5, 0x9050, PCI_ANY_ID, PCI_ANY_ID, 0, 0, ARC_CAN_10MBIT }, {0,} }; diff --git a/trunk/drivers/net/bnx2.c b/trunk/drivers/net/bnx2.c index 7857b4630124..652eb05a6c2d 100644 --- a/trunk/drivers/net/bnx2.c +++ b/trunk/drivers/net/bnx2.c @@ -4423,7 +4423,7 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) ring_prod = TX_RING_IDX(prod); vlan_tag_flags = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM; } diff --git a/trunk/drivers/net/cassini.c b/trunk/drivers/net/cassini.c index 558fdb8ad2dc..a31544ccb3c4 100644 --- a/trunk/drivers/net/cassini.c +++ b/trunk/drivers/net/cassini.c @@ -2167,7 +2167,7 @@ static int cas_rx_process_pkt(struct cas 
*cp, struct cas_rx_comp *rxc, cas_page_unmap(addr); } skb->csum = ntohs(i ^ 0xffff); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; skb->protocol = eth_type_trans(skb, cp->dev); return len; } @@ -2821,7 +2821,7 @@ static inline int cas_xmit_tx_ringN(struct cas *cp, int ring, } ctrl = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/trunk/drivers/net/chelsio/sge.c b/trunk/drivers/net/chelsio/sge.c index ddd0bdb498f4..61b3754f50ff 100644 --- a/trunk/drivers/net/chelsio/sge.c +++ b/trunk/drivers/net/chelsio/sge.c @@ -1470,9 +1470,9 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) } if (!(adapter->flags & UDP_CSUM_CAPABLE) && - skb->ip_summed == CHECKSUM_PARTIAL && + skb->ip_summed == CHECKSUM_HW && skb->nh.iph->protocol == IPPROTO_UDP) - if (unlikely(skb_checksum_help(skb))) { + if (unlikely(skb_checksum_help(skb, 0))) { dev_kfree_skb_any(skb); return NETDEV_TX_OK; } @@ -1495,11 +1495,11 @@ int t1_start_xmit(struct sk_buff *skb, struct net_device *dev) cpl = (struct cpl_tx_pkt *)__skb_push(skb, sizeof(*cpl)); cpl->opcode = CPL_TX_PKT; cpl->ip_csum_dis = 1; /* SW calculates IP csum */ - cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_PARTIAL ? 0 : 1; + cpl->l4_csum_dis = skb->ip_summed == CHECKSUM_HW ? 
0 : 1; /* the length field isn't used so don't bother setting it */ - st->tx_cso += (skb->ip_summed == CHECKSUM_PARTIAL); - sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_PARTIAL); + st->tx_cso += (skb->ip_summed == CHECKSUM_HW); + sge->stats.tx_do_cksum += (skb->ip_summed == CHECKSUM_HW); sge->stats.tx_reg_pkts++; } cpl->iff = dev->if_port; diff --git a/trunk/drivers/net/dl2k.c b/trunk/drivers/net/dl2k.c index b74e67654764..402961e68c89 100644 --- a/trunk/drivers/net/dl2k.c +++ b/trunk/drivers/net/dl2k.c @@ -611,7 +611,7 @@ start_xmit (struct sk_buff *skb, struct net_device *dev) txdesc = &np->tx_ring[entry]; #if 0 - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { txdesc->status |= cpu_to_le64 (TCPChecksumEnable | UDPChecksumEnable | IPChecksumEnable); diff --git a/trunk/drivers/net/e1000/e1000_main.c b/trunk/drivers/net/e1000/e1000_main.c index 2ab9f96f5dab..98ef9f85482f 100644 --- a/trunk/drivers/net/e1000/e1000_main.c +++ b/trunk/drivers/net/e1000/e1000_main.c @@ -2600,7 +2600,7 @@ e1000_tx_csum(struct e1000_adapter *adapter, struct e1000_tx_ring *tx_ring, unsigned int i; uint8_t css; - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + if (likely(skb->ip_summed == CHECKSUM_HW)) { css = skb->h.raw - skb->data; i = tx_ring->next_to_use; @@ -2927,11 +2927,11 @@ e1000_xmit_frame(struct sk_buff *skb, struct net_device *netdev) } /* reserve a descriptor for the offload context */ - if ((mss) || (skb->ip_summed == CHECKSUM_PARTIAL)) + if ((mss) || (skb->ip_summed == CHECKSUM_HW)) count++; count++; #else - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) count++; #endif @@ -3608,7 +3608,7 @@ e1000_rx_checksum(struct e1000_adapter *adapter, */ csum = ntohl(csum ^ 0xFFFF); skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } adapter->hw_csum_good++; } diff --git a/trunk/drivers/net/forcedeth.c b/trunk/drivers/net/forcedeth.c index 32cacf115f75..11b8f1b43dd5 100644 
--- a/trunk/drivers/net/forcedeth.c +++ b/trunk/drivers/net/forcedeth.c @@ -1503,8 +1503,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev) tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT); else #endif - tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ? - NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0; + tx_flags_extra = (skb->ip_summed == CHECKSUM_HW ? (NV_TX2_CHECKSUM_L3|NV_TX2_CHECKSUM_L4) : 0); /* vlan tag */ if (np->vlangrp && vlan_tx_tag_present(skb)) { diff --git a/trunk/drivers/net/gianfar.c b/trunk/drivers/net/gianfar.c index ba960913c034..ebbbd6ca6204 100644 --- a/trunk/drivers/net/gianfar.c +++ b/trunk/drivers/net/gianfar.c @@ -947,7 +947,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Set up checksumming */ if (likely((dev->features & NETIF_F_IP_CSUM) - && (CHECKSUM_PARTIAL == skb->ip_summed))) { + && (CHECKSUM_HW == skb->ip_summed))) { fcb = gfar_add_fcb(skb, txbdp); status |= TXBD_TOE; gfar_tx_checksum(skb, fcb); diff --git a/trunk/drivers/net/hamachi.c b/trunk/drivers/net/hamachi.c index 763373ae9666..409c6aab0411 100644 --- a/trunk/drivers/net/hamachi.c +++ b/trunk/drivers/net/hamachi.c @@ -1648,7 +1648,7 @@ static int hamachi_rx(struct net_device *dev) * could do the pseudo myself and return * CHECKSUM_UNNECESSARY */ - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } } } diff --git a/trunk/drivers/net/ibm_emac/ibm_emac_core.c b/trunk/drivers/net/ibm_emac/ibm_emac_core.c index 57e214d85e9a..82468e2dc799 100644 --- a/trunk/drivers/net/ibm_emac/ibm_emac_core.c +++ b/trunk/drivers/net/ibm_emac/ibm_emac_core.c @@ -1036,7 +1036,7 @@ static inline u16 emac_tx_csum(struct ocp_enet_private *dev, struct sk_buff *skb) { #if defined(CONFIG_IBM_EMAC_TAH) - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { ++dev->stats.tx_packets_csum; return EMAC_TX_CTRL_TAH_CSUM; } diff --git a/trunk/drivers/net/ioc3-eth.c 
b/trunk/drivers/net/ioc3-eth.c index 65f897ddb920..68d8af7df08e 100644 --- a/trunk/drivers/net/ioc3-eth.c +++ b/trunk/drivers/net/ioc3-eth.c @@ -1387,7 +1387,7 @@ static int ioc3_start_xmit(struct sk_buff *skb, struct net_device *dev) * MAC header which should not be summed and the TCP/UDP pseudo headers * manually. */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { int proto = ntohs(skb->nh.iph->protocol); unsigned int csoff; struct iphdr *ih = skb->nh.iph; diff --git a/trunk/drivers/net/irda/ali-ircc.c b/trunk/drivers/net/irda/ali-ircc.c index 68d4c418cb98..e3c8cd5eca67 100644 --- a/trunk/drivers/net/irda/ali-ircc.c +++ b/trunk/drivers/net/irda/ali-ircc.c @@ -249,7 +249,7 @@ static void __exit ali_ircc_cleanup(void) IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__); - for (i=0; i < ARRAY_SIZE(dev_self); i++) { + for (i=0; i < 4; i++) { if (dev_self[i]) ali_ircc_close(dev_self[i]); } @@ -273,12 +273,6 @@ static int ali_ircc_open(int i, chipio_t *info) int err; IRDA_DEBUG(2, "%s(), ---------------- Start ----------------\n", __FUNCTION__); - - if (i >= ARRAY_SIZE(dev_self)) { - IRDA_ERROR("%s(), maximum number of supported chips reached!\n", - __FUNCTION__); - return -ENOMEM; - } /* Set FIR FIFO and DMA Threshold */ if ((ali_ircc_setup(info)) == -1) diff --git a/trunk/drivers/net/irda/irport.c b/trunk/drivers/net/irda/irport.c index ba4f3eb988b3..44efd49bf4a9 100644 --- a/trunk/drivers/net/irda/irport.c +++ b/trunk/drivers/net/irda/irport.c @@ -1090,7 +1090,7 @@ static int __init irport_init(void) { int i; - for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) { + for (i=0; (io[i] < 2000) && (i < 4); i++) { if (irport_open(i, io[i], irq[i]) != NULL) return 0; } @@ -1112,7 +1112,7 @@ static void __exit irport_cleanup(void) IRDA_DEBUG( 4, "%s()\n", __FUNCTION__); - for (i=0; i < ARRAY_SIZE(dev_self); i++) { + for (i=0; i < 4; i++) { if (dev_self[i]) irport_close(dev_self[i]); } diff --git 
a/trunk/drivers/net/irda/via-ircc.c b/trunk/drivers/net/irda/via-ircc.c index 79b85f327500..8bafb455c102 100644 --- a/trunk/drivers/net/irda/via-ircc.c +++ b/trunk/drivers/net/irda/via-ircc.c @@ -279,7 +279,7 @@ static void via_ircc_clean(void) IRDA_DEBUG(3, "%s()\n", __FUNCTION__); - for (i=0; i < ARRAY_SIZE(dev_self); i++) { + for (i=0; i < 4; i++) { if (dev_self[i]) via_ircc_close(dev_self[i]); } @@ -327,9 +327,6 @@ static __devinit int via_ircc_open(int i, chipio_t * info, unsigned int id) IRDA_DEBUG(3, "%s()\n", __FUNCTION__); - if (i >= ARRAY_SIZE(dev_self)) - return -ENOMEM; - /* Allocate new instance of the driver */ dev = alloc_irdadev(sizeof(struct via_ircc_cb)); if (dev == NULL) diff --git a/trunk/drivers/net/irda/w83977af_ir.c b/trunk/drivers/net/irda/w83977af_ir.c index 8421597072a7..0ea65c4c6f85 100644 --- a/trunk/drivers/net/irda/w83977af_ir.c +++ b/trunk/drivers/net/irda/w83977af_ir.c @@ -117,7 +117,7 @@ static int __init w83977af_init(void) IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); - for (i=0; (io[i] < 2000) && (i < ARRAY_SIZE(dev_self)); i++) { + for (i=0; (io[i] < 2000) && (i < 4); i++) { if (w83977af_open(i, io[i], irq[i], dma[i]) == 0) return 0; } @@ -136,7 +136,7 @@ static void __exit w83977af_cleanup(void) IRDA_DEBUG(4, "%s()\n", __FUNCTION__ ); - for (i=0; i < ARRAY_SIZE(dev_self); i++) { + for (i=0; i < 4; i++) { if (dev_self[i]) w83977af_close(dev_self[i]); } diff --git a/trunk/drivers/net/ixgb/ixgb_main.c b/trunk/drivers/net/ixgb/ixgb_main.c index 9405b44f3214..7bbd447289b5 100644 --- a/trunk/drivers/net/ixgb/ixgb_main.c +++ b/trunk/drivers/net/ixgb/ixgb_main.c @@ -1232,7 +1232,7 @@ ixgb_tx_csum(struct ixgb_adapter *adapter, struct sk_buff *skb) unsigned int i; uint8_t css, cso; - if(likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + if(likely(skb->ip_summed == CHECKSUM_HW)) { css = skb->h.raw - skb->data; cso = (skb->h.raw + skb->csum) - skb->data; diff --git a/trunk/drivers/net/mv643xx_eth.c b/trunk/drivers/net/mv643xx_eth.c index 
d4dcc856b3cd..59de3e74d2d7 100644 --- a/trunk/drivers/net/mv643xx_eth.c +++ b/trunk/drivers/net/mv643xx_eth.c @@ -1147,7 +1147,7 @@ static void eth_tx_submit_descs_for_skb(struct mv643xx_private *mp, desc->byte_cnt = length; desc->buf_ptr = dma_map_single(NULL, skb->data, length, DMA_TO_DEVICE); - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { BUG_ON(skb->protocol != ETH_P_IP); cmd_sts |= ETH_GEN_TCP_UDP_CHECKSUM | diff --git a/trunk/drivers/net/myri10ge/myri10ge.c b/trunk/drivers/net/myri10ge/myri10ge.c index 9f16681d0e7e..9bdd43ab3573 100644 --- a/trunk/drivers/net/myri10ge/myri10ge.c +++ b/trunk/drivers/net/myri10ge/myri10ge.c @@ -930,7 +930,7 @@ static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, u16 hw_csum) (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) || vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) { skb->csum = hw_csum; - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } } @@ -973,7 +973,7 @@ myri10ge_rx_done(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx, if ((skb->protocol == ntohs(ETH_P_IP)) || (skb->protocol == ntohs(ETH_P_IPV6))) { skb->csum = ntohs((u16) csum); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } else myri10ge_vlan_ip_csum(skb, ntohs((u16) csum)); } @@ -1897,13 +1897,13 @@ static int myri10ge_xmit(struct sk_buff *skb, struct net_device *dev) pseudo_hdr_offset = 0; odd_flag = 0; flags = (MXGEFW_FLAGS_NO_TSO | MXGEFW_FLAGS_FIRST); - if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { + if (likely(skb->ip_summed == CHECKSUM_HW)) { cksum_offset = (skb->h.raw - skb->data); pseudo_hdr_offset = (skb->h.raw + skb->csum) - skb->data; /* If the headers are excessively large, then we must * fall back to a software checksum */ if (unlikely(cksum_offset > 255 || pseudo_hdr_offset > 127)) { - if (skb_checksum_help(skb)) + if (skb_checksum_help(skb, 0)) goto drop; cksum_offset = 0; pseudo_hdr_offset = 0; diff --git a/trunk/drivers/net/ns83820.c 
b/trunk/drivers/net/ns83820.c index 5143f5dbb2e5..0e76859c90a2 100644 --- a/trunk/drivers/net/ns83820.c +++ b/trunk/drivers/net/ns83820.c @@ -1153,7 +1153,7 @@ static int ns83820_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev) if (!nr_frags) frag = NULL; extsts = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { extsts |= EXTSTS_IPPKT; if (IPPROTO_TCP == skb->nh.iph->protocol) extsts |= EXTSTS_TCPPKT; diff --git a/trunk/drivers/net/r8169.c b/trunk/drivers/net/r8169.c index d9b960aa9b0d..4c2f575faad7 100644 --- a/trunk/drivers/net/r8169.c +++ b/trunk/drivers/net/r8169.c @@ -2169,7 +2169,7 @@ static inline u32 rtl8169_tso_csum(struct sk_buff *skb, struct net_device *dev) if (mss) return LargeSend | ((mss & MSSMask) << MSSShift); } - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { const struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) diff --git a/trunk/drivers/net/s2io.c b/trunk/drivers/net/s2io.c index 5b3713f622d7..e72e0e099060 100644 --- a/trunk/drivers/net/s2io.c +++ b/trunk/drivers/net/s2io.c @@ -3893,7 +3893,7 @@ static int s2io_xmit(struct sk_buff *skb, struct net_device *dev) txdp->Control_1 |= TXD_TCP_LSO_MSS(s2io_tcp_mss(skb)); } #endif - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { txdp->Control_2 |= (TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN | TXD_TX_CKO_UDP_EN); diff --git a/trunk/drivers/net/sk98lin/skge.c b/trunk/drivers/net/sk98lin/skge.c index eb3b35180c2f..ee62845d3ac9 100644 --- a/trunk/drivers/net/sk98lin/skge.c +++ b/trunk/drivers/net/sk98lin/skge.c @@ -1559,7 +1559,7 @@ struct sk_buff *pMessage) /* pointer to send-message */ pTxd->VDataHigh = (SK_U32) (PhysAddr >> 32); pTxd->pMBuf = pMessage; - if (pMessage->ip_summed == CHECKSUM_PARTIAL) { + if (pMessage->ip_summed == CHECKSUM_HW) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -1678,7 +1678,7 @@ struct sk_buff *pMessage) 
/* pointer to send-message */ /* ** Does the HW need to evaluate checksum for TCP or UDP packets? */ - if (pMessage->ip_summed == CHECKSUM_PARTIAL) { + if (pMessage->ip_summed == CHECKSUM_HW) { u16 hdrlen = pMessage->h.raw - pMessage->data; u16 offset = hdrlen + pMessage->csum; @@ -2158,7 +2158,7 @@ SK_U64 PhysAddr; #ifdef USE_SK_RX_CHECKSUM pMsg->csum = pRxd->TcpSums & 0xffff; - pMsg->ip_summed = CHECKSUM_COMPLETE; + pMsg->ip_summed = CHECKSUM_HW; #else pMsg->ip_summed = CHECKSUM_NONE; #endif diff --git a/trunk/drivers/net/skge.c b/trunk/drivers/net/skge.c index b3d6fa3d6df4..ad878dfddef4 100644 --- a/trunk/drivers/net/skge.c +++ b/trunk/drivers/net/skge.c @@ -2338,7 +2338,7 @@ static int skge_xmit_frame(struct sk_buff *skb, struct net_device *dev) td->dma_lo = map; td->dma_hi = map >> 32; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { int offset = skb->h.raw - skb->data; /* This seems backwards, but it is what the sk98lin @@ -2642,7 +2642,7 @@ static inline struct sk_buff *skge_rx_get(struct skge_port *skge, skb->dev = skge->netdev; if (skge->rx_csum) { skb->csum = csum; - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } skb->protocol = eth_type_trans(skb, skge->netdev); diff --git a/trunk/drivers/net/sky2.c b/trunk/drivers/net/sky2.c index 8e92566b587e..933e87f1cc68 100644 --- a/trunk/drivers/net/sky2.c +++ b/trunk/drivers/net/sky2.c @@ -1163,7 +1163,7 @@ static unsigned tx_le_req(const struct sk_buff *skb) if (skb_is_gso(skb)) ++count; - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) ++count; return count; @@ -1272,7 +1272,7 @@ static int sky2_xmit_frame(struct sk_buff *skb, struct net_device *dev) #endif /* Handle TCP checksum offload */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { u16 hdr = skb->h.raw - skb->data; u16 offset = hdr + skb->csum; @@ -2000,7 +2000,7 @@ static int sky2_status_intr(struct sky2_hw *hw, int to_do) #endif case 
OP_RXCHKS: skb = sky2->rx_ring[sky2->rx_next].skb; - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; skb->csum = le16_to_cpu(status); break; diff --git a/trunk/drivers/net/starfire.c b/trunk/drivers/net/starfire.c index 2607aa51d8e0..c0a62b00ffc8 100644 --- a/trunk/drivers/net/starfire.c +++ b/trunk/drivers/net/starfire.c @@ -1230,7 +1230,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) } #if defined(ZEROCOPY) && defined(HAS_BROKEN_FIRMWARE) - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { if (skb_padto(skb, (skb->len + PADDING_MASK) & ~PADDING_MASK)) return NETDEV_TX_OK; } @@ -1252,7 +1252,7 @@ static int start_tx(struct sk_buff *skb, struct net_device *dev) status |= TxDescIntr; np->reap_tx = 0; } - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { status |= TxCalTCP; np->stats.tx_compressed++; } @@ -1499,7 +1499,7 @@ static int __netdev_rx(struct net_device *dev, int *quota) * Until then, the printk stays. 
:-) -Ion */ else if (le16_to_cpu(desc->status2) & 0x0040) { - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; skb->csum = le16_to_cpu(desc->csum); printk(KERN_DEBUG "%s: checksum_hw, status2 = %#x\n", dev->name, le16_to_cpu(desc->status2)); } diff --git a/trunk/drivers/net/sungem.c b/trunk/drivers/net/sungem.c index b388651b7836..d7b1d1882cab 100644 --- a/trunk/drivers/net/sungem.c +++ b/trunk/drivers/net/sungem.c @@ -855,7 +855,7 @@ static int gem_rx(struct gem *gp, int work_to_do) } skb->csum = ntohs((status & RXDCTRL_TCPCSUM) ^ 0xffff); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; skb->protocol = eth_type_trans(skb, gp->dev); netif_receive_skb(skb); @@ -1026,7 +1026,7 @@ static int gem_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned long flags; ctrl = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { u64 csum_start_off, csum_stuff_off; csum_start_off = (u64) (skb->h.raw - skb->data); diff --git a/trunk/drivers/net/sunhme.c b/trunk/drivers/net/sunhme.c index 17981da22730..c6f5bc3c042f 100644 --- a/trunk/drivers/net/sunhme.c +++ b/trunk/drivers/net/sunhme.c @@ -1207,7 +1207,7 @@ static void happy_meal_transceiver_check(struct happy_meal *hp, void __iomem *tr * flags, thus: * * skb->csum = rxd->rx_flags & 0xffff; - * skb->ip_summed = CHECKSUM_COMPLETE; + * skb->ip_summed = CHECKSUM_HW; * * before sending off the skb to the protocols, and we are good as gold. */ @@ -2074,7 +2074,7 @@ static void happy_meal_rx(struct happy_meal *hp, struct net_device *dev) /* This card is _fucking_ hot... 
*/ skb->csum = ntohs(csum ^ 0xffff); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; RXD(("len=%d csum=%4x]", len, csum)); skb->protocol = eth_type_trans(skb, dev); @@ -2268,7 +2268,7 @@ static int happy_meal_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 tx_flags; tx_flags = TXFLAG_OWN; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { u32 csum_start_off, csum_stuff_off; csum_start_off = (u32) (skb->h.raw - skb->data); diff --git a/trunk/drivers/net/tg3.c b/trunk/drivers/net/tg3.c index fb7026153861..eafabb253f08 100644 --- a/trunk/drivers/net/tg3.c +++ b/trunk/drivers/net/tg3.c @@ -149,67 +149,122 @@ module_param(tg3_debug, int, 0); MODULE_PARM_DESC(tg3_debug, "Tigon3 bitmapped debugging message enable value"); static struct pci_device_id tg3_pci_tbl[] = { - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788)}, - 
{PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, 
PCI_DEVICE_ID_TIGON3_5780S)}, - {PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781)}, - {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX)}, - {PCI_DEVICE(PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX)}, - {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000)}, - {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001)}, - {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003)}, - {PCI_DEVICE(PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100)}, - {PCI_DEVICE(PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3)}, - {} + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5700, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5701, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702FE, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705M_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702X, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703X, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5702A3, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5703A3, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 
0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5782, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5788, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5789, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5901_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5704S_2, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5705F, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5720, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5721, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5750M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5751F, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5752M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5753F, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5754M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { 
PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5755M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5786, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5787M, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5714S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5715S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5780S, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_TIGON3_5781, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9DXX, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_SYSKONNECT, PCI_DEVICE_ID_SYSKONNECT_9MXX, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1000, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1001, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC1003, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_ALTIMA, PCI_DEVICE_ID_ALTIMA_AC9100, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { PCI_VENDOR_ID_APPLE, PCI_DEVICE_ID_APPLE_TIGON3, + PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0UL }, + { 0, } }; MODULE_DEVICE_TABLE(pci, tg3_pci_tbl); -static const struct { +static struct { const char string[ETH_GSTRING_LEN]; } ethtool_stats_keys[TG3_NUM_STATS] = { { "rx_octets" }, @@ -290,7 +345,7 
@@ static const struct { { "nic_tx_threshold_hit" } }; -static const struct { +static struct { const char string[ETH_GSTRING_LEN]; } ethtool_test_keys[TG3_NUM_TEST] = { { "nvram test (online) " }, @@ -3796,11 +3851,11 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) skb->h.th->check = 0; } - else if (skb->ip_summed == CHECKSUM_PARTIAL) + else if (skb->ip_summed == CHECKSUM_HW) base_flags |= TXD_FLAG_TCPUDP_CSUM; #else mss = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) base_flags |= TXD_FLAG_TCPUDP_CSUM; #endif #if TG3_VLAN_TAG_USED @@ -3926,7 +3981,7 @@ static int tg3_start_xmit_dma_bug(struct sk_buff *skb, struct net_device *dev) entry = tp->tx_prod; base_flags = 0; - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) base_flags |= TXD_FLAG_TCPUDP_CSUM; #if TG3_TSO_SUPPORT != 0 mss = 0; @@ -4914,7 +4969,7 @@ static int tg3_halt(struct tg3 *tp, int kind, int silent) #define TG3_FW_BSS_ADDR 0x08000a70 #define TG3_FW_BSS_LEN 0x10 -static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { +static u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000018, 0x00000000, 0x0000000d, 0x3c1d0800, 0x37bd3ffc, 0x03a0f021, 0x3c100800, 0x26100034, @@ -5008,7 +5063,7 @@ static const u32 tg3FwText[(TG3_FW_TEXT_LEN / sizeof(u32)) + 1] = { 0x27bd0008, 0x03e00008, 0x00000000, 0x00000000, 0x00000000 }; -static const u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = { +static u32 tg3FwRodata[(TG3_FW_RODATA_LEN / sizeof(u32)) + 1] = { 0x35373031, 0x726c7341, 0x00000000, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x726c7045, 0x76656e74, 0x31000000, 0x556e6b6e, 0x45766e74, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x66617461, 0x6c457272, @@ -5073,13 +5128,13 @@ static int tg3_halt_cpu(struct tg3 *tp, u32 offset) struct fw_info { unsigned int 
text_base; unsigned int text_len; - const u32 *text_data; + u32 *text_data; unsigned int rodata_base; unsigned int rodata_len; - const u32 *rodata_data; + u32 *rodata_data; unsigned int data_base; unsigned int data_len; - const u32 *data_data; + u32 *data_data; }; /* tp->lock is held. */ @@ -5211,7 +5266,7 @@ static int tg3_load_5701_a0_firmware_fix(struct tg3 *tp) #define TG3_TSO_FW_BSS_ADDR 0x08001b80 #define TG3_TSO_FW_BSS_LEN 0x894 -static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { +static u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { 0x0e000003, 0x00000000, 0x08001b24, 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0800, 0x37bd4000, 0x03a0f021, 0x3c100800, 0x26100000, 0x0e000010, 0x00000000, 0x0000000d, 0x27bdffe0, 0x3c04fefe, @@ -5498,7 +5553,7 @@ static const u32 tg3TsoFwText[(TG3_TSO_FW_TEXT_LEN / 4) + 1] = { 0xac470014, 0xac4a0018, 0x03e00008, 0xac4b001c, 0x00000000, 0x00000000, }; -static const u32 tg3TsoFwRodata[] = { +static u32 tg3TsoFwRodata[] = { 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x496e0000, 0x73746b6f, 0x66662a2a, 0x00000000, 0x53774576, 0x656e7430, 0x00000000, 0x00000000, @@ -5506,7 +5561,7 @@ static const u32 tg3TsoFwRodata[] = { 0x00000000, }; -static const u32 tg3TsoFwData[] = { +static u32 tg3TsoFwData[] = { 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x362e3000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, @@ -5528,7 +5583,7 @@ static const u32 tg3TsoFwData[] = { #define TG3_TSO5_FW_BSS_ADDR 0x00010f50 #define TG3_TSO5_FW_BSS_LEN 0x88 -static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { +static u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { 0x0c004003, 0x00000000, 0x00010f04, 0x00000000, 0x10000003, 0x00000000, 0x0000000d, 0x0000000d, 0x3c1d0001, 0x37bde000, 0x03a0f021, 0x3c100001, 0x26100000, 0x0c004010, 0x00000000, 0x0000000d, 0x27bdffe0, 
0x3c04fefe, @@ -5687,14 +5742,14 @@ static const u32 tg3Tso5FwText[(TG3_TSO5_FW_TEXT_LEN / 4) + 1] = { 0x00000000, 0x00000000, 0x00000000, }; -static const u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = { +static u32 tg3Tso5FwRodata[(TG3_TSO5_FW_RODATA_LEN / 4) + 1] = { 0x4d61696e, 0x43707542, 0x00000000, 0x4d61696e, 0x43707541, 0x00000000, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x73746b6f, 0x66666c64, 0x00000000, 0x00000000, 0x66617461, 0x6c457272, 0x00000000, 0x00000000, 0x00000000, }; -static const u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = { +static u32 tg3Tso5FwData[(TG3_TSO5_FW_DATA_LEN / 4) + 1] = { 0x00000000, 0x73746b6f, 0x66666c64, 0x5f76312e, 0x322e3000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, }; diff --git a/trunk/drivers/net/typhoon.c b/trunk/drivers/net/typhoon.c index c6e601dc6bbc..4103c37172f9 100644 --- a/trunk/drivers/net/typhoon.c +++ b/trunk/drivers/net/typhoon.c @@ -830,7 +830,7 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->addrHi = (u64)((unsigned long) skb) >> 32; first_txd->processFlags = 0; - if(skb->ip_summed == CHECKSUM_PARTIAL) { + if(skb->ip_summed == CHECKSUM_HW) { /* The 3XP will figure out if this is UDP/TCP */ first_txd->processFlags |= TYPHOON_TX_PF_TCP_CHKSUM; first_txd->processFlags |= TYPHOON_TX_PF_UDP_CHKSUM; diff --git a/trunk/drivers/net/via-rhine.c b/trunk/drivers/net/via-rhine.c index 66547159bfd9..ae971080e2e4 100644 --- a/trunk/drivers/net/via-rhine.c +++ b/trunk/drivers/net/via-rhine.c @@ -1230,7 +1230,7 @@ static int rhine_start_tx(struct sk_buff *skb, struct net_device *dev) rp->tx_skbuff[entry] = skb; if ((rp->quirks & rqRhineI) && - (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_PARTIAL)) { + (((unsigned long)skb->data & 3) || skb_shinfo(skb)->nr_frags != 0 || skb->ip_summed == CHECKSUM_HW)) { /* Must use alignment buffer. 
*/ if (skb->len > PKT_BUF_SZ) { /* packet too long, drop it */ diff --git a/trunk/drivers/net/via-velocity.c b/trunk/drivers/net/via-velocity.c index f1e0c746a388..aa9cd92f46b2 100644 --- a/trunk/drivers/net/via-velocity.c +++ b/trunk/drivers/net/via-velocity.c @@ -2002,7 +2002,7 @@ static int velocity_xmit(struct sk_buff *skb, struct net_device *dev) * Handle hardware checksum */ if ((vptr->flags & VELOCITY_FLAGS_TX_CSUM) - && (skb->ip_summed == CHECKSUM_PARTIAL)) { + && (skb->ip_summed == CHECKSUM_HW)) { struct iphdr *ip = skb->nh.iph; if (ip->protocol == IPPROTO_TCP) td_ptr->tdesc1.TCR |= TCR0_TCPCK; diff --git a/trunk/drivers/video/riva/fbdev.c b/trunk/drivers/video/riva/fbdev.c index 4acde4f7dbf8..67d1e1c8813d 100644 --- a/trunk/drivers/video/riva/fbdev.c +++ b/trunk/drivers/video/riva/fbdev.c @@ -1826,8 +1826,8 @@ static int __devinit riva_get_EDID_OF(struct fb_info *info, struct pci_dev *pd) { struct riva_par *par = info->par; struct device_node *dp; - const unsigned char *pedid = NULL; - const unsigned char *disptype = NULL; + unsigned char *pedid = NULL; + unsigned char *disptype = NULL; static char *propnames[] = { "DFP,EDID", "LCD,EDID", "EDID", "EDID1", "EDID,B", "EDID,A", NULL }; int i; diff --git a/trunk/fs/nfs/dir.c b/trunk/fs/nfs/dir.c index e7ffb4deb3e5..094afded2b11 100644 --- a/trunk/fs/nfs/dir.c +++ b/trunk/fs/nfs/dir.c @@ -1638,35 +1638,134 @@ static int nfs_rename(struct inode *old_dir, struct dentry *old_dentry, return error; } -int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +static void nfs_access_free_entry(struct nfs_access_entry *entry) +{ + put_rpccred(entry->cred); + kfree(entry); +} + +static void __nfs_access_zap_cache(struct inode *inode) { struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; + struct rb_root *root_node = &nfsi->access_cache; + struct rb_node *n, *dispose = NULL; + struct nfs_access_entry *entry; + + /* Unhook entries from 
the cache */ + while ((n = rb_first(root_node)) != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + rb_erase(n, root_node); + n->rb_left = dispose; + dispose = n; + } + nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + spin_unlock(&inode->i_lock); - if (cache->cred != cred - || time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) - || (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)) - return -ENOENT; - memcpy(res, cache, sizeof(*res)); - return 0; + /* Now kill them all! */ + while (dispose != NULL) { + n = dispose; + dispose = n->rb_left; + nfs_access_free_entry(rb_entry(n, struct nfs_access_entry, rb_node)); + } } -void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +void nfs_access_zap_cache(struct inode *inode) { - struct nfs_inode *nfsi = NFS_I(inode); - struct nfs_access_entry *cache = &nfsi->cache_access; + spin_lock(&inode->i_lock); + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); +} - if (cache->cred != set->cred) { - if (cache->cred) - put_rpccred(cache->cred); - cache->cred = get_rpccred(set->cred); +static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, struct rpc_cred *cred) +{ + struct rb_node *n = NFS_I(inode)->access_cache.rb_node; + struct nfs_access_entry *entry; + + while (n != NULL) { + entry = rb_entry(n, struct nfs_access_entry, rb_node); + + if (cred < entry->cred) + n = n->rb_left; + else if (cred > entry->cred) + n = n->rb_right; + else + return entry; } - /* FIXME: replace current access_cache BKL reliance with inode->i_lock */ + return NULL; +} + +int nfs_access_get_cached(struct inode *inode, struct rpc_cred *cred, struct nfs_access_entry *res) +{ + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_access_entry *cache; + int err = -ENOENT; + spin_lock(&inode->i_lock); - nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS; + if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS) + goto out_zap; + cache = nfs_access_search_rbtree(inode, cred); + if 
(cache == NULL) + goto out; + if (time_after(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) + goto out_stale; + res->jiffies = cache->jiffies; + res->cred = cache->cred; + res->mask = cache->mask; + err = 0; +out: + spin_unlock(&inode->i_lock); + return err; +out_stale: + rb_erase(&cache->rb_node, &nfsi->access_cache); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(cache); + return -ENOENT; +out_zap: + /* This will release the spinlock */ + __nfs_access_zap_cache(inode); + return -ENOENT; +} + +static void nfs_access_add_rbtree(struct inode *inode, struct nfs_access_entry *set) +{ + struct rb_root *root_node = &NFS_I(inode)->access_cache; + struct rb_node **p = &root_node->rb_node; + struct rb_node *parent = NULL; + struct nfs_access_entry *entry; + + spin_lock(&inode->i_lock); + while (*p != NULL) { + parent = *p; + entry = rb_entry(parent, struct nfs_access_entry, rb_node); + + if (set->cred < entry->cred) + p = &parent->rb_left; + else if (set->cred > entry->cred) + p = &parent->rb_right; + else + goto found; + } + rb_link_node(&set->rb_node, parent, p); + rb_insert_color(&set->rb_node, root_node); spin_unlock(&inode->i_lock); + return; +found: + rb_replace_node(parent, &set->rb_node, root_node); + spin_unlock(&inode->i_lock); + nfs_access_free_entry(entry); +} + +void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) +{ + struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); + if (cache == NULL) + return; + RB_CLEAR_NODE(&cache->rb_node); cache->jiffies = set->jiffies; + cache->cred = get_rpccred(set->cred); cache->mask = set->mask; + + nfs_access_add_rbtree(inode, cache); } static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) diff --git a/trunk/fs/nfs/inode.c b/trunk/fs/nfs/inode.c index d349fb2245da..b94ab060bb1e 100644 --- a/trunk/fs/nfs/inode.c +++ b/trunk/fs/nfs/inode.c @@ -76,19 +76,14 @@ int nfs_write_inode(struct inode *inode, int sync) void nfs_clear_inode(struct inode *inode) { 
- struct nfs_inode *nfsi = NFS_I(inode); - struct rpc_cred *cred; - /* * The following should never happen... */ BUG_ON(nfs_have_writebacks(inode)); - BUG_ON (!list_empty(&nfsi->open_files)); + BUG_ON(!list_empty(&NFS_I(inode)->open_files)); + BUG_ON(atomic_read(&NFS_I(inode)->data_updates) != 0); nfs_zap_acl_cache(inode); - cred = nfsi->cache_access.cred; - if (cred) - put_rpccred(cred); - BUG_ON(atomic_read(&nfsi->data_updates) != 0); + nfs_access_zap_cache(inode); } /** @@ -290,7 +285,7 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr) nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = jiffies; memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); - nfsi->cache_access.cred = NULL; + nfsi->access_cache = RB_ROOT; unlock_new_inode(inode); } else diff --git a/trunk/include/asm-ppc/ibm4xx.h b/trunk/include/asm-ppc/ibm4xx.h index 499c14691c71..cf62b69cb69a 100644 --- a/trunk/include/asm-ppc/ibm4xx.h +++ b/trunk/include/asm-ppc/ibm4xx.h @@ -86,7 +86,7 @@ void ppc4xx_init(unsigned long r3, unsigned long r4, unsigned long r5, #define PCI_DRAM_OFFSET 0 #endif -#elif defined(CONFIG_44x) +#elif CONFIG_44x #if defined(CONFIG_BAMBOO) #include diff --git a/trunk/include/linux/bootmem.h b/trunk/include/linux/bootmem.h index e319c649e4fd..1021f508d82c 100644 --- a/trunk/include/linux/bootmem.h +++ b/trunk/include/linux/bootmem.h @@ -114,7 +114,7 @@ extern void *__init alloc_large_system_hash(const char *tablename, #else #define HASHDIST_DEFAULT 0 #endif -extern int hashdist; /* Distribute hashes across NUMA nodes? */ +extern int __initdata hashdist; /* Distribute hashes across NUMA nodes? 
*/ #endif /* _LINUX_BOOTMEM_H */ diff --git a/trunk/include/linux/dccp.h b/trunk/include/linux/dccp.h index 2d7671c92c0b..676333b9fad0 100644 --- a/trunk/include/linux/dccp.h +++ b/trunk/include/linux/dccp.h @@ -438,7 +438,6 @@ struct dccp_ackvec; * @dccps_role - Role of this sock, one of %dccp_role * @dccps_ndp_count - number of Non Data Packets since last data packet * @dccps_hc_rx_ackvec - rx half connection ack vector - * @dccps_xmit_timer - timer for when CCID is not ready to send */ struct dccp_sock { /* inet_connection_sock has to be the first member of dccp_sock */ @@ -471,7 +470,6 @@ struct dccp_sock { enum dccp_role dccps_role:2; __u8 dccps_hc_rx_insert_options:1; __u8 dccps_hc_tx_insert_options:1; - struct timer_list dccps_xmit_timer; }; static inline struct dccp_sock *dccp_sk(const struct sock *sk) diff --git a/trunk/include/linux/fib_rules.h b/trunk/include/linux/fib_rules.h deleted file mode 100644 index 4418c8d9d479..000000000000 --- a/trunk/include/linux/fib_rules.h +++ /dev/null @@ -1,65 +0,0 @@ -#ifndef __LINUX_FIB_RULES_H -#define __LINUX_FIB_RULES_H - -#include -#include - -/* rule is permanent, and cannot be deleted */ -#define FIB_RULE_PERMANENT 1 - -struct fib_rule_hdr -{ - __u8 family; - __u8 dst_len; - __u8 src_len; - __u8 tos; - - __u8 table; - __u8 res1; /* reserved */ - __u8 res2; /* reserved */ - __u8 action; - - __u32 flags; -}; - -enum -{ - FRA_UNSPEC, - FRA_DST, /* destination address */ - FRA_SRC, /* source address */ - FRA_IFNAME, /* interface name */ - FRA_UNUSED1, - FRA_UNUSED2, - FRA_PRIORITY, /* priority/preference */ - FRA_UNUSED3, - FRA_UNUSED4, - FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark */ - FRA_FLOW, /* flow/class id */ - FRA_UNUSED6, - FRA_UNUSED7, - FRA_UNUSED8, - FRA_TABLE, /* Extended table id */ - FRA_FWMASK, /* mask for netfilter mark */ - __FRA_MAX -}; - -#define FRA_MAX (__FRA_MAX - 1) - -enum -{ - FR_ACT_UNSPEC, - FR_ACT_TO_TBL, /* Pass to fixed table */ - FR_ACT_RES1, - FR_ACT_RES2, - FR_ACT_RES3, - 
FR_ACT_RES4, - FR_ACT_BLACKHOLE, /* Drop without notification */ - FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ - FR_ACT_PROHIBIT, /* Drop with EACCES */ - __FR_ACT_MAX, -}; - -#define FR_ACT_MAX (__FR_ACT_MAX - 1) - -#endif diff --git a/trunk/include/linux/filter.h b/trunk/include/linux/filter.h index 91b2e3b9251e..c6cb8f095088 100644 --- a/trunk/include/linux/filter.h +++ b/trunk/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,9 +41,8 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct rcu_head rcu; - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) diff --git a/trunk/include/linux/genetlink.h b/trunk/include/linux/genetlink.h index 9049dc65ae51..84f12a41dc01 100644 --- a/trunk/include/linux/genetlink.h +++ b/trunk/include/linux/genetlink.h @@ -16,8 +16,6 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) -#define GENL_ADMIN_PERM 0x01 - /* * List of reserved static generic netlink identifiers: */ @@ -45,25 +43,9 @@ enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, - CTRL_ATTR_VERSION, - CTRL_ATTR_HDRSIZE, - CTRL_ATTR_MAXATTR, - CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) -enum { - CTRL_ATTR_OP_UNSPEC, - CTRL_ATTR_OP_ID, - CTRL_ATTR_OP_FLAGS, - CTRL_ATTR_OP_POLICY, - CTRL_ATTR_OP_DOIT, - CTRL_ATTR_OP_DUMPIT, - __CTRL_ATTR_OP_MAX, -}; - -#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) - 
#endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/trunk/include/linux/if.h b/trunk/include/linux/if.h index cd080d765324..374e20ad8b0d 100644 --- a/trunk/include/linux/if.h +++ b/trunk/include/linux/if.h @@ -212,134 +212,5 @@ struct ifconf #define ifc_buf ifc_ifcu.ifcu_buf /* buffer address */ #define ifc_req ifc_ifcu.ifcu_req /* array of structures */ -/* The struct should be in sync with struct net_device_stats */ -struct rtnl_link_stats -{ - __u32 rx_packets; /* total packets received */ - __u32 tx_packets; /* total packets transmitted */ - __u32 rx_bytes; /* total bytes received */ - __u32 tx_bytes; /* total bytes transmitted */ - __u32 rx_errors; /* bad packets received */ - __u32 tx_errors; /* packet transmit problems */ - __u32 rx_dropped; /* no space in linux buffers */ - __u32 tx_dropped; /* no space available in linux */ - __u32 multicast; /* multicast packets received */ - __u32 collisions; - - /* detailed rx_errors: */ - __u32 rx_length_errors; - __u32 rx_over_errors; /* receiver ring buff overflow */ - __u32 rx_crc_errors; /* recved pkt with crc error */ - __u32 rx_frame_errors; /* recv'd frame alignment error */ - __u32 rx_fifo_errors; /* recv'r fifo overrun */ - __u32 rx_missed_errors; /* receiver missed packet */ - - /* detailed tx_errors */ - __u32 tx_aborted_errors; - __u32 tx_carrier_errors; - __u32 tx_fifo_errors; - __u32 tx_heartbeat_errors; - __u32 tx_window_errors; - - /* for cslip etc */ - __u32 rx_compressed; - __u32 tx_compressed; -}; - -/* The struct should be in sync with struct ifmap */ -struct rtnl_link_ifmap -{ - __u64 mem_start; - __u64 mem_end; - __u64 base_addr; - __u16 irq; - __u8 dma; - __u8 port; -}; - -enum -{ - IFLA_UNSPEC, - IFLA_ADDRESS, - IFLA_BROADCAST, - IFLA_IFNAME, - IFLA_MTU, - IFLA_LINK, - IFLA_QDISC, - IFLA_STATS, - IFLA_COST, -#define IFLA_COST IFLA_COST - IFLA_PRIORITY, -#define IFLA_PRIORITY IFLA_PRIORITY - IFLA_MASTER, -#define IFLA_MASTER IFLA_MASTER - IFLA_WIRELESS, /* Wireless Extension event - see 
wireless.h */ -#define IFLA_WIRELESS IFLA_WIRELESS - IFLA_PROTINFO, /* Protocol specific information for a link */ -#define IFLA_PROTINFO IFLA_PROTINFO - IFLA_TXQLEN, -#define IFLA_TXQLEN IFLA_TXQLEN - IFLA_MAP, -#define IFLA_MAP IFLA_MAP - IFLA_WEIGHT, -#define IFLA_WEIGHT IFLA_WEIGHT - IFLA_OPERSTATE, - IFLA_LINKMODE, - __IFLA_MAX -}; - - -#define IFLA_MAX (__IFLA_MAX - 1) - -/* ifi_flags. - - IFF_* flags. - - The only change is: - IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are - more not changeable by user. They describe link media - characteristics and set by device driver. - - Comments: - - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid - - If neither of these three flags are set; - the interface is NBMA. - - - IFF_MULTICAST does not mean anything special: - multicasts can be used on all not-NBMA links. - IFF_MULTICAST means that this media uses special encapsulation - for multicast frames. Apparently, all IFF_POINTOPOINT and - IFF_BROADCAST devices are able to use multicasts too. - */ - -/* IFLA_LINK. - For usual devices it is equal ifi_index. - If it is a "virtual interface" (f.e. tunnel), ifi_link - can point to real physical interface (f.e. for bandwidth calculations), - or maybe 0, what means, that real media is unknown (usual - for IPIP tunnels, when route to endpoint is allowed to change) - */ - -/* Subtype attributes for IFLA_PROTINFO */ -enum -{ - IFLA_INET6_UNSPEC, - IFLA_INET6_FLAGS, /* link flags */ - IFLA_INET6_CONF, /* sysctl parameters */ - IFLA_INET6_STATS, /* statistics */ - IFLA_INET6_MCAST, /* MC things. What of them? 
*/ - IFLA_INET6_CACHEINFO, /* time values and max reasm size */ - __IFLA_INET6_MAX -}; - -#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) - -struct ifla_cacheinfo -{ - __u32 max_reasm_len; - __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ - __u32 reachable_time; - __u32 retrans_time; -}; #endif /* _LINUX_IF_H */ diff --git a/trunk/include/linux/if_addr.h b/trunk/include/linux/if_addr.h deleted file mode 100644 index dbe8f6120a40..000000000000 --- a/trunk/include/linux/if_addr.h +++ /dev/null @@ -1,55 +0,0 @@ -#ifndef __LINUX_IF_ADDR_H -#define __LINUX_IF_ADDR_H - -#include - -struct ifaddrmsg -{ - __u8 ifa_family; - __u8 ifa_prefixlen; /* The prefix length */ - __u8 ifa_flags; /* Flags */ - __u8 ifa_scope; /* Address scope */ - __u32 ifa_index; /* Link index */ -}; - -/* - * Important comment: - * IFA_ADDRESS is prefix address, rather than local interface address. - * It makes no difference for normally configured broadcast interfaces, - * but for point-to-point IFA_ADDRESS is DESTINATION address, - * local address is supplied in IFA_LOCAL attribute. - */ -enum -{ - IFA_UNSPEC, - IFA_ADDRESS, - IFA_LOCAL, - IFA_LABEL, - IFA_BROADCAST, - IFA_ANYCAST, - IFA_CACHEINFO, - IFA_MULTICAST, - __IFA_MAX, -}; - -#define IFA_MAX (__IFA_MAX - 1) - -/* ifa_flags */ -#define IFA_F_SECONDARY 0x01 -#define IFA_F_TEMPORARY IFA_F_SECONDARY - -#define IFA_F_NODAD 0x02 -#define IFA_F_HOMEADDRESS 0x10 -#define IFA_F_DEPRECATED 0x20 -#define IFA_F_TENTATIVE 0x40 -#define IFA_F_PERMANENT 0x80 - -struct ifa_cacheinfo -{ - __u32 ifa_prefered; - __u32 ifa_valid; - __u32 cstamp; /* created timestamp, hundredths of seconds */ - __u32 tstamp; /* updated timestamp, hundredths of seconds */ -}; - -#endif diff --git a/trunk/include/linux/in.h b/trunk/include/linux/in.h index bcaca8399aed..94f557fa4636 100644 --- a/trunk/include/linux/in.h +++ b/trunk/include/linux/in.h @@ -52,7 +52,7 @@ enum { /* Internet address. 
*/ struct in_addr { - __be32 s_addr; + __u32 s_addr; }; #define IP_TOS 1 @@ -177,7 +177,7 @@ struct in_pktinfo #define __SOCK_SIZE__ 16 /* sizeof(struct sockaddr) */ struct sockaddr_in { sa_family_t sin_family; /* Address family */ - __be16 sin_port; /* Port number */ + unsigned short int sin_port; /* Port number */ struct in_addr sin_addr; /* Internet address */ /* Pad to size of `struct sockaddr'. */ diff --git a/trunk/include/linux/in6.h b/trunk/include/linux/in6.h index d776829b443f..304aaedea305 100644 --- a/trunk/include/linux/in6.h +++ b/trunk/include/linux/in6.h @@ -134,7 +134,6 @@ struct in6_flowlabel_req #define IPPROTO_ICMPV6 58 /* ICMPv6 */ #define IPPROTO_NONE 59 /* IPv6 no next header */ #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ -#define IPPROTO_MH 135 /* IPv6 mobility header */ /* * IPv6 TLV options. @@ -143,7 +142,6 @@ struct in6_flowlabel_req #define IPV6_TLV_PADN 1 #define IPV6_TLV_ROUTERALERT 5 #define IPV6_TLV_JUMBO 194 -#define IPV6_TLV_HAO 201 /* home address option */ /* * IPV6 socket options diff --git a/trunk/include/linux/inet.h b/trunk/include/linux/inet.h index b7c6da7d6d32..6c5587af118d 100644 --- a/trunk/include/linux/inet.h +++ b/trunk/include/linux/inet.h @@ -46,7 +46,5 @@ #include extern __be32 in_aton(const char *str); -extern int in4_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); -extern int in6_pton(const char *src, int srclen, u8 *dst, char delim, const char **end); #endif #endif /* _LINUX_INET_H */ diff --git a/trunk/include/linux/ip.h b/trunk/include/linux/ip.h index 2f4600146f83..4b55cf1df732 100644 --- a/trunk/include/linux/ip.h +++ b/trunk/include/linux/ip.h @@ -57,7 +57,6 @@ #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) -#define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_RR (7 |IPOPT_CONTROL) #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_SSRR (9 
|IPOPT_CONTROL|IPOPT_COPY) diff --git a/trunk/include/linux/ipv6.h b/trunk/include/linux/ipv6.h index caca57df0d7d..297853c841b4 100644 --- a/trunk/include/linux/ipv6.h +++ b/trunk/include/linux/ipv6.h @@ -29,7 +29,6 @@ struct in6_ifreq { #define IPV6_SRCRT_STRICT 0x01 /* this hop must be a neighbor */ #define IPV6_SRCRT_TYPE_0 0 /* IPv6 type 0 Routing Header */ -#define IPV6_SRCRT_TYPE_2 2 /* IPv6 type 2 Routing Header */ /* * routing header @@ -74,28 +73,6 @@ struct rt0_hdr { #define rt0_type rt_hdr.type }; -/* - * routing header type 2 - */ - -struct rt2_hdr { - struct ipv6_rt_hdr rt_hdr; - __u32 reserved; - struct in6_addr addr; - -#define rt2_type rt_hdr.type -}; - -/* - * home address option in destination options header - */ - -struct ipv6_destopt_hao { - __u8 type; - __u8 length; - struct in6_addr addr; -} __attribute__ ((__packed__)); - struct ipv6_auth_hdr { __u8 nexthdr; __u8 hdrlen; /* This one is measured in 32 bit units! */ @@ -176,7 +153,6 @@ struct ipv6_devconf { __s32 accept_ra_rt_info_max_plen; #endif #endif - __s32 proxy_ndp; void *sysctl; }; @@ -204,7 +180,6 @@ enum { DEVCONF_ACCEPT_RA_RTR_PREF, DEVCONF_RTR_PROBE_INTERVAL, DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN, - DEVCONF_PROXY_NDP, DEVCONF_MAX }; @@ -231,9 +206,6 @@ struct inet6_skb_parm { __u16 lastopt; __u32 nhoff; __u16 flags; -#ifdef CONFIG_IPV6_MIP6 - __u16 dsthao; -#endif #define IP6SKB_XFRM_TRANSFORMED 1 }; @@ -270,9 +242,6 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; -#ifdef CONFIG_IPV6_SUBTREES - struct in6_addr *saddr_cache; -#endif __u32 flow_label; __u32 frag_size; diff --git a/trunk/include/linux/kernel.h b/trunk/include/linux/kernel.h index 2b2ae4fdce8b..851aa1bcfc1a 100644 --- a/trunk/include/linux/kernel.h +++ b/trunk/include/linux/kernel.h @@ -31,7 +31,7 @@ extern const char linux_banner[]; #define STACK_MAGIC 0xdeadbeef #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) -#define ALIGN(x,a) (((x)+(a)-1UL)&~((a)-1UL)) 
+#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define roundup(x, y) ((((x) + ((y) - 1)) / (y)) * (y)) diff --git a/trunk/include/linux/neighbour.h b/trunk/include/linux/neighbour.h deleted file mode 100644 index bd3bbf668cdb..000000000000 --- a/trunk/include/linux/neighbour.h +++ /dev/null @@ -1,159 +0,0 @@ -#ifndef __LINUX_NEIGHBOUR_H -#define __LINUX_NEIGHBOUR_H - -#include - -struct ndmsg -{ - __u8 ndm_family; - __u8 ndm_pad1; - __u16 ndm_pad2; - __s32 ndm_ifindex; - __u16 ndm_state; - __u8 ndm_flags; - __u8 ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT is also cannot be deleted by garbage collectors. - */ - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - -/***************************************************************** - * Neighbour tables specific messages. - * - * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the - * NLM_F_DUMP flag set. Every neighbour table configuration is - * spread over multiple messages to avoid running into message - * size limits on systems with many interfaces. The first message - * in the sequence transports all not device specific data such as - * statistics, configuration, and the default parameter set. 
- * This message is followed by 0..n messages carrying device - * specific parameter sets. - * Although the ordering should be sufficient, NDTA_NAME can be - * used to identify sequences. The initial message can be identified - * by checking for NDTA_CONFIG. The device specific messages do - * not contain this TLV but have NDTPA_IFINDEX set to the - * corresponding interface index. - * - * To change neighbour table attributes, send RTM_SETNEIGHTBL - * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], - * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked - * otherwise. Device specific parameter sets can be changed by - * setting NDTPA_IFINDEX to the interface index of the corresponding - * device. - ****/ - -struct ndt_stats -{ - __u64 ndts_allocs; - __u64 ndts_destroys; - __u64 ndts_hash_grows; - __u64 ndts_res_failed; - __u64 ndts_lookups; - __u64 ndts_hits; - __u64 ndts_rcv_probes_mcast; - __u64 ndts_rcv_probes_ucast; - __u64 ndts_periodic_gc_runs; - __u64 ndts_forced_gc_runs; -}; - -enum { - NDTPA_UNSPEC, - NDTPA_IFINDEX, /* u32, unchangeable */ - NDTPA_REFCNT, /* u32, read-only */ - NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ - NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ - NDTPA_RETRANS_TIME, /* u64, msecs */ - NDTPA_GC_STALETIME, /* u64, msecs */ - NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ - NDTPA_QUEUE_LEN, /* u32 */ - NDTPA_APP_PROBES, /* u32 */ - NDTPA_UCAST_PROBES, /* u32 */ - NDTPA_MCAST_PROBES, /* u32 */ - NDTPA_ANYCAST_DELAY, /* u64, msecs */ - NDTPA_PROXY_DELAY, /* u64, msecs */ - NDTPA_PROXY_QLEN, /* u32 */ - NDTPA_LOCKTIME, /* u64, msecs */ - __NDTPA_MAX -}; -#define NDTPA_MAX (__NDTPA_MAX - 1) - -struct ndtmsg -{ - __u8 ndtm_family; - __u8 ndtm_pad1; - __u16 ndtm_pad2; -}; - -struct ndt_config -{ - __u16 ndtc_key_len; - __u16 ndtc_entry_size; - __u32 ndtc_entries; - __u32 ndtc_last_flush; /* delta to now in msecs */ - __u32 ndtc_last_rand; /* delta to now in msecs */ - __u32 ndtc_hash_rnd; - __u32 ndtc_hash_mask; - 
__u32 ndtc_hash_chain_gc; - __u32 ndtc_proxy_qlen; -}; - -enum { - NDTA_UNSPEC, - NDTA_NAME, /* char *, unchangeable */ - NDTA_THRESH1, /* u32 */ - NDTA_THRESH2, /* u32 */ - NDTA_THRESH3, /* u32 */ - NDTA_CONFIG, /* struct ndt_config, read-only */ - NDTA_PARMS, /* nested TLV NDTPA_* */ - NDTA_STATS, /* struct ndt_stats, read-only */ - NDTA_GC_INTERVAL, /* u64, msecs */ - __NDTA_MAX -}; -#define NDTA_MAX (__NDTA_MAX - 1) - -#endif diff --git a/trunk/include/linux/net.h b/trunk/include/linux/net.h index c257f716e00f..b20c53c74413 100644 --- a/trunk/include/linux/net.h +++ b/trunk/include/linux/net.h @@ -169,6 +169,11 @@ struct proto_ops { struct net_proto_family { int family; int (*create)(struct socket *sock, int protocol); + /* These are counters for the number of different methods of + each we support */ + short authentication; + short encryption; + short encrypt_net; struct module *owner; }; @@ -176,8 +181,8 @@ struct iovec; struct kvec; extern int sock_wake_async(struct socket *sk, int how, int band); -extern int sock_register(const struct net_proto_family *fam); -extern void sock_unregister(int family); +extern int sock_register(struct net_proto_family *fam); +extern int sock_unregister(int family); extern int sock_create(int family, int type, int proto, struct socket **res); extern int sock_create_kern(int family, int type, int proto, @@ -203,25 +208,6 @@ extern int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len, int flags); -extern int kernel_bind(struct socket *sock, struct sockaddr *addr, - int addrlen); -extern int kernel_listen(struct socket *sock, int backlog); -extern int kernel_accept(struct socket *sock, struct socket **newsock, - int flags); -extern int kernel_connect(struct socket *sock, struct sockaddr *addr, - int addrlen, int flags); -extern int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen); -extern int kernel_getpeername(struct socket *sock, struct sockaddr *addr, 
- int *addrlen); -extern int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen); -extern int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen); -extern int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags); -extern int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); - #ifndef CONFIG_SMP #define SOCKOPS_WRAPPED(name) name #define SOCKOPS_WRAP(name, fam) diff --git a/trunk/include/linux/netdevice.h b/trunk/include/linux/netdevice.h index 4f2c2b6beb5e..50a4719512ed 100644 --- a/trunk/include/linux/netdevice.h +++ b/trunk/include/linux/netdevice.h @@ -976,7 +976,7 @@ extern void dev_mcast_init(void); extern int netdev_max_backlog; extern int weight_p; extern int netdev_set_master(struct net_device *dev, struct net_device *master); -extern int skb_checksum_help(struct sk_buff *skb); +extern int skb_checksum_help(struct sk_buff *skb, int inward); extern struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features); #ifdef CONFIG_BUG extern void netdev_rx_csum_fault(struct net_device *dev); @@ -1012,7 +1012,7 @@ static inline int netif_needs_gso(struct net_device *dev, struct sk_buff *skb) { return skb_is_gso(skb) && (!skb_gso_ok(skb, dev->features) || - unlikely(skb->ip_summed != CHECKSUM_PARTIAL)); + unlikely(skb->ip_summed != CHECKSUM_HW)); } /* On bonding slaves other than the currently active slave, suppress diff --git a/trunk/include/linux/netfilter.h b/trunk/include/linux/netfilter.h index b7e67d1d4382..10168e26a846 100644 --- a/trunk/include/linux/netfilter.h +++ b/trunk/include/linux/netfilter.h @@ -282,12 +282,6 @@ extern void nf_invalidate_cache(int pf); Returns true or false. 
*/ extern int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len); -extern u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, - u_int32_t csum); -extern u_int16_t nf_proto_csum_update(struct sk_buff *skb, - u_int32_t oldval, u_int32_t newval, - u_int16_t csum, int pseudohdr); - struct nf_afinfo { unsigned short family; unsigned int (*checksum)(struct sk_buff *skb, unsigned int hook, diff --git a/trunk/include/linux/netfilter/nf_conntrack_common.h b/trunk/include/linux/netfilter/nf_conntrack_common.h index 9e0dae07861e..d2e4bd7a7a14 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_common.h +++ b/trunk/include/linux/netfilter/nf_conntrack_common.h @@ -125,10 +125,6 @@ enum ip_conntrack_events /* Counter highest bit has been set */ IPCT_COUNTER_FILLING_BIT = 11, IPCT_COUNTER_FILLING = (1 << IPCT_COUNTER_FILLING_BIT), - - /* Mark is set */ - IPCT_MARK_BIT = 12, - IPCT_MARK = (1 << IPCT_MARK_BIT), }; enum ip_conntrack_expect_events { diff --git a/trunk/include/linux/netfilter/nf_conntrack_tcp.h b/trunk/include/linux/netfilter/nf_conntrack_tcp.h index 6b01ba297727..b2feeffde384 100644 --- a/trunk/include/linux/netfilter/nf_conntrack_tcp.h +++ b/trunk/include/linux/netfilter/nf_conntrack_tcp.h @@ -49,7 +49,6 @@ struct ip_ct_tcp u_int32_t last_seq; /* Last sequence number seen in dir */ u_int32_t last_ack; /* Last sequence number seen in opposite dir */ u_int32_t last_end; /* Last seq + len */ - u_int16_t last_win; /* Last window advertisement seen in dir */ }; #endif /* __KERNEL__ */ diff --git a/trunk/include/linux/netfilter/nfnetlink.h b/trunk/include/linux/netfilter/nfnetlink.h index 6d8e3e5a80e9..9f5b12cf489b 100644 --- a/trunk/include/linux/netfilter/nfnetlink.h +++ b/trunk/include/linux/netfilter/nfnetlink.h @@ -43,7 +43,7 @@ struct nfattr u_int16_t nfa_len; u_int16_t nfa_type; /* we use 15 bits for the type, and the highest * bit to indicate whether the payload is nested */ -}; +} __attribute__ ((packed)); /* FIXME: Apart from 
NFNL_NFA_NESTED shamelessly copy and pasted from * rtnetlink.h, it's time to put this in a generic file */ @@ -79,7 +79,7 @@ struct nfgenmsg { u_int8_t nfgen_family; /* AF_xxx */ u_int8_t version; /* nfnetlink version */ u_int16_t res_id; /* resource id */ -}; +} __attribute__ ((packed)); #define NFNETLINK_V0 0 diff --git a/trunk/include/linux/netfilter/nfnetlink_log.h b/trunk/include/linux/netfilter/nfnetlink_log.h index 87b92f8b988f..a7497c7436df 100644 --- a/trunk/include/linux/netfilter/nfnetlink_log.h +++ b/trunk/include/linux/netfilter/nfnetlink_log.h @@ -19,18 +19,18 @@ struct nfulnl_msg_packet_hdr { u_int16_t hw_protocol; /* hw protocol (network order) */ u_int8_t hook; /* netfilter hook */ u_int8_t _pad; -}; +} __attribute__ ((packed)); struct nfulnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -}; +} __attribute__ ((packed)); struct nfulnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -}; +} __attribute__ ((packed)); #define NFULNL_PREFIXLEN 30 /* just like old log target */ diff --git a/trunk/include/linux/netfilter/nfnetlink_queue.h b/trunk/include/linux/netfilter/nfnetlink_queue.h index 36af0360b56d..9e774373244c 100644 --- a/trunk/include/linux/netfilter/nfnetlink_queue.h +++ b/trunk/include/linux/netfilter/nfnetlink_queue.h @@ -22,12 +22,12 @@ struct nfqnl_msg_packet_hw { u_int16_t hw_addrlen; u_int16_t _pad; u_int8_t hw_addr[8]; -}; +} __attribute__ ((packed)); struct nfqnl_msg_packet_timestamp { aligned_u64 sec; aligned_u64 usec; -}; +} __attribute__ ((packed)); enum nfqnl_attr_type { NFQA_UNSPEC, @@ -49,7 +49,7 @@ enum nfqnl_attr_type { struct nfqnl_msg_verdict_hdr { u_int32_t verdict; u_int32_t id; -}; +} __attribute__ ((packed)); enum nfqnl_msg_config_cmds { @@ -64,7 +64,7 @@ struct nfqnl_msg_config_cmd { u_int8_t command; /* nfqnl_msg_config_cmds */ u_int8_t _pad; u_int16_t pf; /* AF_xxx for PF_[UN]BIND */ -}; +} __attribute__ ((packed)); enum nfqnl_config_mode { NFQNL_COPY_NONE, diff --git 
a/trunk/include/linux/netfilter/x_tables.h b/trunk/include/linux/netfilter/x_tables.h index 739a98eebe2c..48cc32d83f77 100644 --- a/trunk/include/linux/netfilter/x_tables.h +++ b/trunk/include/linux/netfilter/x_tables.h @@ -138,6 +138,16 @@ struct xt_counters_info #include +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) +#include + +#ifdef CONFIG_COMPAT +#define COMPAT_TO_USER 1 +#define COMPAT_FROM_USER -1 +#define COMPAT_CALC_SIZE 0 +#endif + struct xt_match { struct list_head list; @@ -164,24 +174,21 @@ struct xt_match const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. */ - void (*destroy)(const struct xt_match *match, void *matchinfo); + void (*destroy)(const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize); /* Called when userspace align differs from kernel space one */ - void (*compat_from_user)(void *dst, void *src); - int (*compat_to_user)(void __user *dst, void *src); + int (*compat)(void *match, void **dstptr, int *size, int convert); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; - /* Free to use by each match */ - unsigned long data; - char *table; unsigned int matchsize; - unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -204,7 +211,8 @@ struct xt_target const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo); + const void *targinfo, + void *userdata); /* Called when user tries to insert an entry of this type: hook_mask is a bitmask of hooks from which it can be @@ -214,21 +222,21 @@ struct xt_target const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask); /* Called when entry of this type deleted. 
*/ - void (*destroy)(const struct xt_target *target, void *targinfo); + void (*destroy)(const struct xt_target *target, void *targinfo, + unsigned int targinfosize); /* Called when userspace align differs from kernel space one */ - void (*compat_from_user)(void *dst, void *src); - int (*compat_to_user)(void __user *dst, void *src); + int (*compat)(void *target, void **dstptr, int *size, int convert); /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; char *table; unsigned int targetsize; - unsigned int compatsize; unsigned int hooks; unsigned short proto; @@ -282,13 +290,8 @@ struct xt_table_info extern int xt_register_target(struct xt_target *target); extern void xt_unregister_target(struct xt_target *target); -extern int xt_register_targets(struct xt_target *target, unsigned int n); -extern void xt_unregister_targets(struct xt_target *target, unsigned int n); - extern int xt_register_match(struct xt_match *target); extern void xt_unregister_match(struct xt_match *target); -extern int xt_register_matches(struct xt_match *match, unsigned int n); -extern void xt_unregister_matches(struct xt_match *match, unsigned int n); extern int xt_check_match(const struct xt_match *match, unsigned short family, unsigned int size, const char *table, unsigned int hook, @@ -385,18 +388,9 @@ struct compat_xt_counters_info extern void xt_compat_lock(int af); extern void xt_compat_unlock(int af); - -extern int xt_compat_match_offset(struct xt_match *match); -extern void xt_compat_match_from_user(struct xt_entry_match *m, - void **dstptr, int *size); -extern int xt_compat_match_to_user(struct xt_entry_match *m, - void * __user *dstptr, int *size); - -extern int xt_compat_target_offset(struct xt_target *target); -extern void xt_compat_target_from_user(struct xt_entry_target *t, - void **dstptr, int *size); -extern int xt_compat_target_to_user(struct xt_entry_target *t, - void * __user *dstptr, int *size); +extern int xt_compat_match(void *match, void 
**dstptr, int *size, int convert); +extern int xt_compat_target(void *target, void **dstptr, int *size, + int convert); #endif /* CONFIG_COMPAT */ #endif /* __KERNEL__ */ diff --git a/trunk/include/linux/netfilter/xt_DSCP.h b/trunk/include/linux/netfilter/xt_DSCP.h deleted file mode 100644 index 3c7c963997bd..000000000000 --- a/trunk/include/linux/netfilter/xt_DSCP.h +++ /dev/null @@ -1,20 +0,0 @@ -/* x_tables module for setting the IPv4/IPv6 DSCP field - * - * (C) 2002 Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * xt_DSCP.h,v 1.7 2002/03/14 12:03:13 laforge Exp -*/ -#ifndef _XT_DSCP_TARGET_H -#define _XT_DSCP_TARGET_H -#include - -/* target info */ -struct xt_DSCP_info { - u_int8_t dscp; -}; - -#endif /* _XT_DSCP_TARGET_H */ diff --git a/trunk/include/linux/netfilter/xt_dscp.h b/trunk/include/linux/netfilter/xt_dscp.h deleted file mode 100644 index 1da61e6acaf7..000000000000 --- a/trunk/include/linux/netfilter/xt_dscp.h +++ /dev/null @@ -1,23 +0,0 @@ -/* x_tables module for matching the IPv4/IPv6 DSCP field - * - * (C) 2002 Harald Welte - * This software is distributed under GNU GPL v2, 1991 - * - * See RFC2474 for a description of the DSCP field within the IP Header. 
- * - * xt_dscp.h,v 1.3 2002/08/05 19:00:21 laforge Exp -*/ -#ifndef _XT_DSCP_H -#define _XT_DSCP_H - -#define XT_DSCP_MASK 0xfc /* 11111100 */ -#define XT_DSCP_SHIFT 2 -#define XT_DSCP_MAX 0x3f /* 00111111 */ - -/* match info */ -struct xt_dscp_info { - u_int8_t dscp; - u_int8_t invert; -}; - -#endif /* _XT_DSCP_H */ diff --git a/trunk/include/linux/netfilter_arp/arp_tables.h b/trunk/include/linux/netfilter_arp/arp_tables.h index 149e87c9ab13..62cc27daca4e 100644 --- a/trunk/include/linux/netfilter_arp/arp_tables.h +++ b/trunk/include/linux/netfilter_arp/arp_tables.h @@ -248,7 +248,8 @@ extern unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table); + struct arpt_table *table, + void *userdata); #define ARPT_ALIGN(s) (((s) + (__alignof__(struct arpt_entry)-1)) & ~(__alignof__(struct arpt_entry)-1)) #endif /*__KERNEL__*/ diff --git a/trunk/include/linux/netfilter_bridge.h b/trunk/include/linux/netfilter_bridge.h index 9a4dd11af86e..427c67ff89e9 100644 --- a/trunk/include/linux/netfilter_bridge.h +++ b/trunk/include/linux/netfilter_bridge.h @@ -5,8 +5,9 @@ */ #include +#if defined(__KERNEL__) && defined(CONFIG_BRIDGE_NETFILTER) #include -#include +#endif /* Bridge Hooks */ /* After promisc drops, checksum checks. 
*/ @@ -46,20 +47,40 @@ enum nf_br_hook_priorities { /* Only used in br_forward.c */ -extern int nf_bridge_copy_header(struct sk_buff *skb); -static inline int nf_bridge_maybe_copy_header(struct sk_buff *skb) +static inline +int nf_bridge_maybe_copy_header(struct sk_buff *skb) { - if (skb->nf_bridge) - return nf_bridge_copy_header(skb); - return 0; + int err; + + if (skb->nf_bridge) { + if (skb->protocol == __constant_htons(ETH_P_8021Q)) { + err = skb_cow(skb, 18); + if (err) + return err; + memcpy(skb->data - 18, skb->nf_bridge->data, 18); + skb_push(skb, 4); + } else { + err = skb_cow(skb, 16); + if (err) + return err; + memcpy(skb->data - 16, skb->nf_bridge->data, 16); + } + } + return 0; } /* This is called by the IP fragmenting code and it ensures there is * enough room for the encapsulating header (if there is one). */ -static inline int nf_bridge_pad(const struct sk_buff *skb) +static inline +int nf_bridge_pad(struct sk_buff *skb) { - return (skb->nf_bridge && skb->protocol == htons(ETH_P_8021Q)) - ? 
VLAN_HLEN : 0; + if (skb->protocol == __constant_htons(ETH_P_IP)) + return 0; + if (skb->nf_bridge) { + if (skb->protocol == __constant_htons(ETH_P_8021Q)) + return 4; + } + return 0; } struct bridge_skb_cb { @@ -69,9 +90,6 @@ struct bridge_skb_cb { }; extern int brnf_deferred_hooks; -#else -#define nf_bridge_maybe_copy_header(skb) (0) -#define nf_bridge_pad(skb) (0) #endif /* CONFIG_BRIDGE_NETFILTER */ #endif /* __KERNEL__ */ diff --git a/trunk/include/linux/netfilter_ipv4/ip_conntrack_helper.h b/trunk/include/linux/netfilter_ipv4/ip_conntrack_helper.h index 77fe868d36ff..8d69279ccfe4 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_conntrack_helper.h +++ b/trunk/include/linux/netfilter_ipv4/ip_conntrack_helper.h @@ -25,8 +25,6 @@ struct ip_conntrack_helper struct ip_conntrack *ct, enum ip_conntrack_info conntrackinfo); - void (*destroy)(struct ip_conntrack *ct); - int (*to_nfattr)(struct sk_buff *skb, const struct ip_conntrack *ct); }; diff --git a/trunk/include/linux/netfilter_ipv4/ip_conntrack_pptp.h b/trunk/include/linux/netfilter_ipv4/ip_conntrack_pptp.h index 2644b1faddd6..816144c75de0 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_conntrack_pptp.h +++ b/trunk/include/linux/netfilter_ipv4/ip_conntrack_pptp.h @@ -31,8 +31,8 @@ struct ip_ct_pptp_master { /* everything below is going to be per-expectation in newnat, * since there could be more than one call within one session */ enum pptp_ctrlcall_state cstate; /* call state */ - __be16 pac_call_id; /* call id of PAC, host byte order */ - __be16 pns_call_id; /* call id of PNS, host byte order */ + u_int16_t pac_call_id; /* call id of PAC, host byte order */ + u_int16_t pns_call_id; /* call id of PNS, host byte order */ /* in pre-2.6.11 this used to be per-expect. 
Now it is per-conntrack * and therefore imposes a fixed limit on the number of maps */ @@ -42,8 +42,8 @@ struct ip_ct_pptp_master { /* conntrack_expect private member */ struct ip_ct_pptp_expect { enum pptp_ctrlcall_state cstate; /* call state */ - __be16 pac_call_id; /* call id of PAC */ - __be16 pns_call_id; /* call id of PNS */ + u_int16_t pac_call_id; /* call id of PAC */ + u_int16_t pns_call_id; /* call id of PNS */ }; @@ -107,7 +107,8 @@ struct PptpControlHeader { struct PptpStartSessionRequest { __be16 protocolVersion; - __u16 reserved1; + __u8 reserved1; + __u8 reserved2; __be32 framingCapability; __be32 bearerCapability; __be16 maxChannels; @@ -142,8 +143,6 @@ struct PptpStartSessionReply { struct PptpStopSessionRequest { __u8 reason; - __u8 reserved1; - __u16 reserved2; }; /* PptpStopSessionResultCode */ @@ -153,7 +152,6 @@ struct PptpStopSessionRequest { struct PptpStopSessionReply { __u8 resultCode; __u8 generalErrorCode; - __u16 reserved1; }; struct PptpEchoRequest { @@ -190,8 +188,9 @@ struct PptpOutCallRequest { __be32 framingType; __be16 packetWindow; __be16 packetProcDelay; - __be16 phoneNumberLength; __u16 reserved1; + __be16 phoneNumberLength; + __u16 reserved2; __u8 phoneNumber[64]; __u8 subAddress[64]; }; @@ -286,19 +285,19 @@ struct PptpSetLinkInfo { }; union pptp_ctrl_union { - struct PptpStartSessionRequest sreq; - struct PptpStartSessionReply srep; - struct PptpStopSessionRequest streq; - struct PptpStopSessionReply strep; - struct PptpOutCallRequest ocreq; - struct PptpOutCallReply ocack; - struct PptpInCallRequest icreq; - struct PptpInCallReply icack; - struct PptpInCallConnected iccon; - struct PptpClearCallRequest clrreq; - struct PptpCallDisconnectNotify disc; - struct PptpWanErrorNotify wanerr; - struct PptpSetLinkInfo setlink; + struct PptpStartSessionRequest sreq; + struct PptpStartSessionReply srep; + struct PptpStopSessionRequest streq; + struct PptpStopSessionReply strep; + struct PptpOutCallRequest ocreq; + struct 
PptpOutCallReply ocack; + struct PptpInCallRequest icreq; + struct PptpInCallReply icack; + struct PptpInCallConnected iccon; + struct PptpClearCallRequest clrreq; + struct PptpCallDisconnectNotify disc; + struct PptpWanErrorNotify wanerr; + struct PptpSetLinkInfo setlink; }; extern int @@ -315,7 +314,7 @@ extern int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -extern void +extern int (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *exp_orig, struct ip_conntrack_expect *exp_reply); diff --git a/trunk/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h b/trunk/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h index 1d853aa873eb..8d090ef82f5f 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h +++ b/trunk/include/linux/netfilter_ipv4/ip_conntrack_proto_gre.h @@ -49,18 +49,18 @@ struct gre_hdr { #else #error "Adjust your defines" #endif - __be16 protocol; + __u16 protocol; }; /* modified GRE header for PPTP */ struct gre_hdr_pptp { - __u8 flags; /* bitfield */ - __u8 version; /* should be GRE_VERSION_PPTP */ - __be16 protocol; /* should be GRE_PROTOCOL_PPTP */ - __be16 payload_len; /* size of ppp payload, not inc. gre header */ - __be16 call_id; /* peer's call_id for this session */ - __be32 seq; /* sequence number. Present if S==1 */ - __be32 ack; /* seq number of highest packet recieved by */ + __u8 flags; /* bitfield */ + __u8 version; /* should be GRE_VERSION_PPTP */ + __u16 protocol; /* should be GRE_PROTOCOL_PPTP */ + __u16 payload_len; /* size of ppp payload, not inc. gre header */ + __u16 call_id; /* peer's call_id for this session */ + __u32 seq; /* sequence number. 
Present if S==1 */ + __u32 ack; /* seq number of highest packet recieved by */ /* sender in this session */ }; @@ -92,13 +92,13 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct); /* get pointer to gre key, if present */ -static inline __be32 *gre_key(struct gre_hdr *greh) +static inline u_int32_t *gre_key(struct gre_hdr *greh) { if (!greh->key) return NULL; if (greh->csum || greh->routing) - return (__be32 *) (greh+sizeof(*greh)+4); - return (__be32 *) (greh+sizeof(*greh)); + return (u_int32_t *) (greh+sizeof(*greh)+4); + return (u_int32_t *) (greh+sizeof(*greh)); } /* get pointer ot gre csum, if present */ diff --git a/trunk/include/linux/netfilter_ipv4/ip_nat.h b/trunk/include/linux/netfilter_ipv4/ip_nat.h index 98f8407e4cb5..e9f5ed1d9f68 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_nat.h +++ b/trunk/include/linux/netfilter_ipv4/ip_nat.h @@ -72,6 +72,10 @@ extern unsigned int ip_nat_setup_info(struct ip_conntrack *conntrack, extern int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack); +/* Calculate relative checksum. */ +extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, + u_int32_t newval, + u_int16_t oldcheck); #else /* !__KERNEL__: iptables wants this to compile. 
*/ #define ip_nat_multi_range ip_nat_multi_range_compat #endif /*__KERNEL__*/ diff --git a/trunk/include/linux/netfilter_ipv4/ip_nat_core.h b/trunk/include/linux/netfilter_ipv4/ip_nat_core.h index 60566f9fd7b3..30db23f06b03 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_nat_core.h +++ b/trunk/include/linux/netfilter_ipv4/ip_nat_core.h @@ -11,8 +11,8 @@ extern unsigned int ip_nat_packet(struct ip_conntrack *ct, unsigned int hooknum, struct sk_buff **pskb); -extern int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb); +extern int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir); #endif /* _IP_NAT_CORE_H */ diff --git a/trunk/include/linux/netfilter_ipv4/ip_nat_pptp.h b/trunk/include/linux/netfilter_ipv4/ip_nat_pptp.h index 36668bf0f373..eaf66c2e8f93 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_nat_pptp.h +++ b/trunk/include/linux/netfilter_ipv4/ip_nat_pptp.h @@ -4,8 +4,8 @@ /* conntrack private data */ struct ip_nat_pptp { - __be16 pns_call_id; /* NAT'ed PNS call id */ - __be16 pac_call_id; /* NAT'ed PAC call id */ + u_int16_t pns_call_id; /* NAT'ed PNS call id */ + u_int16_t pac_call_id; /* NAT'ed PAC call id */ }; #endif /* _NAT_PPTP_H */ diff --git a/trunk/include/linux/netfilter_ipv4/ip_tables.h b/trunk/include/linux/netfilter_ipv4/ip_tables.h index a536bbdef145..c0dac16e1902 100644 --- a/trunk/include/linux/netfilter_ipv4/ip_tables.h +++ b/trunk/include/linux/netfilter_ipv4/ip_tables.h @@ -312,7 +312,8 @@ extern unsigned int ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table); + struct ipt_table *table, + void *userdata); #define IPT_ALIGN(s) XT_ALIGN(s) diff --git a/trunk/include/linux/netfilter_ipv4/ipt_DSCP.h b/trunk/include/linux/netfilter_ipv4/ipt_DSCP.h index 
3491e524d5ea..b30f510b5bef 100644 --- a/trunk/include/linux/netfilter_ipv4/ipt_DSCP.h +++ b/trunk/include/linux/netfilter_ipv4/ipt_DSCP.h @@ -11,8 +11,10 @@ #ifndef _IPT_DSCP_TARGET_H #define _IPT_DSCP_TARGET_H #include -#include -#define ipt_DSCP_info xt_DSCP_info +/* target info */ +struct ipt_DSCP_info { + u_int8_t dscp; +}; #endif /* _IPT_DSCP_TARGET_H */ diff --git a/trunk/include/linux/netfilter_ipv4/ipt_dscp.h b/trunk/include/linux/netfilter_ipv4/ipt_dscp.h index 4b82ca912b0e..2fa6dfe92894 100644 --- a/trunk/include/linux/netfilter_ipv4/ipt_dscp.h +++ b/trunk/include/linux/netfilter_ipv4/ipt_dscp.h @@ -10,12 +10,14 @@ #ifndef _IPT_DSCP_H #define _IPT_DSCP_H -#include +#define IPT_DSCP_MASK 0xfc /* 11111100 */ +#define IPT_DSCP_SHIFT 2 +#define IPT_DSCP_MAX 0x3f /* 00111111 */ -#define IPT_DSCP_MASK XT_DSCP_MASK -#define IPT_DSCP_SHIFT XT_DSCP_SHIFT -#define IPT_DSCP_MAX XT_DSCP_MAX - -#define ipt_dscp_info xt_dscp_info +/* match info */ +struct ipt_dscp_info { + u_int8_t dscp; + u_int8_t invert; +}; #endif /* _IPT_DSCP_H */ diff --git a/trunk/include/linux/netfilter_ipv4/listhelp.h b/trunk/include/linux/netfilter_ipv4/listhelp.h new file mode 100644 index 000000000000..5d92cf044d91 --- /dev/null +++ b/trunk/include/linux/netfilter_ipv4/listhelp.h @@ -0,0 +1,123 @@ +#ifndef _LISTHELP_H +#define _LISTHELP_H +#include + +/* Header to do more comprehensive job than linux/list.h; assume list + is first entry in structure. */ + +/* Return pointer to first true entry, if any, or NULL. A macro + required to allow inlining of cmpfn. */ +#define LIST_FIND(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i, *__j = NULL; \ + \ + ASSERT_READ_LOCK(head); \ + list_for_each(__i, (head)) \ + if (cmpfn((const type)__i , ## args)) { \ + __j = __i; \ + break; \ + } \ + (type)__j; \ +}) + +#define LIST_FIND_W(head, cmpfn, type, args...) 
\ +({ \ + const struct list_head *__i, *__j = NULL; \ + \ + ASSERT_WRITE_LOCK(head); \ + list_for_each(__i, (head)) \ + if (cmpfn((type)__i , ## args)) { \ + __j = __i; \ + break; \ + } \ + (type)__j; \ +}) + +/* Just like LIST_FIND but we search backwards */ +#define LIST_FIND_B(head, cmpfn, type, args...) \ +({ \ + const struct list_head *__i, *__j = NULL; \ + \ + ASSERT_READ_LOCK(head); \ + list_for_each_prev(__i, (head)) \ + if (cmpfn((const type)__i , ## args)) { \ + __j = __i; \ + break; \ + } \ + (type)__j; \ +}) + +static inline int +__list_cmp_same(const void *p1, const void *p2) { return p1 == p2; } + +/* Is this entry in the list? */ +static inline int +list_inlist(struct list_head *head, const void *entry) +{ + return LIST_FIND(head, __list_cmp_same, void *, entry) != NULL; +} + +/* Delete from list. */ +#ifdef CONFIG_NETFILTER_DEBUG +#define LIST_DELETE(head, oldentry) \ +do { \ + ASSERT_WRITE_LOCK(head); \ + if (!list_inlist(head, oldentry)) \ + printk("LIST_DELETE: %s:%u `%s'(%p) not in %s.\n", \ + __FILE__, __LINE__, #oldentry, oldentry, #head); \ + else list_del((struct list_head *)oldentry); \ +} while(0) +#else +#define LIST_DELETE(head, oldentry) list_del((struct list_head *)oldentry) +#endif + +/* Append. */ +static inline void +list_append(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add((new), (head)->prev); +} + +/* Prepend. */ +static inline void +list_prepend(struct list_head *head, void *new) +{ + ASSERT_WRITE_LOCK(head); + list_add(new, head); +} + +/* Insert according to ordering function; insert before first true. */ +#define LIST_INSERT(head, new, cmpfn) \ +do { \ + struct list_head *__i; \ + ASSERT_WRITE_LOCK(head); \ + list_for_each(__i, (head)) \ + if ((new), (typeof (new))__i) \ + break; \ + list_add((struct list_head *)(new), __i->prev); \ +} while(0) + +/* If the field after the list_head is a nul-terminated string, you + can use these functions. 
*/ +static inline int __list_cmp_name(const void *i, const char *name) +{ + return strcmp(name, i+sizeof(struct list_head)) == 0; +} + +/* Returns false if same name already in list, otherwise does insert. */ +static inline int +list_named_insert(struct list_head *head, void *new) +{ + if (LIST_FIND(head, __list_cmp_name, void *, + new + sizeof(struct list_head))) + return 0; + list_prepend(head, new); + return 1; +} + +/* Find this named element in the list. */ +#define list_named_find(head, name) \ +LIST_FIND(head, __list_cmp_name, void *, name) + +#endif /*_LISTHELP_H*/ diff --git a/trunk/include/linux/netfilter_ipv6.h b/trunk/include/linux/netfilter_ipv6.h index d97e268cdfe5..52a7b9e76428 100644 --- a/trunk/include/linux/netfilter_ipv6.h +++ b/trunk/include/linux/netfilter_ipv6.h @@ -73,7 +73,6 @@ enum nf_ip6_hook_priorities { }; #ifdef CONFIG_NETFILTER -extern int ip6_route_me_harder(struct sk_buff *skb); extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/trunk/include/linux/netfilter_ipv6/ip6_tables.h b/trunk/include/linux/netfilter_ipv6/ip6_tables.h index d7a8e9c0dad0..d0d5d1ee4be3 100644 --- a/trunk/include/linux/netfilter_ipv6/ip6_tables.h +++ b/trunk/include/linux/netfilter_ipv6/ip6_tables.h @@ -300,7 +300,8 @@ extern unsigned int ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ip6t_table *table); + struct ip6t_table *table, + void *userdata); /* Check for an extension */ extern int ip6t_ext_hdr(u8 nexthdr); diff --git a/trunk/include/linux/netfilter_logging.h b/trunk/include/linux/netfilter_logging.h new file mode 100644 index 000000000000..562bb6aad4e1 --- /dev/null +++ b/trunk/include/linux/netfilter_logging.h @@ -0,0 +1,33 @@ +/* Internal logging interface, which relies on the real + LOG target modules */ +#ifndef __LINUX_NETFILTER_LOGGING_H +#define __LINUX_NETFILTER_LOGGING_H + +#ifdef __KERNEL__ 
+#include + +struct nf_logging_t { + void (*nf_log_packet)(struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const char *prefix); + void (*nf_log)(char *pfh, size_t len, + const char *prefix); +}; + +extern void nf_log_register(int pf, const struct nf_logging_t *logging); +extern void nf_log_unregister(int pf, const struct nf_logging_t *logging); + +extern void nf_log_packet(int pf, + struct sk_buff **pskb, + unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + const char *fmt, ...); +extern void nf_log(int pf, + char *pfh, size_t len, + const char *fmt, ...); +#endif /*__KERNEL__*/ + +#endif /*__LINUX_NETFILTER_LOGGING_H*/ diff --git a/trunk/include/linux/nfs_fs.h b/trunk/include/linux/nfs_fs.h index 6c2066caeaab..cc013ed2e52e 100644 --- a/trunk/include/linux/nfs_fs.h +++ b/trunk/include/linux/nfs_fs.h @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -69,6 +70,7 @@ * NFSv3/v4 Access mode cache entry */ struct nfs_access_entry { + struct rb_node rb_node; unsigned long jiffies; struct rpc_cred * cred; int mask; @@ -145,7 +147,7 @@ struct nfs_inode { */ atomic_t data_updates; - struct nfs_access_entry cache_access; + struct rb_root access_cache; #ifdef CONFIG_NFS_V3_ACL struct posix_acl *acl_access; struct posix_acl *acl_default; @@ -297,6 +299,7 @@ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int nfs_permission(struct inode *, int, struct nameidata *); extern int nfs_access_get_cached(struct inode *, struct rpc_cred *, struct nfs_access_entry *); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); +extern void nfs_access_zap_cache(struct inode *inode); extern int nfs_open(struct inode *, struct file *); extern int nfs_release(struct inode *, struct file *); extern int nfs_attribute_timeout(struct inode *inode); diff --git a/trunk/include/linux/pkt_cls.h 
b/trunk/include/linux/pkt_cls.h index c3f01b3085a4..bd2c5a2bbbf5 100644 --- a/trunk/include/linux/pkt_cls.h +++ b/trunk/include/linux/pkt_cls.h @@ -305,7 +305,6 @@ enum TCA_FW_POLICE, TCA_FW_INDEV, /* used by CONFIG_NET_CLS_IND */ TCA_FW_ACT, /* used by CONFIG_NET_CLS_ACT */ - TCA_FW_MASK, __TCA_FW_MAX }; diff --git a/trunk/include/linux/rtnetlink.h b/trunk/include/linux/rtnetlink.h index 9c92dc8b9a08..facd9ee37b76 100644 --- a/trunk/include/linux/rtnetlink.h +++ b/trunk/include/linux/rtnetlink.h @@ -2,7 +2,6 @@ #define __LINUX_RTNETLINK_H #include -#include /**** * Routing/neighbour discovery messages. @@ -239,8 +238,10 @@ enum rt_class_t RT_TABLE_DEFAULT=253, RT_TABLE_MAIN=254, RT_TABLE_LOCAL=255, - RT_TABLE_MAX=0xFFFFFFFF + __RT_TABLE_MAX }; +#define RT_TABLE_MAX (__RT_TABLE_MAX - 1) + /* Routing message attributes */ @@ -262,7 +263,6 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, - RTA_TABLE, __RTA_MAX }; @@ -383,6 +383,226 @@ struct rta_session } u; }; + +/********************************************************* + * Interface address. 
+ ****/ + +struct ifaddrmsg +{ + unsigned char ifa_family; + unsigned char ifa_prefixlen; /* The prefix length */ + unsigned char ifa_flags; /* Flags */ + unsigned char ifa_scope; /* See above */ + int ifa_index; /* Link index */ +}; + +enum +{ + IFA_UNSPEC, + IFA_ADDRESS, + IFA_LOCAL, + IFA_LABEL, + IFA_BROADCAST, + IFA_ANYCAST, + IFA_CACHEINFO, + IFA_MULTICAST, + __IFA_MAX +}; + +#define IFA_MAX (__IFA_MAX - 1) + +/* ifa_flags */ + +#define IFA_F_SECONDARY 0x01 +#define IFA_F_TEMPORARY IFA_F_SECONDARY + +#define IFA_F_DEPRECATED 0x20 +#define IFA_F_TENTATIVE 0x40 +#define IFA_F_PERMANENT 0x80 + +struct ifa_cacheinfo +{ + __u32 ifa_prefered; + __u32 ifa_valid; + __u32 cstamp; /* created timestamp, hundredths of seconds */ + __u32 tstamp; /* updated timestamp, hundredths of seconds */ +}; + + +#define IFA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifaddrmsg)))) +#define IFA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifaddrmsg)) + +/* + Important comment: + IFA_ADDRESS is prefix address, rather than local interface address. + It makes no difference for normally configured broadcast interfaces, + but for point-to-point IFA_ADDRESS is DESTINATION address, + local address is supplied in IFA_LOCAL attribute. + */ + +/************************************************************** + * Neighbour discovery. + ****/ + +struct ndmsg +{ + unsigned char ndm_family; + unsigned char ndm_pad1; + unsigned short ndm_pad2; + int ndm_ifindex; /* Link index */ + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) +#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. 
+ */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + + +/***************************************************************** + * Neighbour tables specific messages. + * + * To retrieve the neighbour tables send RTM_GETNEIGHTBL with the + * NLM_F_DUMP flag set. Every neighbour table configuration is + * spread over multiple messages to avoid running into message + * size limits on systems with many interfaces. The first message + * in the sequence transports all not device specific data such as + * statistics, configuration, and the default parameter set. + * This message is followed by 0..n messages carrying device + * specific parameter sets. + * Although the ordering should be sufficient, NDTA_NAME can be + * used to identify sequences. The initial message can be identified + * by checking for NDTA_CONFIG. The device specific messages do + * not contain this TLV but have NDTPA_IFINDEX set to the + * corresponding interface index. + * + * To change neighbour table attributes, send RTM_SETNEIGHTBL + * with NDTA_NAME set. Changeable attribute include NDTA_THRESH[1-3], + * NDTA_GC_INTERVAL, and all TLVs in NDTA_PARMS unless marked + * otherwise. Device specific parameter sets can be changed by + * setting NDTPA_IFINDEX to the interface index of the corresponding + * device. 
+ ****/ + +struct ndt_stats +{ + __u64 ndts_allocs; + __u64 ndts_destroys; + __u64 ndts_hash_grows; + __u64 ndts_res_failed; + __u64 ndts_lookups; + __u64 ndts_hits; + __u64 ndts_rcv_probes_mcast; + __u64 ndts_rcv_probes_ucast; + __u64 ndts_periodic_gc_runs; + __u64 ndts_forced_gc_runs; +}; + +enum { + NDTPA_UNSPEC, + NDTPA_IFINDEX, /* u32, unchangeable */ + NDTPA_REFCNT, /* u32, read-only */ + NDTPA_REACHABLE_TIME, /* u64, read-only, msecs */ + NDTPA_BASE_REACHABLE_TIME, /* u64, msecs */ + NDTPA_RETRANS_TIME, /* u64, msecs */ + NDTPA_GC_STALETIME, /* u64, msecs */ + NDTPA_DELAY_PROBE_TIME, /* u64, msecs */ + NDTPA_QUEUE_LEN, /* u32 */ + NDTPA_APP_PROBES, /* u32 */ + NDTPA_UCAST_PROBES, /* u32 */ + NDTPA_MCAST_PROBES, /* u32 */ + NDTPA_ANYCAST_DELAY, /* u64, msecs */ + NDTPA_PROXY_DELAY, /* u64, msecs */ + NDTPA_PROXY_QLEN, /* u32 */ + NDTPA_LOCKTIME, /* u64, msecs */ + __NDTPA_MAX +}; +#define NDTPA_MAX (__NDTPA_MAX - 1) + +struct ndtmsg +{ + __u8 ndtm_family; + __u8 ndtm_pad1; + __u16 ndtm_pad2; +}; + +struct ndt_config +{ + __u16 ndtc_key_len; + __u16 ndtc_entry_size; + __u32 ndtc_entries; + __u32 ndtc_last_flush; /* delta to now in msecs */ + __u32 ndtc_last_rand; /* delta to now in msecs */ + __u32 ndtc_hash_rnd; + __u32 ndtc_hash_mask; + __u32 ndtc_hash_chain_gc; + __u32 ndtc_proxy_qlen; +}; + +enum { + NDTA_UNSPEC, + NDTA_NAME, /* char *, unchangeable */ + NDTA_THRESH1, /* u32 */ + NDTA_THRESH2, /* u32 */ + NDTA_THRESH3, /* u32 */ + NDTA_CONFIG, /* struct ndt_config, read-only */ + NDTA_PARMS, /* nested TLV NDTPA_* */ + NDTA_STATS, /* struct ndt_stats, read-only */ + NDTA_GC_INTERVAL, /* u64, msecs */ + __NDTA_MAX +}; +#define NDTA_MAX (__NDTA_MAX - 1) + +#define NDTA_RTA(r) ((struct rtattr*)(((char*)(r)) + \ + NLMSG_ALIGN(sizeof(struct ndtmsg)))) +#define NDTA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndtmsg)) + + /**** * General form of address family dependent message. 
****/ @@ -443,6 +663,138 @@ struct prefix_cacheinfo __u32 valid_time; }; +/* The struct should be in sync with struct net_device_stats */ +struct rtnl_link_stats +{ + __u32 rx_packets; /* total packets received */ + __u32 tx_packets; /* total packets transmitted */ + __u32 rx_bytes; /* total bytes received */ + __u32 tx_bytes; /* total bytes transmitted */ + __u32 rx_errors; /* bad packets received */ + __u32 tx_errors; /* packet transmit problems */ + __u32 rx_dropped; /* no space in linux buffers */ + __u32 tx_dropped; /* no space available in linux */ + __u32 multicast; /* multicast packets received */ + __u32 collisions; + + /* detailed rx_errors: */ + __u32 rx_length_errors; + __u32 rx_over_errors; /* receiver ring buff overflow */ + __u32 rx_crc_errors; /* recved pkt with crc error */ + __u32 rx_frame_errors; /* recv'd frame alignment error */ + __u32 rx_fifo_errors; /* recv'r fifo overrun */ + __u32 rx_missed_errors; /* receiver missed packet */ + + /* detailed tx_errors */ + __u32 tx_aborted_errors; + __u32 tx_carrier_errors; + __u32 tx_fifo_errors; + __u32 tx_heartbeat_errors; + __u32 tx_window_errors; + + /* for cslip etc */ + __u32 rx_compressed; + __u32 tx_compressed; +}; + +/* The struct should be in sync with struct ifmap */ +struct rtnl_link_ifmap +{ + __u64 mem_start; + __u64 mem_end; + __u64 base_addr; + __u16 irq; + __u8 dma; + __u8 port; +}; + +enum +{ + IFLA_UNSPEC, + IFLA_ADDRESS, + IFLA_BROADCAST, + IFLA_IFNAME, + IFLA_MTU, + IFLA_LINK, + IFLA_QDISC, + IFLA_STATS, + IFLA_COST, +#define IFLA_COST IFLA_COST + IFLA_PRIORITY, +#define IFLA_PRIORITY IFLA_PRIORITY + IFLA_MASTER, +#define IFLA_MASTER IFLA_MASTER + IFLA_WIRELESS, /* Wireless Extension event - see wireless.h */ +#define IFLA_WIRELESS IFLA_WIRELESS + IFLA_PROTINFO, /* Protocol specific information for a link */ +#define IFLA_PROTINFO IFLA_PROTINFO + IFLA_TXQLEN, +#define IFLA_TXQLEN IFLA_TXQLEN + IFLA_MAP, +#define IFLA_MAP IFLA_MAP + IFLA_WEIGHT, +#define IFLA_WEIGHT IFLA_WEIGHT + 
IFLA_OPERSTATE, + IFLA_LINKMODE, + __IFLA_MAX +}; + + +#define IFLA_MAX (__IFLA_MAX - 1) + +#define IFLA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ifinfomsg)))) +#define IFLA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ifinfomsg)) + +/* ifi_flags. + + IFF_* flags. + + The only change is: + IFF_LOOPBACK, IFF_BROADCAST and IFF_POINTOPOINT are + more not changeable by user. They describe link media + characteristics and set by device driver. + + Comments: + - Combination IFF_BROADCAST|IFF_POINTOPOINT is invalid + - If neither of these three flags are set; + the interface is NBMA. + + - IFF_MULTICAST does not mean anything special: + multicasts can be used on all not-NBMA links. + IFF_MULTICAST means that this media uses special encapsulation + for multicast frames. Apparently, all IFF_POINTOPOINT and + IFF_BROADCAST devices are able to use multicasts too. + */ + +/* IFLA_LINK. + For usual devices it is equal ifi_index. + If it is a "virtual interface" (f.e. tunnel), ifi_link + can point to real physical interface (f.e. for bandwidth calculations), + or maybe 0, what means, that real media is unknown (usual + for IPIP tunnels, when route to endpoint is allowed to change) + */ + +/* Subtype attributes for IFLA_PROTINFO */ +enum +{ + IFLA_INET6_UNSPEC, + IFLA_INET6_FLAGS, /* link flags */ + IFLA_INET6_CONF, /* sysctl parameters */ + IFLA_INET6_STATS, /* statistics */ + IFLA_INET6_MCAST, /* MC things. What of them? */ + IFLA_INET6_CACHEINFO, /* time values and max reasm size */ + __IFLA_INET6_MAX +}; + +#define IFLA_INET6_MAX (__IFLA_INET6_MAX - 1) + +struct ifla_cacheinfo +{ + __u32 max_reasm_len; + __u32 tstamp; /* ipv6InterfaceTable updated timestamp */ + __u32 reachable_time; + __u32 retrans_time; +}; /***************************************************************** * Traffic control messages. 
@@ -533,13 +885,10 @@ enum rtnetlink_groups { RTNLGRP_NOP2, RTNLGRP_DECnet_ROUTE, #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE - RTNLGRP_DECnet_RULE, -#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE + RTNLGRP_NOP3, RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX - RTNLGRP_IPV6_RULE, -#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) @@ -574,6 +923,8 @@ extern int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, in #define rtattr_parse_nested(tb, max, rta) \ rtattr_parse((tb), (max), RTA_DATA((rta)), RTA_PAYLOAD((rta))) +extern struct sock *rtnl; + struct rtnetlink_link { int (*doit)(struct sk_buff *, struct nlmsghdr*, void *attr); @@ -582,10 +933,6 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); -extern int rtnl_unicast(struct sk_buff *skb, u32 pid); -extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags); -extern void rtnl_set_sk_err(u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); @@ -718,13 +1065,6 @@ extern void __rtnl_unlock(void); } \ } while(0) -static inline u32 rtm_get_table(struct rtattr **rta, u8 table) -{ - return RTA_GET_U32(rta[RTA_TABLE-1]); -rtattr_failure: - return table; -} - #endif /* __KERNEL__ */ diff --git a/trunk/include/linux/security.h b/trunk/include/linux/security.h index 9f56fb8a4a6c..6bc2aad494ff 100644 --- a/trunk/include/linux/security.h +++ b/trunk/include/linux/security.h @@ -31,8 +31,6 @@ #include #include #include -#include -#include struct ctl_table; @@ -90,7 +88,6 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; -struct request_sock; /* bprm_apply_creds unsafe 
reasons */ #define LSM_UNSAFE_SHARE 1 @@ -815,19 +812,9 @@ struct request_sock; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. - * @sk_clone_security: - * Clone/copy security structure. - * @sk_getsecid: - * Retrieve the LSM-specific secid for the sock to enable caching of network + * @sk_getsid: + * Retrieve the LSM-specific sid for the sock to enable caching of network * authorizations. - * @sock_graft: - * Sets the socket's isec sid to the sock's sid. - * @inet_conn_request: - * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. - * @inet_csk_clone: - * Sets the new child socket's sid to the openreq sid. - * @req_classify_flow: - * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. * @@ -836,10 +823,9 @@ struct request_sock; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * @sk refers to the sock from which to derive the security context. - * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. Only - * one of sec_ctx or sock can be specified. + * Allocate a security structure to the xp->security field. + * The security field is initialized to NULL when the xfrm_policy is + * allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -858,14 +844,9 @@ struct request_sock; * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * @polsec contains the security context information associated with a xfrm - * policy rule from which to take the base context. polsec must be NULL - * when sec_ctx is specified. 
- * @secid contains the secid from which to take the mls portion of the context. - * Allocate a security structure to the x->security field; the security - * field is initialized to NULL when the xfrm_state is allocated. Set the - * context to correspond to either sec_ctx or polsec, with the mls portion - * taken from secid in the latter case. + * Allocate a security structure to the x->security field. The + * security field is initialized to NULL when the xfrm_state is + * allocated. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. @@ -876,27 +857,13 @@ struct request_sock; * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. - * @fl_secid contains the flow security label that is used to authorize + * @sk_sid contains the sock security label that is used to authorize * access to the policy xp. * @dir contains the direction of the flow (input or output). - * Check permission when a flow selects a xfrm_policy for processing + * Check permission when a sock selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. * Return 0 if permission is granted. - * @xfrm_state_pol_flow_match: - * @x contains the state to match. - * @xp contains the policy to check for a match. - * @fl contains the flow to check for a match. - * Return 1 if there is a match. - * @xfrm_flow_state_match: - * @fl contains the flow key to match. - * @xfrm points to the xfrm_state to match. - * Return 1 if there is a match. - * @xfrm_decode_session: - * @skb points to skb to decode. - * @secid points to the flow key secid to set. - * @ckall says if all xfrms used should be checked for same secid. - * Return 0 if ckall is zero or all xfrms used have the same secid. 
* * Security hooks affecting all Key Management operations * @@ -1341,8 +1308,8 @@ struct security_operations { int (*unix_may_send) (struct socket * sock, struct socket * other); int (*socket_create) (int family, int type, int protocol, int kern); - int (*socket_post_create) (struct socket * sock, int family, - int type, int protocol, int kern); + void (*socket_post_create) (struct socket * sock, int family, + int type, int protocol, int kern); int (*socket_bind) (struct socket * sock, struct sockaddr * address, int addrlen); int (*socket_connect) (struct socket * sock, @@ -1365,31 +1332,18 @@ struct security_operations { int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); - void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - void (*sk_getsecid) (struct sock *sk, u32 *secid); - void (*sock_graft)(struct sock* sk, struct socket *parent); - int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, - struct request_sock *req); - void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); - void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); + unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec, - u32 secid); + int 
(*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); - int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl); - int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2824,13 +2778,13 @@ static inline int security_socket_create (int family, int type, return security_ops->socket_create(family, type, protocol, kern); } -static inline int security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline void security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { - return security_ops->socket_post_create(sock, family, type, - protocol, kern); + security_ops->socket_post_create(sock, family, type, + protocol, kern); } static inline int security_socket_bind(struct socket * sock, @@ -2931,36 +2885,9 @@ static inline void security_sk_free(struct sock *sk) return security_ops->sk_free_security(sk); } -static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) -{ - return security_ops->sk_clone_security(sk, newsk); -} - -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { - security_ops->sk_getsecid(sk, &fl->secid); -} - -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) -{ - security_ops->req_classify_flow(req, fl); -} - -static inline void security_sock_graft(struct sock* sk, struct 
socket *parent) -{ - security_ops->sock_graft(sk, parent); -} - -static inline int security_inet_conn_request(struct sock *sk, - struct sk_buff *skb, struct request_sock *req) -{ - return security_ops->inet_conn_request(sk, skb, req); -} - -static inline void security_inet_csk_clone(struct sock *newsk, - const struct request_sock *req) -{ - security_ops->inet_csk_clone(newsk, req); + return security_ops->sk_getsid(sk, fl, dir); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -2982,12 +2909,11 @@ static inline int security_socket_create (int family, int type, return 0; } -static inline int security_socket_post_create(struct socket * sock, - int family, - int type, - int protocol, int kern) +static inline void security_socket_post_create(struct socket * sock, + int family, + int type, + int protocol, int kern) { - return 0; } static inline int security_socket_bind(struct socket * sock, @@ -3085,43 +3011,16 @@ static inline void security_sk_free(struct sock *sk) { } -static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) -{ -} - -static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) -{ -} - -static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) -{ -} - -static inline void security_sock_graft(struct sock* sk, struct socket *parent) -{ -} - -static inline int security_inet_conn_request(struct sock *sk, - struct sk_buff *skb, struct request_sock *req) +static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; } - -static inline void security_inet_csk_clone(struct sock *newsk, - const struct request_sock *req) -{ -} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); -} - -static inline 
int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3139,18 +3038,9 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return security_ops->xfrm_policy_delete_security(xp); } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0); -} - -static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, u32 secid) -{ - if (!polsec) - return 0; - return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid); + return security_ops->xfrm_state_alloc_security(x, sec_ctx); } static inline int security_xfrm_state_delete(struct xfrm_state *x) @@ -3163,32 +3053,9 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) security_ops->xfrm_state_free_security(x); } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) -{ - return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); -} - -static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { - return security_ops->xfrm_state_pol_flow_match(x, xp, fl); -} - -static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) -{ - return security_ops->xfrm_flow_state_match(fl, xfrm); -} - -static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) -{ - return security_ops->xfrm_decode_session(skb, secid, 1); -} - -static inline void 
security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) -{ - int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); - - BUG_ON(rc); + return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3196,11 +3063,6 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } -static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return 0; -} - static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; @@ -3215,14 +3077,7 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return 0; } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx) -{ - return 0; -} - -static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, - struct xfrm_sec_ctx *polsec, u32 secid) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return 0; } @@ -3236,32 +3091,10 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) -{ - return 0; -} - -static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl) -{ - return 1; -} - -static inline int security_xfrm_flow_state_match(struct flowi *fl, - struct xfrm_state *xfrm) -{ - return 1; -} - -static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; } - -static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) -{ -} - #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git 
a/trunk/include/linux/skbuff.h b/trunk/include/linux/skbuff.h index 85577a4ffa61..755e9cddac47 100644 --- a/trunk/include/linux/skbuff.h +++ b/trunk/include/linux/skbuff.h @@ -34,9 +34,8 @@ #define HAVE_ALIGNABLE_SKB /* Ditto 8) */ #define CHECKSUM_NONE 0 -#define CHECKSUM_PARTIAL 1 +#define CHECKSUM_HW 1 #define CHECKSUM_UNNECESSARY 2 -#define CHECKSUM_COMPLETE 3 #define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ ~(SMP_CACHE_BYTES - 1)) @@ -57,17 +56,17 @@ * Apparently with secret goal to sell you new device, when you * will add new protocol to your host. F.e. IPv6. 8) * - * COMPLETE: the most generic way. Device supplied checksum of _all_ + * HW: the most generic way. Device supplied checksum of _all_ * the packet as seen by netif_rx in skb->csum. * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use COMPLETE, + * is able to produce some skb->csum, it MUST use HW, * not UNNECESSARY. * * B. Checksumming on output. * * NONE: skb is checksummed by protocol or csum is not required. * - * PARTIAL: device is required to csum packet as seen by hard_start_xmit + * HW: device is required to csum packet as seen by hard_start_xmit * from skb->h.raw to the end and to record the checksum * at skb->h.raw+skb->csum. * @@ -1262,14 +1261,14 @@ static inline int skb_linearize_cow(struct sk_buff *skb) * @len: length of data pulled * * After doing a pull on a received packet, you need to call this to - * update the CHECKSUM_COMPLETE checksum, or set ip_summed to - * CHECKSUM_NONE so that it can be recomputed from scratch. + * update the CHECKSUM_HW checksum, or set ip_summed to CHECKSUM_NONE + * so that it can be recomputed from scratch. 
*/ static inline void skb_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (skb->ip_summed == CHECKSUM_HW) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); } @@ -1288,7 +1287,7 @@ static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { if (likely(len >= skb->len)) return 0; - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; return __pskb_trim(skb, len); } diff --git a/trunk/include/linux/snmp.h b/trunk/include/linux/snmp.h index 854aa6b543f1..4db25d5c7cd1 100644 --- a/trunk/include/linux/snmp.h +++ b/trunk/include/linux/snmp.h @@ -155,11 +155,42 @@ enum UDP_MIB_NOPORTS, /* NoPorts */ UDP_MIB_INERRORS, /* InErrors */ UDP_MIB_OUTDATAGRAMS, /* OutDatagrams */ - UDP_MIB_RCVBUFERRORS, /* RcvbufErrors */ - UDP_MIB_SNDBUFERRORS, /* SndbufErrors */ __UDP_MIB_MAX }; +/* sctp mib definitions */ +/* + * draft-ietf-sigtran-sctp-mib-07.txt + */ +enum +{ + SCTP_MIB_NUM = 0, + SCTP_MIB_CURRESTAB, /* CurrEstab */ + SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ + SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ + SCTP_MIB_ABORTEDS, /* Aborteds */ + SCTP_MIB_SHUTDOWNS, /* Shutdowns */ + SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ + SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ + SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ + SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ + SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ + SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ + SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ + SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ + SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ + SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ + SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ + SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ + SCTP_MIB_RTOALGORITHM, /* RtoAlgorithm */ + SCTP_MIB_RTOMIN, /* RtoMin */ + SCTP_MIB_RTOMAX, /* RtoMax */ + SCTP_MIB_RTOINITIAL, /* RtoInitial */ + SCTP_MIB_VALCOOKIELIFE, /* ValCookieLife */ + 
SCTP_MIB_MAXINITRETR, /* MaxInitRetr */ + __SCTP_MIB_MAX +}; + /* linux mib definitions */ enum { diff --git a/trunk/include/linux/sysctl.h b/trunk/include/linux/sysctl.h index 736ed917a4f8..e4b1a4d4dcf3 100644 --- a/trunk/include/linux/sysctl.h +++ b/trunk/include/linux/sysctl.h @@ -411,10 +411,6 @@ enum NET_IPV4_TCP_WORKAROUND_SIGNED_WINDOWS=115, NET_TCP_DMA_COPYBREAK=116, NET_TCP_SLOW_START_AFTER_IDLE=117, - NET_CIPSOV4_CACHE_ENABLE=118, - NET_CIPSOV4_CACHE_BUCKET_SIZE=119, - NET_CIPSOV4_RBM_OPTFMT=120, - NET_CIPSOV4_RBM_STRICTVALID=121, }; enum { @@ -556,7 +552,6 @@ enum { NET_IPV6_ACCEPT_RA_RTR_PREF=20, NET_IPV6_RTR_PROBE_INTERVAL=21, NET_IPV6_ACCEPT_RA_RT_INFO_MAX_PLEN=22, - NET_IPV6_PROXY_NDP=23, __NET_IPV6_MAX }; diff --git a/trunk/include/linux/xfrm.h b/trunk/include/linux/xfrm.h index 14ecd19f4cdc..46a15c7a1a13 100644 --- a/trunk/include/linux/xfrm.h +++ b/trunk/include/linux/xfrm.h @@ -102,13 +102,6 @@ struct xfrm_stats { __u32 integrity_failed; }; -enum -{ - XFRM_POLICY_TYPE_MAIN = 0, - XFRM_POLICY_TYPE_SUB = 1, - XFRM_POLICY_TYPE_MAX = 2 -}; - enum { XFRM_POLICY_IN = 0, @@ -127,9 +120,7 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_ROUTEOPTIMIZATION 2 -#define XFRM_MODE_IN_TRIGGER 3 -#define XFRM_MODE_MAX 4 +#define XFRM_MODE_MAX 2 /* Netlink configuration messages. */ enum { @@ -173,10 +164,6 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE - - XFRM_MSG_REPORT, -#define XFRM_MSG_REPORT XFRM_MSG_REPORT - __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -230,12 +217,6 @@ enum xfrm_ae_ftype_t { #define XFRM_AE_MAX (__XFRM_AE_MAX - 1) }; -struct xfrm_userpolicy_type { - __u8 type; - __u16 reserved1; - __u8 reserved2; -}; - /* Netlink message attributes. 
*/ enum xfrm_attr_type_t { XFRMA_UNSPEC, @@ -251,10 +232,6 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, - XFRMA_SRCADDR, /* xfrm_address_t */ - XFRMA_COADDR, /* xfrm_address_t */ - XFRMA_LASTUSED, - XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) @@ -270,13 +247,12 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* XFRM_MODE_xxx */ + __u8 mode; /* 0=transport,1=tunnel */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 -#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { @@ -343,18 +319,12 @@ struct xfrm_usersa_flush { __u8 proto; }; -struct xfrm_user_report { - __u8 proto; - struct xfrm_selector sel; -}; - #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 -#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -370,8 +340,6 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS - XFRMNLGRP_REPORT, -#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/trunk/include/net/act_api.h b/trunk/include/net/act_api.h index 8b06c2f3657f..11e9eaf79f5a 100644 --- a/trunk/include/net/act_api.h +++ b/trunk/include/net/act_api.h @@ -8,110 +8,70 @@ #include #include -struct tcf_common { - struct tcf_common *tcfc_next; - u32 tcfc_index; - int tcfc_refcnt; - int tcfc_bindcnt; - u32 tcfc_capab; - int tcfc_action; - struct tcf_t tcfc_tm; - struct gnet_stats_basic tcfc_bstats; - struct gnet_stats_queue tcfc_qstats; - struct gnet_stats_rate_est tcfc_rate_est; - spinlock_t *tcfc_stats_lock; - spinlock_t tcfc_lock; -}; -#define tcf_next common.tcfc_next -#define tcf_index common.tcfc_index -#define tcf_refcnt common.tcfc_refcnt -#define 
tcf_bindcnt common.tcfc_bindcnt -#define tcf_capab common.tcfc_capab -#define tcf_action common.tcfc_action -#define tcf_tm common.tcfc_tm -#define tcf_bstats common.tcfc_bstats -#define tcf_qstats common.tcfc_qstats -#define tcf_rate_est common.tcfc_rate_est -#define tcf_stats_lock common.tcfc_stats_lock -#define tcf_lock common.tcfc_lock - -struct tcf_police { - struct tcf_common common; - int tcfp_result; - u32 tcfp_ewma_rate; - u32 tcfp_burst; - u32 tcfp_mtu; - u32 tcfp_toks; - u32 tcfp_ptoks; - psched_time_t tcfp_t_c; - struct qdisc_rate_table *tcfp_R_tab; - struct qdisc_rate_table *tcfp_P_tab; -}; -#define to_police(pc) \ - container_of(pc, struct tcf_police, common) - -struct tcf_hashinfo { - struct tcf_common **htab; - unsigned int hmask; - rwlock_t *lock; -}; - -static inline unsigned int tcf_hash(u32 index, unsigned int hmask) +#define tca_gen(name) \ +struct tcf_##name *next; \ + u32 index; \ + int refcnt; \ + int bindcnt; \ + u32 capab; \ + int action; \ + struct tcf_t tm; \ + struct gnet_stats_basic bstats; \ + struct gnet_stats_queue qstats; \ + struct gnet_stats_rate_est rate_est; \ + spinlock_t *stats_lock; \ + spinlock_t lock + +struct tcf_police { - return index & hmask; -} + tca_gen(police); + int result; + u32 ewma_rate; + u32 burst; + u32 mtu; + u32 toks; + u32 ptoks; + psched_time_t t_c; + struct qdisc_rate_table *R_tab; + struct qdisc_rate_table *P_tab; +}; #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 -struct tcf_act_hdr { - struct tcf_common common; +struct tcf_act_hdr +{ + tca_gen(act_hdr); }; -struct tc_action { - void *priv; - struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct tc_action *next; +struct tc_action +{ + void *priv; + struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct tc_action *next; }; #define TCA_CAP_NONE 0 -struct tc_action_ops { +struct tc_action_ops +{ struct tc_action_ops *next; - 
struct tcf_hashinfo *hinfo; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; int (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); - int (*dump)(struct sk_buff *, struct tc_action *, int, int); + int (*dump)(struct sk_buff *, struct tc_action *,int , int); int (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct tc_action *, u32); - int (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int); - int (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *); + int (*lookup)(struct tc_action *, u32 ); + int (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int ); + int (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *); }; -extern struct tcf_common *tcf_hash_lookup(u32 index, - struct tcf_hashinfo *hinfo); -extern void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); -extern int tcf_hash_release(struct tcf_common *p, int bind, - struct tcf_hashinfo *hinfo); -extern int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a); -extern u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo); -extern int tcf_hash_search(struct tc_action *a, u32 index); -extern struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, - int bind, struct tcf_hashinfo *hinfo); -extern struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, - struct tc_action *a, int size, - int bind, u32 *idx_gen, - struct tcf_hashinfo *hinfo); -extern void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); - extern int tcf_register_action(struct tc_action_ops *a); extern int tcf_unregister_action(struct tc_action_ops *a); extern void tcf_action_destroy(struct tc_action *a, int bind); @@ -136,17 +96,17 @@ tcf_police_release(struct tcf_police *p, int bind) 
int ret = 0; #ifdef CONFIG_NET_CLS_ACT if (p) { - if (bind) - p->tcf_bindcnt--; - - p->tcf_refcnt--; - if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { + if (bind) { + p->bindcnt--; + } + p->refcnt--; + if (p->refcnt <= 0 && !p->bindcnt) { tcf_police_destroy(p); ret = 1; } } #else - if (p && --p->tcf_refcnt == 0) + if (p && --p->refcnt == 0) tcf_police_destroy(p); #endif /* CONFIG_NET_CLS_ACT */ diff --git a/trunk/include/net/act_generic.h b/trunk/include/net/act_generic.h new file mode 100644 index 000000000000..c9daa7e52300 --- /dev/null +++ b/trunk/include/net/act_generic.h @@ -0,0 +1,142 @@ +/* + * include/net/act_generic.h + * +*/ +#ifndef _NET_ACT_GENERIC_H +#define _NET_ACT_GENERIC_H +static inline int tcf_defact_release(struct tcf_defact *p, int bind) +{ + int ret = 0; + if (p) { + if (bind) { + p->bindcnt--; + } + p->refcnt--; + if (p->bindcnt <= 0 && p->refcnt <= 0) { + kfree(p->defdata); + tcf_hash_destroy(p); + ret = 1; + } + } + return ret; +} + +static inline int +alloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) +{ + p->defdata = kmalloc(datalen, GFP_KERNEL); + if (p->defdata == NULL) + return -ENOMEM; + p->datalen = datalen; + memcpy(p->defdata, defdata, datalen); + return 0; +} + +static inline int +realloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) +{ + /* safer to be just brute force for now */ + kfree(p->defdata); + return alloc_defdata(p, datalen, defdata); +} + +static inline int +tcf_defact_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_DEF_MAX]; + struct tc_defact *parm; + struct tcf_defact *p; + void *defdata; + u32 datalen = 0; + int ret = 0; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_DEF_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + + parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); + defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); + if (defdata == NULL) 
+ return -EINVAL; + + datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); + if (datalen <= 0) + return -EINVAL; + + p = tcf_hash_check(parm->index, a, ovr, bind); + if (p == NULL) { + p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); + if (p == NULL) + return -ENOMEM; + + ret = alloc_defdata(p, datalen, defdata); + if (ret < 0) { + kfree(p); + return ret; + } + ret = ACT_P_CREATED; + } else { + if (!ovr) { + tcf_defact_release(p, bind); + return -EEXIST; + } + realloc_defdata(p, datalen, defdata); + } + + spin_lock_bh(&p->lock); + p->action = parm->action; + spin_unlock_bh(&p->lock); + if (ret == ACT_P_CREATED) + tcf_hash_insert(p); + return ret; +} + +static inline int tcf_defact_cleanup(struct tc_action *a, int bind) +{ + struct tcf_defact *p = PRIV(a, defact); + + if (p != NULL) + return tcf_defact_release(p, bind); + return 0; +} + +static inline int +tcf_defact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb->tail; + struct tc_defact opt; + struct tcf_defact *p = PRIV(a, defact); + struct tcf_t t; + + opt.index = p->index; + opt.refcnt = p->refcnt - ref; + opt.bindcnt = p->bindcnt - bind; + opt.action = p->action; + RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); + RTA_PUT(skb, TCA_DEF_DATA, p->datalen, p->defdata); + t.install = jiffies_to_clock_t(jiffies - p->tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); + t.expires = jiffies_to_clock_t(p->tm.expires); + RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#define tca_use_default_ops \ + .dump = tcf_defact_dump, \ + .cleanup = tcf_defact_cleanup, \ + .init = tcf_defact_init, \ + .walk = tcf_generic_walker, \ + +#define tca_use_default_defines(name) \ + static u32 idx_gen; \ + static struct tcf_defact *tcf_##name_ht[MY_TAB_SIZE]; \ + static DEFINE_RWLOCK(##name_lock); +#endif /* _NET_ACT_GENERIC_H */ diff --git a/trunk/include/net/addrconf.h 
b/trunk/include/net/addrconf.h index 44f1b673f916..3d71251b3eca 100644 --- a/trunk/include/net/addrconf.h +++ b/trunk/include/net/addrconf.h @@ -61,9 +61,6 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -#ifdef CONFIG_IPV6_MIP6 -extern int ipv6_chk_home_addr(struct in6_addr *addr); -#endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict); @@ -129,18 +126,20 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb); static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { - return rcu_dereference(dev->ip6_ptr); + return (struct inet6_dev *)dev->ip6_ptr; } +extern rwlock_t addrconf_lock; + static inline struct inet6_dev * in6_dev_get(struct net_device *dev) { struct inet6_dev *idev = NULL; - rcu_read_lock(); - idev = __in6_dev_get(dev); + read_lock(&addrconf_lock); + idev = dev->ip6_ptr; if (idev) atomic_inc(&idev->refcnt); - rcu_read_unlock(); + read_unlock(&addrconf_lock); return idev; } diff --git a/trunk/include/net/cipso_ipv4.h b/trunk/include/net/cipso_ipv4.h deleted file mode 100644 index 59406e0dc5b2..000000000000 --- a/trunk/include/net/cipso_ipv4.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * CIPSO - Commercial IP Security Option - * - * This is an implementation of the CIPSO 2.2 protocol as specified in - * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in - * FIPS-188, copies of both documents can be found in the Documentation - * directory. While CIPSO never became a full IETF RFC standard many vendors - * have chosen to adopt the protocol and over the years it has become a - * de-facto standard for labeled networking. 
- * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _CIPSO_IPV4_H -#define _CIPSO_IPV4_H - -#include -#include -#include -#include -#include -#include - -/* known doi values */ -#define CIPSO_V4_DOI_UNKNOWN 0x00000000 - -/* tag types */ -#define CIPSO_V4_TAG_INVALID 0 -#define CIPSO_V4_TAG_RBITMAP 1 -#define CIPSO_V4_TAG_ENUM 2 -#define CIPSO_V4_TAG_RANGE 5 -#define CIPSO_V4_TAG_PBITMAP 6 -#define CIPSO_V4_TAG_FREEFORM 7 - -/* doi mapping types */ -#define CIPSO_V4_MAP_UNKNOWN 0 -#define CIPSO_V4_MAP_STD 1 -#define CIPSO_V4_MAP_PASS 2 - -/* limits */ -#define CIPSO_V4_MAX_REM_LVLS 256 -#define CIPSO_V4_INV_LVL 0x80000000 -#define CIPSO_V4_MAX_LOC_LVLS (CIPSO_V4_INV_LVL - 1) -#define CIPSO_V4_MAX_REM_CATS 65536 -#define CIPSO_V4_INV_CAT 0x80000000 -#define CIPSO_V4_MAX_LOC_CATS (CIPSO_V4_INV_CAT - 1) - -/* - * CIPSO DOI definitions - */ - -/* DOI definition struct */ -#define CIPSO_V4_TAG_MAXCNT 5 -struct cipso_v4_doi { - u32 doi; - u32 type; - union { - struct cipso_v4_std_map_tbl *std; - } map; - u8 tags[CIPSO_V4_TAG_MAXCNT]; - - u32 valid; - struct list_head list; - struct rcu_head rcu; - struct list_head dom_list; -}; - -/* Standard CIPSO mapping table */ -/* NOTE: the 
highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the - * bit is set then consider that value as unspecified, meaning the - * mapping for that particular level/category is invalid */ -struct cipso_v4_std_map_tbl { - struct { - u32 *cipso; - u32 *local; - u32 cipso_size; - u32 local_size; - } lvl; - struct { - u32 *cipso; - u32 *local; - u32 cipso_size; - u32 local_size; - } cat; -}; - -/* - * Sysctl Variables - */ - -#ifdef CONFIG_NETLABEL -extern int cipso_v4_cache_enabled; -extern int cipso_v4_cache_bucketsize; -extern int cipso_v4_rbm_optfmt; -extern int cipso_v4_rbm_strictvalid; -#endif - -/* - * Helper Functions - */ - -#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) -#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso) - -/* - * DOI List Functions - */ - -#ifdef CONFIG_NETLABEL -int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); -struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain); -#else -static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) -{ - return -ENOSYS; -} - -static inline int cipso_v4_doi_remove(u32 doi, - void (*callback) (struct rcu_head * head)) -{ - return 0; -} - -static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) -{ - return NULL; -} - -static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) -{ - return NULL; -} - -static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - return NULL; -} - -static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, - const char *domain) -{ - return -ENOSYS; -} - -static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) 
-{ - return 0; -} -#endif /* CONFIG_NETLABEL */ - -/* - * Label Mapping Cache Functions - */ - -#ifdef CONFIG_NETLABEL -void cipso_v4_cache_invalidate(void); -int cipso_v4_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr); -#else -static inline void cipso_v4_cache_invalidate(void) -{ - return; -} - -static inline int cipso_v4_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr) -{ - return 0; -} -#endif /* CONFIG_NETLABEL */ - -/* - * Protocol Handling Functions - */ - -#ifdef CONFIG_NETLABEL -void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); -int cipso_v4_socket_setattr(const struct socket *sock, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr); -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr); -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr); -int cipso_v4_validate(unsigned char **option); -#else -static inline void cipso_v4_error(struct sk_buff *skb, - int error, - u32 gateway) -{ - return; -} - -static inline int cipso_v4_socket_setattr(const struct socket *sock, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline int cipso_v4_validate(unsigned char **option) -{ - return -ENOSYS; -} -#endif /* CONFIG_NETLABEL */ - -#endif /* _CIPSO_IPV4_H */ diff --git a/trunk/include/net/dn_fib.h b/trunk/include/net/dn_fib.h index f01626cbbed6..a15dcf0d5c1e 100644 --- a/trunk/include/net/dn_fib.h +++ b/trunk/include/net/dn_fib.h @@ -22,7 +22,7 @@ struct dn_kern_rta }; struct dn_fib_res { - struct fib_rule *r; + struct dn_fib_rule *r; struct 
dn_fib_info *fi; unsigned char prefixlen; unsigned char nh_sel; @@ -94,8 +94,7 @@ struct dn_fib_node { struct dn_fib_table { - struct hlist_node hlist; - u32 n; + int n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, @@ -131,11 +130,14 @@ extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res); +extern int dn_fib_sync_down(__le16 local, struct net_device *dev, + int force); +extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c */ -extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat); +extern struct dn_fib_table *dn_fib_get_table(int n, int creat); extern struct dn_fib_table *dn_fib_empty_table(void); extern void dn_fib_table_init(void); extern void dn_fib_table_cleanup(void); @@ -145,8 +147,10 @@ extern void dn_fib_table_cleanup(void); */ extern void dn_fib_rules_init(void); extern void dn_fib_rules_cleanup(void); +extern void dn_fib_rule_put(struct dn_fib_rule *); +extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags); extern unsigned dnet_addr_type(__le16 addr); -extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res); +extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res); /* * rtnetlink interface @@ -172,9 +176,11 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) if (res->fi) dn_fib_info_put(res->fi); if (res->r) - fib_rule_put(res->r); + dn_fib_rule_put(res->r); } +extern struct dn_fib_table *dn_fib_tables[]; + #else /* Endnode */ #define dn_fib_init() do { } while(0) diff --git a/trunk/include/net/dst.h b/trunk/include/net/dst.h index a8d825f90305..36d54fc248b0 100644 --- a/trunk/include/net/dst.h +++ b/trunk/include/net/dst.h @@ -54,7 +54,6 @@ struct dst_entry unsigned long expires; unsigned short header_len; /* more space at head required */ - unsigned short nfheader_len; 
/* more non-fragment space at head required */ unsigned short trailer_len; /* space to reserve at tail */ u32 metrics[RTAX_MAX]; diff --git a/trunk/include/net/esp.h b/trunk/include/net/esp.h index 713d039f4af7..064366d66eea 100644 --- a/trunk/include/net/esp.h +++ b/trunk/include/net/esp.h @@ -15,14 +15,13 @@ struct esp_data struct { u8 *key; /* Key */ int key_len; /* Key length */ - int padlen; /* 0..255 */ + u8 *ivec; /* ivec buffer */ /* ivlen is offset from enc_data, where encrypted data start. * It is logically different of crypto_tfm_alg_ivsize(tfm). * We assume that it is either zero (no ivec), or * >= crypto_tfm_alg_ivsize(tfm). */ int ivlen; - int ivinitted; - u8 *ivec; /* ivec buffer */ + int padlen; /* 0..255 */ struct crypto_blkcipher *tfm; /* crypto handle */ } conf; diff --git a/trunk/include/net/fib_rules.h b/trunk/include/net/fib_rules.h deleted file mode 100644 index 8e2f473d3e82..000000000000 --- a/trunk/include/net/fib_rules.h +++ /dev/null @@ -1,97 +0,0 @@ -#ifndef __NET_FIB_RULES_H -#define __NET_FIB_RULES_H - -#include -#include -#include -#include -#include - -struct fib_rule -{ - struct list_head list; - atomic_t refcnt; - int ifindex; - char ifname[IFNAMSIZ]; - u32 pref; - u32 flags; - u32 table; - u8 action; - struct rcu_head rcu; -}; - -struct fib_lookup_arg -{ - void *lookup_ptr; - void *result; - struct fib_rule *rule; -}; - -struct fib_rules_ops -{ - int family; - struct list_head list; - int rule_size; - - int (*action)(struct fib_rule *, - struct flowi *, int, - struct fib_lookup_arg *); - int (*match)(struct fib_rule *, - struct flowi *, int); - int (*configure)(struct fib_rule *, - struct sk_buff *, - struct nlmsghdr *, - struct fib_rule_hdr *, - struct nlattr **); - int (*compare)(struct fib_rule *, - struct fib_rule_hdr *, - struct nlattr **); - int (*fill)(struct fib_rule *, struct sk_buff *, - struct nlmsghdr *, - struct fib_rule_hdr *); - u32 (*default_pref)(void); - - int nlgroup; - struct nla_policy *policy; - struct 
list_head *rules_list; - struct module *owner; -}; - -static inline void fib_rule_get(struct fib_rule *rule) -{ - atomic_inc(&rule->refcnt); -} - -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *rule = container_of(head, struct fib_rule, rcu); - kfree(rule); -} - -static inline void fib_rule_put(struct fib_rule *rule) -{ - if (atomic_dec_and_test(&rule->refcnt)) - call_rcu(&rule->rcu, fib_rule_put_rcu); -} - -static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) -{ - if (nla[FRA_TABLE]) - return nla_get_u32(nla[FRA_TABLE]); - return frh->table; -} - -extern int fib_rules_register(struct fib_rules_ops *); -extern int fib_rules_unregister(struct fib_rules_ops *); - -extern int fib_rules_lookup(struct fib_rules_ops *, - struct flowi *, int flags, - struct fib_lookup_arg *); - -extern int fib_nl_newrule(struct sk_buff *, - struct nlmsghdr *, void *); -extern int fib_nl_delrule(struct sk_buff *, - struct nlmsghdr *, void *); -extern int fib_rules_dump(struct sk_buff *, - struct netlink_callback *, int); -#endif diff --git a/trunk/include/net/flow.h b/trunk/include/net/flow.h index 3ca210ec1379..04d89f763451 100644 --- a/trunk/include/net/flow.h +++ b/trunk/include/net/flow.h @@ -26,7 +26,6 @@ struct flowi { struct { struct in6_addr daddr; struct in6_addr saddr; - __u32 fwmark; __u32 flowlabel; } ip6_u; @@ -43,7 +42,6 @@ struct flowi { #define fld_scope nl_u.dn_u.scope #define fl6_dst nl_u.ip6_u.daddr #define fl6_src nl_u.ip6_u.saddr -#define fl6_fwmark nl_u.ip6_u.fwmark #define fl6_flowlabel nl_u.ip6_u.flowlabel #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr @@ -74,22 +72,12 @@ struct flowi { } dnports; __u32 spi; - -#ifdef CONFIG_IPV6_MIP6 - struct { - __u8 type; - } mht; -#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi -#ifdef CONFIG_IPV6_MIP6 
-#define fl_mh_type uli_u.mht.type -#endif - __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); #define FLOW_DIR_IN 0 @@ -97,10 +85,10 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/trunk/include/net/genetlink.h b/trunk/include/net/genetlink.h index 4a38d85e4e25..8c2287264266 100644 --- a/trunk/include/net/genetlink.h +++ b/trunk/include/net/genetlink.h @@ -27,6 +27,8 @@ struct genl_family struct list_head family_list; /* private */ }; +#define GENL_ADMIN_PERM 0x01 + /** * struct genl_info - receiving information * @snd_seq: sending sequence number @@ -131,12 +133,11 @@ static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id - * @flags: allocation flags */ static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, - unsigned int group, gfp_t flags) + unsigned int group) { - return nlmsg_multicast(genl_sock, skb, pid, group, flags); + return nlmsg_multicast(genl_sock, skb, pid, group); } /** diff --git a/trunk/include/net/if_inet6.h b/trunk/include/net/if_inet6.h index 34489c13c119..e459e1a0ae4a 100644 --- a/trunk/include/net/if_inet6.h +++ b/trunk/include/net/if_inet6.h @@ -189,7 +189,6 @@ struct inet6_dev struct ipv6_devconf cnf; struct ipv6_devstat stats; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ - struct rcu_head rcu; }; extern struct ipv6_devconf ipv6_devconf; diff --git 
a/trunk/include/net/inet_connection_sock.h b/trunk/include/net/inet_connection_sock.h index de4e83b6da4b..9bf73fe50948 100644 --- a/trunk/include/net/inet_connection_sock.h +++ b/trunk/include/net/inet_connection_sock.h @@ -147,8 +147,7 @@ extern struct sock *inet_csk_clone(struct sock *sk, enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4, - ICSK_ACK_PUSHED2 = 8 + ICSK_ACK_PUSHED = 4 }; extern void inet_csk_init_xmit_timers(struct sock *sk, diff --git a/trunk/include/net/inet_hashtables.h b/trunk/include/net/inet_hashtables.h index b4491c9e2a5a..98e0bb3014fe 100644 --- a/trunk/include/net/inet_hashtables.h +++ b/trunk/include/net/inet_hashtables.h @@ -271,15 +271,38 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, +extern struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, const unsigned short hnum, const int dif); -static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - u32 daddr, u16 dport, int dif) +/* Optimize the common listener case. 
*/ +static inline struct sock * + inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, const int dif) { - return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; } /* Socket demux engine toys. */ @@ -368,25 +391,14 @@ static inline struct sock * goto out; } -static inline struct sock * - inet_lookup_established(struct inet_hashinfo *hashinfo, - const u32 saddr, const u16 sport, - const u32 daddr, const u16 dport, - const int dif) -{ - return __inet_lookup_established(hashinfo, saddr, sport, daddr, - ntohs(dport), dif); -} - static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, - const u32 daddr, const u16 dport, + const u32 daddr, const u16 hnum, const int dif) { - u16 hnum = ntohs(dport); struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); + return sk ? 
: inet_lookup_listener(hashinfo, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, @@ -397,7 +409,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); local_bh_enable(); return sk; diff --git a/trunk/include/net/inet_sock.h b/trunk/include/net/inet_sock.h index f6242710f2ff..1f4a9a60d4cc 100644 --- a/trunk/include/net/inet_sock.h +++ b/trunk/include/net/inet_sock.h @@ -27,6 +27,7 @@ /** struct ip_options - IP Options * * @faddr - Saved first hop address + * @is_setbyuser - Set by setsockopt? * @is_data - Options in __data, rather than skb * @is_strictroute - Strict source route * @srr_is_hit - Packet destination addr was our one @@ -41,7 +42,8 @@ struct ip_options { unsigned char srr; unsigned char rr; unsigned char ts; - unsigned char is_data:1, + unsigned char is_setbyuser:1, + is_data:1, is_strictroute:1, srr_is_hit:1, is_changed:1, @@ -49,7 +51,7 @@ struct ip_options { ts_needtime:1, ts_needaddr:1; unsigned char router_alert; - unsigned char cipso; + unsigned char __pad1; unsigned char __pad2; unsigned char __data[0]; }; diff --git a/trunk/include/net/ip6_fib.h b/trunk/include/net/ip6_fib.h index e4438de3bd6b..a66e9de16a6c 100644 --- a/trunk/include/net/ip6_fib.h +++ b/trunk/include/net/ip6_fib.h @@ -16,35 +16,14 @@ #ifdef __KERNEL__ #include -#include -#include + #include #include -#include +#include +#include struct rt6_info; -struct fib6_config -{ - u32 fc_table; - u32 fc_metric; - int fc_dst_len; - int fc_src_len; - int fc_ifindex; - u32 fc_flags; - u32 fc_protocol; - - struct in6_addr fc_dst; - struct in6_addr fc_src; - struct in6_addr fc_gateway; - - unsigned long fc_expires; - struct nlattr *fc_mx; - int fc_mx_len; - - struct nl_info fc_nlinfo; -}; - struct fib6_node { struct fib6_node *parent; @@ -60,11 +39,6 @@ struct 
fib6_node __u32 fn_sernum; }; -#ifndef CONFIG_IPV6_SUBTREES -#define FIB6_SUBTREE(fn) NULL -#else -#define FIB6_SUBTREE(fn) ((fn)->subtree) -#endif /* * routing information @@ -77,8 +51,6 @@ struct rt6key int plen; }; -struct fib6_table; - struct rt6_info { union { @@ -99,7 +71,6 @@ struct rt6_info u32 rt6i_flags; u32 rt6i_metric; atomic_t rt6i_ref; - struct fib6_table *rt6i_table; struct rt6key rt6i_dst; struct rt6key rt6i_src; @@ -118,6 +89,28 @@ struct fib6_walker_t void *args; }; +extern struct fib6_walker_t fib6_walker_list; +extern rwlock_t fib6_walker_lock; + +static inline void fib6_walker_link(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next = fib6_walker_list.next; + w->prev = &fib6_walker_list; + w->next->prev = w; + w->prev->next = w; + write_unlock_bh(&fib6_walker_lock); +} + +static inline void fib6_walker_unlink(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next->prev = w->prev; + w->prev->next = w->next; + w->prev = w->next = w; + write_unlock_bh(&fib6_walker_lock); +} + struct rt6_statistics { __u32 fib_nodes; __u32 fib_route_nodes; @@ -150,41 +143,12 @@ struct rt6_statistics { typedef void (*f_pnode)(struct fib6_node *fn, void *); -struct fib6_table { - struct hlist_node tb6_hlist; - u32 tb6_id; - rwlock_t tb6_lock; - struct fib6_node tb6_root; -}; - -#define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC -#define RT6_TABLE_MAIN RT_TABLE_MAIN -#define RT6_TABLE_DFLT RT6_TABLE_MAIN -#define RT6_TABLE_INFO RT6_TABLE_MAIN -#define RT6_TABLE_PREFIX RT6_TABLE_MAIN - -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -#define FIB6_TABLE_MIN 1 -#define FIB6_TABLE_MAX RT_TABLE_MAX -#define RT6_TABLE_LOCAL RT_TABLE_LOCAL -#else -#define FIB6_TABLE_MIN RT_TABLE_MAIN -#define FIB6_TABLE_MAX FIB6_TABLE_MIN -#define RT6_TABLE_LOCAL RT6_TABLE_MAIN -#endif - -typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *, - struct flowi *, int); +extern struct fib6_node ip6_routing_table; /* * exported functions */ -extern struct fib6_table * 
fib6_get_table(u32 id); -extern struct fib6_table * fib6_new_table(u32 id); -extern struct dst_entry * fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup); - extern struct fib6_node *fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr); @@ -193,29 +157,32 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct in6_addr *daddr, int dst_len, struct in6_addr *saddr, int src_len); -extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), - int prune, void *arg); +extern void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg); + +extern int fib6_walk(struct fib6_walker_t *w); +extern int fib6_walk_continue(struct fib6_walker_t *w); extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nl_info *info); + struct nlmsghdr *nlh, + void *rtattr, + struct netlink_skb_parms *req); extern int fib6_del(struct rt6_info *rt, - struct nl_info *info); + struct nlmsghdr *nlh, + void *rtattr, + struct netlink_skb_parms *req); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nl_info *info); + struct nlmsghdr *nlh, + struct netlink_skb_parms *req); extern void fib6_run_gc(unsigned long dummy); extern void fib6_gc_cleanup(void); extern void fib6_init(void); - -extern void fib6_rules_init(void); -extern void fib6_rules_cleanup(void); -extern int fib6_rules_dump(struct sk_buff *, - struct netlink_callback *); - #endif #endif diff --git a/trunk/include/net/ip6_route.h b/trunk/include/net/ip6_route.h index 6ca6b71dfe0f..96b0e66406ec 100644 --- a/trunk/include/net/ip6_route.h +++ b/trunk/include/net/ip6_route.h @@ -32,10 +32,6 @@ struct route_info { #include #include -#define RT6_LOOKUP_F_IFACE 0x1 -#define RT6_LOOKUP_F_REACHABLE 0x2 -#define RT6_LOOKUP_F_HAS_SADDR 0x4 - struct pol_chain { int type; int priority; @@ -45,11 +41,6 @@ struct pol_chain { extern struct rt6_info ip6_null_entry; -#ifdef CONFIG_IPV6_MULTIPLE_TABLES 
-extern struct rt6_info ip6_prohibit_entry; -extern struct rt6_info ip6_blk_hole_entry; -#endif - extern int ip6_rt_gc_interval; extern void ip6_route_input(struct sk_buff *skb); @@ -57,14 +48,25 @@ extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl); +extern int ip6_route_me_harder(struct sk_buff *skb); + extern void ip6_route_init(void); extern void ip6_route_cleanup(void); extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); -extern int ip6_route_add(struct fib6_config *cfg); -extern int ip6_ins_rt(struct rt6_info *); -extern int ip6_del_rt(struct rt6_info *); +extern int ip6_route_add(struct in6_rtmsg *rtmsg, + struct nlmsghdr *, + void *rtattr, + struct netlink_skb_parms *req); +extern int ip6_ins_rt(struct rt6_info *, + struct nlmsghdr *, + void *rtattr, + struct netlink_skb_parms *req); +extern int ip6_del_rt(struct rt6_info *, + struct nlmsghdr *, + void *rtattr, + struct netlink_skb_parms *req); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, @@ -112,7 +114,6 @@ extern int rt6_route_rcv(struct net_device *dev, struct in6_addr *gwaddr); extern void rt6_redirect(struct in6_addr *dest, - struct in6_addr *src, struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, @@ -130,13 +131,6 @@ extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); -struct rt6_rtnl_dump_arg -{ - struct sk_buff *skb; - struct netlink_callback *cb; -}; - -extern int rt6_dump_route(struct rt6_info *rt, void *p_arg); extern void rt6_ifdown(struct net_device *dev); extern void rt6_mtu_change(struct net_device *dev, unsigned mtu); @@ -146,24 +140,21 @@ extern rwlock_t rt6_lock; * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, 
struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; sk_setup_caps(sk, dst); np->daddr_cache = daddr; -#ifdef CONFIG_IPV6_SUBTREES - np->saddr_cache = saddr; -#endif np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr, struct in6_addr *saddr) + struct in6_addr *daddr) { write_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr, saddr); + __ip6_dst_store(sk, dst, daddr); write_unlock(&sk->sk_dst_lock); } diff --git a/trunk/include/net/ip_fib.h b/trunk/include/net/ip_fib.h index fcc159a4ac17..a095d1dec7a4 100644 --- a/trunk/include/net/ip_fib.h +++ b/trunk/include/net/ip_fib.h @@ -18,34 +18,26 @@ #include #include -#include - -struct fib_config { - u8 fc_family; - u8 fc_dst_len; - u8 fc_src_len; - u8 fc_tos; - u8 fc_protocol; - u8 fc_scope; - u8 fc_type; - /* 1 byte unused */ - u32 fc_table; - u32 fc_dst; - u32 fc_src; - u32 fc_gw; - int fc_oif; - u32 fc_flags; - u32 fc_priority; - u32 fc_prefsrc; - struct nlattr *fc_mx; - struct rtnexthop *fc_mp; - int fc_mx_len; - int fc_mp_len; - u32 fc_flow; - u32 fc_mp_alg; - u32 fc_nlflags; - struct nl_info fc_nlinfo; - }; + +/* WARNING: The ordering of these elements must match ordering + * of RTA_* rtnetlink attribute numbers. 
+ */ +struct kern_rta { + void *rta_dst; + void *rta_src; + int *rta_iif; + int *rta_oif; + void *rta_gw; + u32 *rta_priority; + void *rta_prefsrc; + struct rtattr *rta_mx; + struct rtattr *rta_mp; + unsigned char *rta_protoinfo; + u32 *rta_flow; + struct rta_cacheinfo *rta_ci; + struct rta_session *rta_sess; + u32 *rta_mp_alg; +}; struct fib_info; @@ -157,12 +149,15 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { - struct hlist_node tb_hlist; - u32 tb_id; + unsigned char tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); - int (*tb_insert)(struct fib_table *, struct fib_config *); - int (*tb_delete)(struct fib_table *, struct fib_config *); + int (*tb_insert)(struct fib_table *table, struct rtmsg *r, + struct kern_rta *rta, struct nlmsghdr *n, + struct netlink_skb_parms *req); + int (*tb_delete)(struct fib_table *table, struct rtmsg *r, + struct kern_rta *rta, struct nlmsghdr *n, + struct netlink_skb_parms *req); int (*tb_dump)(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int (*tb_flush)(struct fib_table *table); @@ -177,14 +172,14 @@ struct fib_table { extern struct fib_table *ip_fib_local_table; extern struct fib_table *ip_fib_main_table; -static inline struct fib_table *fib_get_table(u32 id) +static inline struct fib_table *fib_get_table(int id) { if (id != RT_TABLE_LOCAL) return ip_fib_main_table; return ip_fib_local_table; } -static inline struct fib_table *fib_new_table(u32 id) +static inline struct fib_table *fib_new_table(int id) { return fib_get_table(id); } @@ -204,19 +199,35 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) -#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) +#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) +#define ip_fib_main_table 
(fib_tables[RT_TABLE_MAIN]) + +extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; +extern int fib_lookup(const struct flowi *flp, struct fib_result *res); +extern struct fib_table *__fib_new_table(int id); +extern void fib_rule_put(struct fib_rule *r); -extern int fib_lookup(struct flowi *flp, struct fib_result *res); +static inline struct fib_table *fib_get_table(int id) +{ + if (id == 0) + id = RT_TABLE_MAIN; + + return fib_tables[id]; +} + +static inline struct fib_table *fib_new_table(int id) +{ + if (id == 0) + id = RT_TABLE_MAIN; + + return fib_tables[id] ? : __fib_new_table(id); +} -extern struct fib_table *fib_new_table(u32 id); -extern struct fib_table *fib_get_table(u32 id); extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ -extern struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); @@ -232,20 +243,23 @@ struct rtentry; extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); +extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r); extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ -extern struct fib_table *fib_hash_init(u32 id); +extern struct fib_table *fib_hash_init(int id); #ifdef CONFIG_IP_MULTIPLE_TABLES -extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); - -extern void __init fib4_rules_init(void); +/* Exported by fib_rules.c */ +extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +extern int 
inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); #endif - +extern void fib_rules_init(void); #endif static inline void fib_combine_itag(u32 *itag, struct fib_result *res) diff --git a/trunk/include/net/ipv6.h b/trunk/include/net/ipv6.h index 72bf47b2a4e0..ece7e8a84ffd 100644 --- a/trunk/include/net/ipv6.h +++ b/trunk/include/net/ipv6.h @@ -40,7 +40,6 @@ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ -#define NEXTHDR_MOBILITY 135 /* Mobility header. */ #define NEXTHDR_MAX 255 @@ -230,7 +229,7 @@ extern int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff **skbp); +extern int ipv6_parse_hopopts(struct sk_buff *skb); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, @@ -507,8 +506,6 @@ extern int ipv6_skip_exthdr(const struct sk_buff *, int start, extern int ipv6_ext_hdr(u8 nexthdr); -extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); - extern struct ipv6_txoptions * ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr); diff --git a/trunk/include/net/mip6.h b/trunk/include/net/mip6.h deleted file mode 100644 index 68263c6d9996..000000000000 --- a/trunk/include/net/mip6.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Authors: - * Noriaki TAKAMIYA @USAGI - * Masahide NAKAMURA @USAGI - * YOSHIFUJI Hideaki @USAGI - */ -#ifndef _NET_MIP6_H -#define _NET_MIP6_H - -#include -#include - -#define MIP6_OPT_PAD_1 0 -#define MIP6_OPT_PAD_N 1 - -/* - * Mobility Header - */ -struct ip6_mh { - __u8 ip6mh_proto; - __u8 ip6mh_hdrlen; - __u8 ip6mh_type; - __u8 ip6mh_reserved; - __u16 ip6mh_cksum; - /* Followed by type specific messages */ - __u8 data[0]; -} __attribute__ ((__packed__)); - -#define IP6_MH_TYPE_BRR 0 /* Binding Refresh Request */ -#define IP6_MH_TYPE_HOTI 1 /* HOTI Message */ -#define IP6_MH_TYPE_COTI 2 /* COTI Message */ -#define IP6_MH_TYPE_HOT 3 /* HOT Message */ -#define IP6_MH_TYPE_COT 4 /* COT Message */ -#define IP6_MH_TYPE_BU 5 /* Binding Update */ -#define IP6_MH_TYPE_BACK 6 /* Binding ACK */ -#define IP6_MH_TYPE_BERROR 7 /* Binding Error */ -#define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR - -extern int mip6_init(void); -extern void mip6_fini(void); -extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); - -#endif diff --git a/trunk/include/net/neighbour.h b/trunk/include/net/neighbour.h index c8aacbd2e333..4901ee446879 100644 --- a/trunk/include/net/neighbour.h +++ b/trunk/include/net/neighbour.h @@ -1,8 +1,6 @@ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H -#include - /* * Generic neighbour manipulation * @@ -16,6 +14,40 @@ * - Add neighbour cache statistics like rtstat */ +/* The following flags & states are exported to user space, + so that they should be moved to include/linux/ 
directory. + */ + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +#ifdef __KERNEL__ + #include #include #include @@ -101,7 +133,7 @@ struct neighbour __u8 dead; atomic_t probes; rwlock_t lock; - unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; + unsigned char ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)]; struct hh_cache *hh; atomic_t refcnt; int (*output)(struct sk_buff *skb); @@ -126,7 +158,6 @@ struct pneigh_entry { struct pneigh_entry *next; struct net_device *dev; - u8 flags; u8 key[0]; }; @@ -343,3 +374,6 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif +#endif + + diff --git a/trunk/include/net/netlabel.h b/trunk/include/net/netlabel.h deleted file mode 100644 index fc2b72fc7e07..000000000000 --- a/trunk/include/net/netlabel.h +++ /dev/null @@ -1,292 +0,0 @@ -/* - * NetLabel System - * - * The NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_H -#define _NETLABEL_H - -#include -#include -#include -#include - -/* - * NetLabel - A management interface for maintaining network packet label - * mapping tables for explicit packet labling protocols. - * - * Network protocols such as CIPSO and RIPSO require a label translation layer - * to convert the label on the packet into something meaningful on the host - * machine. In the current Linux implementation these mapping tables live - * inside the kernel; NetLabel provides a mechanism for user space applications - * to manage these mapping tables. - * - * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to - * send messages between kernel and user space. The general format of a - * NetLabel message is shown below: - * - * +-----------------+-------------------+--------- --- -- - - * | struct nlmsghdr | struct genlmsghdr | payload - * +-----------------+-------------------+--------- --- -- - - * - * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal. - * The payload is dependent on the subsystem specified in the - * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions - * should be defined in the corresponding net/netlabel/netlabel_.h|c - * file. All of the fields in the NetLabel payload are NETLINK attributes, the - * length of each field is the length of the NETLINK attribute payload, see - * include/net/netlink.h for more information on NETLINK attributes. 
- * - */ - -/* - * NetLabel NETLINK protocol - */ - -#define NETLBL_PROTO_VERSION 1 - -/* NetLabel NETLINK types/families */ -#define NETLBL_NLTYPE_NONE 0 -#define NETLBL_NLTYPE_MGMT 1 -#define NETLBL_NLTYPE_MGMT_NAME "NLBL_MGMT" -#define NETLBL_NLTYPE_RIPSO 2 -#define NETLBL_NLTYPE_RIPSO_NAME "NLBL_RIPSO" -#define NETLBL_NLTYPE_CIPSOV4 3 -#define NETLBL_NLTYPE_CIPSOV4_NAME "NLBL_CIPSOv4" -#define NETLBL_NLTYPE_CIPSOV6 4 -#define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" -#define NETLBL_NLTYPE_UNLABELED 5 -#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" - -/* NetLabel return codes */ -#define NETLBL_E_OK 0 - -/* - * Helper functions - */ - -#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) -#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) -#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) - -/** - * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer - * @head: the amount of headroom in bytes - * @body: the desired size (minus headroom) in bytes - * @gfp_flags: the alloc flags to pass to alloc_skb() - * - * Description: - * Allocate a NETLINK message buffer based on the sizes given in @head and - * @body. If @head is greater than zero skb_reserve() is called to reserve - * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on - * success, NULL on failure. - * - */ -static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, - size_t body, - int gfp_flags) -{ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); - if (skb == NULL) - return NULL; - if (head > 0) { - skb_reserve(skb, head); - if (skb_tailroom(skb) < body) { - kfree_skb(skb); - return NULL; - } - } - - return skb; -} - -/* - * NetLabel - Kernel API for accessing the network packet label mappings. - * - * The following functions are provided for use by other kernel modules, - * specifically kernel LSM modules, to provide a consistent, transparent API - * for dealing with explicit packet labeling protocols such as CIPSO and - * RIPSO. 
The functions defined here are implemented in the - * net/netlabel/netlabel_kapi.c file. - * - */ - -/* Domain mapping definition struct */ -struct netlbl_dom_map; - -/* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain); - -/* LSM security attributes */ -struct netlbl_lsm_cache { - void (*free) (const void *data); - void *data; -}; -struct netlbl_lsm_secattr { - char *domain; - - u32 mls_lvl; - u32 mls_lvl_vld; - unsigned char *mls_cat; - size_t mls_cat_len; - - struct netlbl_lsm_cache cache; -}; - -/* - * LSM security attribute operations - */ - - -/** - * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct - * @secattr: the struct to initialize - * - * Description: - * Initialize an already allocated netlbl_lsm_secattr struct. Returns zero on - * success, negative values on error. - * - */ -static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) -{ - memset(secattr, 0, sizeof(*secattr)); - return 0; -} - -/** - * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct - * @secattr: the struct to clear - * @clear_cache: cache clear flag - * - * Description: - * Destroys the @secattr struct, including freeing all of the internal buffers. - * If @clear_cache is true then free the cache fields, otherwise leave them - * intact. The struct must be reset with a call to netlbl_secattr_init() - * before reuse. - * - */ -static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) -{ - if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) - secattr->cache.free(secattr->cache.data); - kfree(secattr->domain); - kfree(secattr->mls_cat); -} - -/** - * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct - * @flags: the memory allocation flags - * - * Description: - * Allocate and initialize a netlbl_lsm_secattr struct. Returns a valid - * pointer on success, or NULL on failure. 
- * - */ -static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) -{ - return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); -} - -/** - * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct - * @secattr: the struct to free - * @clear_cache: cache clear flag - * - * Description: - * Frees @secattr including all of the internal buffers. If @clear_cache is - * true then free the cache fields, otherwise leave them intact. - * - */ -static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) -{ - netlbl_secattr_destroy(secattr, clear_cache); - kfree(secattr); -} - -/* - * LSM protocol operations - */ - -#ifdef CONFIG_NETLABEL -int netlbl_socket_setattr(const struct socket *sock, - const struct netlbl_lsm_secattr *secattr); -int netlbl_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr); -int netlbl_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr); -void netlbl_skbuff_err(struct sk_buff *skb, int error); -#else -static inline int netlbl_socket_setattr(const struct socket *sock, - const struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline int netlbl_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - return -ENOSYS; -} - -static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) -{ - return; -} -#endif /* CONFIG_NETLABEL */ - -/* - * LSM label mapping cache operations - */ - -#ifdef CONFIG_NETLABEL -void netlbl_cache_invalidate(void); -int netlbl_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr); -#else -static inline void netlbl_cache_invalidate(void) -{ - return; -} - -static inline int netlbl_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr) -{ - return 0; -} -#endif /* CONFIG_NETLABEL */ - 
-#endif /* _NETLABEL_H */ diff --git a/trunk/include/net/netlink.h b/trunk/include/net/netlink.h index 11dc2e7f679a..640c26a90cf1 100644 --- a/trunk/include/net/netlink.h +++ b/trunk/include/net/netlink.h @@ -35,15 +35,12 @@ * nlmsg_put() add a netlink message to an skb * nlmsg_put_answer() callback based nlmsg_put() * nlmsg_end() finanlize netlink message - * nlmsg_get_pos() return current position in message - * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction * nlmsg_free() free a netlink message * * Message Sending: * nlmsg_multicast() multicast message to several groups * nlmsg_unicast() unicast a message to a single socket - * nlmsg_notify() send notification message * * Message Length Calculations: * nlmsg_msg_size(payload) length of message w/o padding @@ -65,9 +62,6 @@ * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * - * Misc: - * nlmsg_report() report back to application? - * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ @@ -86,10 +80,8 @@ * struct nlattr netlink attribtue header * * Attribute Construction: - * nla_reserve(skb, type, len) reserve room for an attribute - * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr + * nla_reserve(skb, type, len) reserve skb tailroom for an attribute * nla_put(skb, type, len, data) add attribute to skb - * nla_put_nohdr(skb, len, data) add attribute w/o hdr * * Attribute Construction for Basic Types: * nla_put_u8(skb, type, value) add u8 attribute to skb @@ -147,7 +139,6 @@ * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes * nla_find() find attribute in stream of attributes - * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets * 
nla_for_each_attr() loop over all attributes @@ -167,7 +158,6 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, - NLA_NUL_STRING, __NLA_TYPE_MAX, }; @@ -176,37 +166,21 @@ enum { /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC - * @len: Type specific length of payload + * @minlen: Minimal length of payload required to be available * * Policies are defined as arrays of this struct, the array must be * accessible by attribute type up to the highest identifier to be expected. * - * Meaning of `len' field: - * NLA_STRING Maximum length of string - * NLA_NUL_STRING Maximum length of string (excluding NUL) - * NLA_FLAG Unused - * All other Exact length of attribute payload - * * Example: * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { * [ATTR_FOO] = { .type = NLA_U16 }, - * [ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ }, - * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, + * [ATTR_BAR] = { .type = NLA_STRING }, + * [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) }, * }; */ struct nla_policy { u16 type; - u16 len; -}; - -/** - * struct nl_info - netlink source information - * @nlh: Netlink message header of original request - * @pid: Netlink PID of requesting application - */ -struct nl_info { - struct nlmsghdr *nlh; - u32 pid; + u16 minlen; }; extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, @@ -214,9 +188,6 @@ extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, struct nlmsghdr *, int *)); extern void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); -extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, int report, - gfp_t flags); extern int nla_validate(struct nlattr *head, int len, int maxtype, struct nla_policy *policy); @@ -232,18 +203,12 @@ extern int nla_memcmp(const struct nlattr *nla, const void *data, extern int nla_strcmp(const struct nlattr *nla, const char *str); extern struct nlattr * __nla_reserve(struct 
sk_buff *skb, int attrtype, int attrlen); -extern void * __nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); -extern void * nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); -extern void __nla_put_nohdr(struct sk_buff *skb, int attrlen, - const void *data); extern int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); -extern int nla_put_nohdr(struct sk_buff *skb, int attrlen, - const void *data); /************************************************************************** * Netlink Messages @@ -398,17 +363,6 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, nlmsg_attrlen(nlh, hdrlen), maxtype, policy); } -/** - * nlmsg_report - need to report back to application? - * @nlh: netlink message header - * - * Returns 1 if a report back to the application is requested. - */ -static inline int nlmsg_report(struct nlmsghdr *nlh) -{ - return !!(nlh->nlmsg_flags & NLM_F_ECHO); -} - /** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute @@ -499,13 +453,12 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, /** * nlmsg_new - Allocate a new netlink message * @size: maximum size of message - * @flags: the type of memory to allocate. * * Use NLMSG_GOODSIZE if size isn't know and you need a good default size. 
*/ -static inline struct sk_buff *nlmsg_new(int size, gfp_t flags) +static inline struct sk_buff *nlmsg_new(int size) { - return alloc_skb(size, flags); + return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); } /** @@ -526,32 +479,6 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) return skb->len; } -/** - * nlmsg_get_pos - return current position in netlink message - * @skb: socket buffer the message is stored in - * - * Returns a pointer to the current tail of the message. - */ -static inline void *nlmsg_get_pos(struct sk_buff *skb) -{ - return skb->tail; -} - -/** - * nlmsg_trim - Trim message to a mark - * @skb: socket buffer the message is stored in - * @mark: mark to trim to - * - * Trims the message to the provided mark. Returns -1. - */ -static inline int nlmsg_trim(struct sk_buff *skb, void *mark) -{ - if (mark) - skb_trim(skb, (unsigned char *) mark - skb->data); - - return -1; -} - /** * nlmsg_cancel - Cancel construction of a netlink message * @skb: socket buffer the message is stored in @@ -562,7 +489,9 @@ static inline int nlmsg_trim(struct sk_buff *skb, void *mark) */ static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { - return nlmsg_trim(skb, nlh); + skb_trim(skb, (unsigned char *) nlh - skb->data); + + return -1; } /** @@ -580,16 +509,15 @@ static inline void nlmsg_free(struct sk_buff *skb) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id - * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 pid, unsigned int group) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, flags); + err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL); if (err > 0) err = 0; @@ -702,18 +630,6 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) return (struct nlattr *) ((char *) nla + 
totlen); } -/** - * nla_find_nested - find attribute in a set of nested attributes - * @nla: attribute containing the nested attributes - * @attrtype: type of attribute to look for - * - * Returns the first attribute which matches the specified type. - */ -static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) -{ - return nla_find(nla_data(nla), nla_len(nla), attrtype); -} - /** * nla_parse_nested - parse nested attributes * @tb: destination array with maxtype+1 elements @@ -835,7 +751,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) -#define NLA_PUT_FLAG(skb, attrtype) \ +#define NLA_PUT_FLAG(skb, attrtype, value) \ NLA_PUT(skb, attrtype, 0, NULL) #define NLA_PUT_MSECS(skb, attrtype, jiffies) \ @@ -946,7 +862,10 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) */ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) { - return nlmsg_trim(skb, start); + if (start) + skb_trim(skb, (unsigned char *) start - skb->data); + + return -1; } /** @@ -961,13 +880,4 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) -/** - * nla_for_each_nested - iterate over nested attributes - * @pos: loop counter, set to current attribute - * @nla: attribute containing the nested attributes - * @rem: initialized to len, holds bytes currently remaining in stream - */ -#define nla_for_each_nested(pos, nla, rem) \ - nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) - #endif diff --git a/trunk/include/net/nexthop.h b/trunk/include/net/nexthop.h deleted file mode 100644 index 3334dbfa5aa4..000000000000 --- a/trunk/include/net/nexthop.h +++ /dev/null @@ -1,33 +0,0 @@ -#ifndef __NET_NEXTHOP_H -#define __NET_NEXTHOP_H - -#include -#include - -static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) -{ - return 
remaining >= sizeof(*rtnh) && - rtnh->rtnh_len >= sizeof(*rtnh) && - rtnh->rtnh_len <= remaining; -} - -static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, - int *remaining) -{ - int totlen = NLA_ALIGN(rtnh->rtnh_len); - - *remaining -= totlen; - return (struct rtnexthop *) ((char *) rtnh + totlen); -} - -static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) -{ - return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); -} - -static inline int rtnh_attrlen(const struct rtnexthop *rtnh) -{ - return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); -} - -#endif diff --git a/trunk/include/net/pkt_act.h b/trunk/include/net/pkt_act.h new file mode 100644 index 000000000000..cf5e4d2e4c21 --- /dev/null +++ b/trunk/include/net/pkt_act.h @@ -0,0 +1,273 @@ +#ifndef __NET_PKT_ACT_H +#define __NET_PKT_ACT_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define tca_st(val) (struct tcf_##val *) +#define PRIV(a,name) ( tca_st(name) (a)->priv) + +#if 0 /* control */ +#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) +#else +#define DPRINTK(format,args...) +#endif + +#if 0 /* data */ +#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) +#else +#define D2PRINTK(format,args...) 
+#endif + +static __inline__ unsigned +tcf_hash(u32 index) +{ + return index & MY_TAB_MASK; +} + +/* probably move this from being inline + * and put into act_generic +*/ +static inline void +tcf_hash_destroy(struct tcf_st *p) +{ + unsigned h = tcf_hash(p->index); + struct tcf_st **p1p; + + for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) { + if (*p1p == p) { + write_lock_bh(&tcf_t_lock); + *p1p = p->next; + write_unlock_bh(&tcf_t_lock); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&p->bstats, &p->rate_est); +#endif + kfree(p); + return; + } + } + BUG_TRAP(0); +} + +static inline int +tcf_hash_release(struct tcf_st *p, int bind ) +{ + int ret = 0; + if (p) { + if (bind) { + p->bindcnt--; + } + p->refcnt--; + if(p->bindcnt <=0 && p->refcnt <= 0) { + tcf_hash_destroy(p); + ret = 1; + } + } + return ret; +} + +static __inline__ int +tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, + struct tc_action *a) +{ + struct tcf_st *p; + int err =0, index = -1,i= 0, s_i = 0, n_i = 0; + struct rtattr *r ; + + read_lock(&tcf_t_lock); + + s_i = cb->args[0]; + + for (i = 0; i < MY_TAB_SIZE; i++) { + p = tcf_ht[tcf_hash(i)]; + + for (; p; p = p->next) { + index++; + if (index < s_i) + continue; + a->priv = p; + a->order = n_i; + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + err = tcf_action_dump_1(skb, a, 0, 0); + if (0 > err) { + index--; + skb_trim(skb, (u8*)r - skb->data); + goto done; + } + r->rta_len = skb->tail - (u8*)r; + n_i++; + if (n_i >= TCA_ACT_MAX_PRIO) { + goto done; + } + } + } +done: + read_unlock(&tcf_t_lock); + if (n_i) + cb->args[0] += n_i; + return n_i; + +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + goto done; +} + +static __inline__ int +tcf_del_walker(struct sk_buff *skb, struct tc_action *a) +{ + struct tcf_st *p, *s_p; + struct rtattr *r ; + int i= 0, n_i = 0; + + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); + for (i = 0; i < 
MY_TAB_SIZE; i++) { + p = tcf_ht[tcf_hash(i)]; + + while (p != NULL) { + s_p = p->next; + if (ACT_P_DELETED == tcf_hash_release(p, 0)) { + module_put(a->ops->owner); + } + n_i++; + p = s_p; + } + } + RTA_PUT(skb, TCA_FCNT, 4, &n_i); + r->rta_len = skb->tail - (u8*)r; + + return n_i; +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + return -EINVAL; +} + +static __inline__ int +tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, + struct tc_action *a) +{ + if (type == RTM_DELACTION) { + return tcf_del_walker(skb,a); + } else if (type == RTM_GETACTION) { + return tcf_dump_walker(skb,cb,a); + } else { + printk("tcf_generic_walker: unknown action %d\n",type); + return -EINVAL; + } +} + +static __inline__ struct tcf_st * +tcf_hash_lookup(u32 index) +{ + struct tcf_st *p; + + read_lock(&tcf_t_lock); + for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) { + if (p->index == index) + break; + } + read_unlock(&tcf_t_lock); + return p; +} + +static __inline__ u32 +tcf_hash_new_index(void) +{ + do { + if (++idx_gen == 0) + idx_gen = 1; + } while (tcf_hash_lookup(idx_gen)); + + return idx_gen; +} + + +static inline int +tcf_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_st *p = tcf_hash_lookup(index); + + if (p != NULL) { + a->priv = p; + return 1; + } + return 0; +} + +#ifdef CONFIG_NET_ACT_INIT +static inline struct tcf_st * +tcf_hash_check(u32 index, struct tc_action *a, int ovr, int bind) +{ + struct tcf_st *p = NULL; + if (index && (p = tcf_hash_lookup(index)) != NULL) { + if (bind) { + p->bindcnt++; + p->refcnt++; + } + a->priv = p; + } + return p; +} + +static inline struct tcf_st * +tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind) +{ + struct tcf_st *p = NULL; + + p = kmalloc(size, GFP_KERNEL); + if (p == NULL) + return p; + + memset(p, 0, size); + p->refcnt = 1; + + if (bind) { + p->bindcnt = 1; + } + + spin_lock_init(&p->lock); + p->stats_lock = &p->lock; + p->index = index ? 
: tcf_hash_new_index(); + p->tm.install = jiffies; + p->tm.lastuse = jiffies; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); +#endif + a->priv = (void *) p; + return p; +} + +static inline void tcf_hash_insert(struct tcf_st *p) +{ + unsigned h = tcf_hash(p->index); + + write_lock_bh(&tcf_t_lock); + p->next = tcf_ht[h]; + tcf_ht[h] = p; + write_unlock_bh(&tcf_t_lock); +} + +#endif + +#endif diff --git a/trunk/include/net/request_sock.h b/trunk/include/net/request_sock.h index 8e165ca16bd8..c5d7f920c352 100644 --- a/trunk/include/net/request_sock.h +++ b/trunk/include/net/request_sock.h @@ -53,7 +53,6 @@ struct request_sock { unsigned long expires; struct request_sock_ops *rsk_ops; struct sock *sk; - u32 secid; }; static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) diff --git a/trunk/include/net/route.h b/trunk/include/net/route.h index 7f93ac0e0899..c4a068692dcc 100644 --- a/trunk/include/net/route.h +++ b/trunk/include/net/route.h @@ -32,7 +32,6 @@ #include #include #include -#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. 
@@ -167,7 +166,6 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, ip_rt_put(*rp); *rp = NULL; } - security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } @@ -184,7 +182,6 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.proto = protocol; ip_rt_put(*rp); *rp = NULL; - security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } return 0; diff --git a/trunk/include/net/sctp/constants.h b/trunk/include/net/sctp/constants.h index 6c632e26f72d..57166bfdf8eb 100644 --- a/trunk/include/net/sctp/constants.h +++ b/trunk/include/net/sctp/constants.h @@ -264,10 +264,10 @@ enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; /* Heartbeat interval - 30 secs */ -#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30*1000) +#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30 * HZ) /* Delayed sack timer - 200ms */ -#define SCTP_DEFAULT_TIMEOUT_SACK (200) +#define SCTP_DEFAULT_TIMEOUT_SACK ((200 * HZ) / 1000) /* RTO.Initial - 3 seconds * RTO.Min - 1 second @@ -275,9 +275,9 @@ enum { SCTP_MAX_GABS = 16 }; * RTO.Alpha - 1/8 * RTO.Beta - 1/4 */ -#define SCTP_RTO_INITIAL (3 * 1000) -#define SCTP_RTO_MIN (1 * 1000) -#define SCTP_RTO_MAX (60 * 1000) +#define SCTP_RTO_INITIAL (3 * HZ) +#define SCTP_RTO_MIN (1 * HZ) +#define SCTP_RTO_MAX (60 * HZ) #define SCTP_RTO_ALPHA 3 /* 1/8 when converted to right shifts. */ #define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. 
*/ @@ -290,7 +290,8 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEF_MAX_INIT 6 #define SCTP_DEF_MAX_SEND 10 -#define SCTP_DEFAULT_COOKIE_LIFE (60 * 1000) /* 60 seconds */ +#define SCTP_DEFAULT_COOKIE_LIFE_SEC 60 /* seconds */ +#define SCTP_DEFAULT_COOKIE_LIFE_USEC 0 /* microseconds */ #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ diff --git a/trunk/include/net/sctp/sctp.h b/trunk/include/net/sctp/sctp.h index ee68a3124076..1c1abce5f6b6 100644 --- a/trunk/include/net/sctp/sctp.h +++ b/trunk/include/net/sctp/sctp.h @@ -128,8 +128,6 @@ extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); -int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, - void *ptr); /* * sctp/socket.c @@ -179,17 +177,6 @@ void sctp_icmp_proto_unreachable(struct sock *sk, void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); -/* - * sctp/proc.c - */ -int sctp_snmp_proc_init(void); -void sctp_snmp_proc_exit(void); -int sctp_eps_proc_init(void); -void sctp_eps_proc_exit(void); -int sctp_assocs_proc_init(void); -void sctp_assocs_proc_exit(void); - - /* * Section: Macros, externs, and inlines */ @@ -229,50 +216,6 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); #endif /* !TEST_FRAME */ -/* sctp mib definitions */ -enum -{ - SCTP_MIB_NUM = 0, - SCTP_MIB_CURRESTAB, /* CurrEstab */ - SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ - SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ - SCTP_MIB_ABORTEDS, /* Aborteds */ - SCTP_MIB_SHUTDOWNS, /* Shutdowns */ - SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ - SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ - SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ - SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ - SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ - SCTP_MIB_INCTRLCHUNKS, /* 
InCtrlChunks */ - SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ - SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ - SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ - SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ - SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ - SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ - SCTP_MIB_T1_INIT_EXPIREDS, - SCTP_MIB_T1_COOKIE_EXPIREDS, - SCTP_MIB_T2_SHUTDOWN_EXPIREDS, - SCTP_MIB_T3_RTX_EXPIREDS, - SCTP_MIB_T4_RTO_EXPIREDS, - SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, - SCTP_MIB_DELAY_SACK_EXPIREDS, - SCTP_MIB_AUTOCLOSE_EXPIREDS, - SCTP_MIB_T3_RETRANSMITS, - SCTP_MIB_PMTUD_RETRANSMITS, - SCTP_MIB_FAST_RETRANSMITS, - SCTP_MIB_IN_PKT_SOFTIRQ, - SCTP_MIB_IN_PKT_BACKLOG, - SCTP_MIB_IN_PKT_DISCARDS, - SCTP_MIB_IN_DATA_CHUNK_DISCARDS, - __SCTP_MIB_MAX -}; - -#define SCTP_MIB_MAX __SCTP_MIB_MAX -struct sctp_mib { - unsigned long mibs[SCTP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; - /* Print debugging messages. */ #if SCTP_DEBUG diff --git a/trunk/include/net/sctp/structs.h b/trunk/include/net/sctp/structs.h index c6d93bb0dcd2..0412e730c765 100644 --- a/trunk/include/net/sctp/structs.h +++ b/trunk/include/net/sctp/structs.h @@ -128,9 +128,9 @@ extern struct sctp_globals { * RTO.Alpha - 1/8 (3 when converted to right shifts.) * RTO.Beta - 1/4 (2 when converted to right shifts.) */ - unsigned int rto_initial; - unsigned int rto_min; - unsigned int rto_max; + unsigned long rto_initial; + unsigned long rto_min; + unsigned long rto_max; /* Note: rto_alpha and rto_beta are really defined as inverse * powers of two to facilitate integer operations. 
@@ -145,13 +145,13 @@ extern struct sctp_globals { int cookie_preserve_enable; /* Valid.Cookie.Life - 60 seconds */ - unsigned int valid_cookie_life; + unsigned long valid_cookie_life; /* Delayed SACK timeout 200ms default*/ - unsigned int sack_timeout; + unsigned long sack_timeout; /* HB.interval - 30 seconds */ - unsigned int hb_interval; + unsigned long hb_interval; /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) diff --git a/trunk/include/net/snmp.h b/trunk/include/net/snmp.h index 464970e39ec0..a36bed8ea210 100644 --- a/trunk/include/net/snmp.h +++ b/trunk/include/net/snmp.h @@ -100,6 +100,12 @@ struct udp_mib { unsigned long mibs[UDP_MIB_MAX]; } __SNMP_MIB_ALIGN__; +/* SCTP */ +#define SCTP_MIB_MAX __SCTP_MIB_MAX +struct sctp_mib { + unsigned long mibs[SCTP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + /* Linux */ #define LINUX_MIB_MAX __LINUX_MIB_MAX struct linux_mib { diff --git a/trunk/include/net/sock.h b/trunk/include/net/sock.h index edd4d73ce7f5..324b3ea233d6 100644 --- a/trunk/include/net/sock.h +++ b/trunk/include/net/sock.h @@ -862,24 +862,30 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); * */ -static inline int sk_filter(struct sock *sk, struct sk_buff *skb) +static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) { int err; - struct sk_filter *filter; err = security_sock_rcv_skb(sk, skb); if (err) return err; - rcu_read_lock_bh(); - filter = sk->sk_filter; - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - rcu_read_unlock_bh(); + if (sk->sk_filter) { + struct sk_filter *filter; + + if (needlock) + bh_lock_sock(sk); + + filter = sk->sk_filter; + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? 
pskb_trim(skb, pkt_len) : -EPERM; + } + if (needlock) + bh_unlock_sock(sk); + } return err; } @@ -891,12 +897,6 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) * Remove a filter from a socket and release its resources. */ -static inline void sk_filter_rcu_free(struct rcu_head *rcu) -{ - struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); - kfree(fp); -} - static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); @@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) atomic_sub(size, &sk->sk_omem_alloc); if (atomic_dec_and_test(&fp->refcnt)) - call_rcu_bh(&fp->rcu, sk_filter_rcu_free); + kfree(fp); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) @@ -969,23 +969,9 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; - security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } -static inline void sock_copy(struct sock *nsk, const struct sock *osk) -{ -#ifdef CONFIG_SECURITY_NETWORK - void *sptr = nsk->sk_security; -#endif - - memcpy(nsk, osk, osk->sk_prot->obj_size); -#ifdef CONFIG_SECURITY_NETWORK - nsk->sk_security = sptr; - security_sk_clone(osk, nsk); -#endif -} - extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); diff --git a/trunk/include/net/tc_act/tc_defact.h b/trunk/include/net/tc_act/tc_defact.h index 65f024b80958..463aa671f95d 100644 --- a/trunk/include/net/tc_act/tc_defact.h +++ b/trunk/include/net/tc_act/tc_defact.h @@ -3,12 +3,11 @@ #include -struct tcf_defact { - struct tcf_common common; - u32 tcfd_datalen; - void *tcfd_defdata; +struct tcf_defact +{ + tca_gen(defact); + u32 datalen; + void *defdata; }; -#define to_defact(pc) \ - container_of(pc, struct tcf_defact, common) -#endif /* __NET_TC_DEF_H */ +#endif diff --git 
a/trunk/include/net/tc_act/tc_gact.h b/trunk/include/net/tc_act/tc_gact.h index 9e3f6767b80e..59f0d9628ad1 100644 --- a/trunk/include/net/tc_act/tc_gact.h +++ b/trunk/include/net/tc_act/tc_gact.h @@ -3,15 +3,15 @@ #include -struct tcf_gact { - struct tcf_common common; +struct tcf_gact +{ + tca_gen(gact); #ifdef CONFIG_GACT_PROB - u16 tcfg_ptype; - u16 tcfg_pval; - int tcfg_paction; + u16 ptype; + u16 pval; + int paction; #endif + }; -#define to_gact(pc) \ - container_of(pc, struct tcf_gact, common) - -#endif /* __NET_TC_GACT_H */ + +#endif diff --git a/trunk/include/net/tc_act/tc_ipt.h b/trunk/include/net/tc_act/tc_ipt.h index f7d25dfcc4b7..cb37ad08427f 100644 --- a/trunk/include/net/tc_act/tc_ipt.h +++ b/trunk/include/net/tc_act/tc_ipt.h @@ -5,13 +5,12 @@ struct xt_entry_target; -struct tcf_ipt { - struct tcf_common common; - u32 tcfi_hook; - char *tcfi_tname; - struct xt_entry_target *tcfi_t; +struct tcf_ipt +{ + tca_gen(ipt); + u32 hook; + char *tname; + struct xt_entry_target *t; }; -#define to_ipt(pc) \ - container_of(pc, struct tcf_ipt, common) -#endif /* __NET_TC_IPT_H */ +#endif diff --git a/trunk/include/net/tc_act/tc_mirred.h b/trunk/include/net/tc_act/tc_mirred.h index ceac661cdfd5..b5c32f65c12c 100644 --- a/trunk/include/net/tc_act/tc_mirred.h +++ b/trunk/include/net/tc_act/tc_mirred.h @@ -3,14 +3,13 @@ #include -struct tcf_mirred { - struct tcf_common common; - int tcfm_eaction; - int tcfm_ifindex; - int tcfm_ok_push; - struct net_device *tcfm_dev; +struct tcf_mirred +{ + tca_gen(mirred); + int eaction; + int ifindex; + int ok_push; + struct net_device *dev; }; -#define to_mirred(pc) \ - container_of(pc, struct tcf_mirred, common) -#endif /* __NET_TC_MIR_H */ +#endif diff --git a/trunk/include/net/tc_act/tc_pedit.h b/trunk/include/net/tc_act/tc_pedit.h index e6f6e15956f5..eb21689d759d 100644 --- a/trunk/include/net/tc_act/tc_pedit.h +++ b/trunk/include/net/tc_act/tc_pedit.h @@ -3,13 +3,12 @@ #include -struct tcf_pedit { - struct tcf_common common; - 
unsigned char tcfp_nkeys; - unsigned char tcfp_flags; - struct tc_pedit_key *tcfp_keys; +struct tcf_pedit +{ + tca_gen(pedit); + unsigned char nkeys; + unsigned char flags; + struct tc_pedit_key *keys; }; -#define to_pedit(pc) \ - container_of(pc, struct tcf_pedit, common) -#endif /* __NET_TC_PED_H */ +#endif diff --git a/trunk/include/net/udp.h b/trunk/include/net/udp.h index db0c05f67546..766fba1369ce 100644 --- a/trunk/include/net/udp.h +++ b/trunk/include/net/udp.h @@ -30,9 +30,25 @@ #define UDP_HTABLE_SIZE 128 +/* udp.c: This needs to be shared by v4 and v6 because the lookup + * and hashing code needs to work with different AF's yet + * the port space is shared. + */ extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; +extern int udp_port_rover; + +static inline int udp_lport_inuse(u16 num) +{ + struct sock *sk; + struct hlist_node *node; + + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + if (inet_sk(sk)->num == num) + return 1; + return 0; +} /* Note: this must match 'valbool' in sock_setsockopt */ #define UDP_CSUM_NOXMIT 1 @@ -47,8 +63,6 @@ extern struct proto udp_prot; struct sk_buff; -extern int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/trunk/include/net/xfrm.h b/trunk/include/net/xfrm.h index 11e0b1d6bd47..3ecd9fa1ed4b 100644 --- a/trunk/include/net/xfrm.h +++ b/trunk/include/net/xfrm.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -94,9 +93,8 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ - struct hlist_node bydst; - struct hlist_node bysrc; - struct hlist_node byspi; + struct list_head bydst; + struct list_head byspi; atomic_t refcnt; spinlock_t lock; @@ -104,8 +102,6 @@ struct xfrm_state struct xfrm_id id; struct xfrm_selector sel; - u32 
genid; - /* Key manger bits */ struct { u8 state; @@ -136,9 +132,6 @@ struct xfrm_state /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; - /* Data for care-of address */ - xfrm_address_t *coaddr; - /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; @@ -169,9 +162,6 @@ struct xfrm_state struct xfrm_lifetime_cur curlft; struct timer_list timer; - /* Last used time */ - u64 lastused; - /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; @@ -205,7 +195,6 @@ struct km_event u32 proto; u32 byid; u32 aevent; - u32 type; } data; u32 seq; @@ -222,7 +211,6 @@ struct xfrm_policy_afinfo { struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); - int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); int (*bundle_create)(struct xfrm_policy *policy, struct xfrm_state **xfrm, @@ -246,12 +234,16 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; + struct list_head *state_bydst; + struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); - int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); - int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); + struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); + struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, + int create); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); @@ -264,17 +256,11 @@ struct xfrm_type char *description; struct module *owner; __u8 proto; - __u8 flags; -#define XFRM_TYPE_NON_FRAGMENT 1 int (*init_state)(struct xfrm_state 
*x); void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); - int (*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *); - int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); - xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); - xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; @@ -286,7 +272,7 @@ extern void xfrm_put_type(struct xfrm_type *type); struct xfrm_mode { int (*input)(struct xfrm_state *x, struct sk_buff *skb); - int (*output)(struct xfrm_state *x,struct sk_buff *skb); + int (*output)(struct sk_buff *skb); struct module *owner; unsigned int encap; @@ -312,7 +298,7 @@ struct xfrm_tmpl __u32 reqid; -/* Mode: transport, tunnel etc. */ +/* Mode: transport/tunnel */ __u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ @@ -327,20 +313,18 @@ struct xfrm_tmpl __u32 calgos; }; -#define XFRM_MAX_DEPTH 6 +#define XFRM_MAX_DEPTH 4 struct xfrm_policy { struct xfrm_policy *next; - struct hlist_node bydst; - struct hlist_node byidx; + struct list_head list; /* This lock only affects elements except for entry. 
*/ rwlock_t lock; atomic_t refcnt; struct timer_list timer; - u8 type; u32 priority; u32 index; struct xfrm_selector selector; @@ -378,16 +362,16 @@ struct xfrm_mgr char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); - struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); + struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); - int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); }; extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); -extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; + +extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -403,19 +387,67 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy) __xfrm_policy_destroy(policy); } -#ifdef CONFIG_XFRM_SUB_POLICY -static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +#define XFRM_DST_HSIZE 1024 + +static __inline__ +unsigned __xfrm4_dst_hash(xfrm_address_t *addr) { - int i; - for (i = npols - 1; i >= 0; --i) - xfrm_pol_put(pols[i]); + unsigned h; + h = ntohl(addr->a4); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; } -#else -static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) + +static __inline__ +unsigned __xfrm6_dst_hash(xfrm_address_t *addr) { - xfrm_pol_put(pols[0]); + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_dst_hash(addr); + case AF_INET6: + return 
__xfrm6_dst_hash(addr); + } + return 0; +} + +static __inline__ +unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a4^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_spi_hash(addr, spi, proto); + case AF_INET6: + return __xfrm6_spi_hash(addr, spi, proto); + } + return 0; /*XXX*/ } -#endif extern void __xfrm_state_destroy(struct xfrm_state *); @@ -475,11 +507,6 @@ u16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; -#ifdef CONFIG_IPV6_MIP6 - case IPPROTO_MH: - port = htons(fl->fl_mh_type); - break; -#endif default: port = 0; /*XXX*/ } @@ -580,7 +607,6 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; - u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; @@ -631,18 +657,6 @@ secpath_reset(struct sk_buff *skb) #endif } -static inline int -xfrm_addr_any(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return addr->a4 == 0; - case AF_INET6: - return ipv6_addr_any((struct in6_addr *)&addr->a6); - } - return 0; -} - static inline int __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x) { @@ -677,8 +691,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - - return (!xfrm_policy_count[dir] && !skb->sp) || + + return (!xfrm_policy_list[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -698,7 +712,7 @@ 
extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_count[XFRM_POLICY_OUT] || + return !xfrm_policy_list[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } @@ -816,36 +830,11 @@ xfrm_state_addr_check(struct xfrm_state *x, return 0; } -static __inline__ int -xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl, - unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_state_addr_check(x, - (xfrm_address_t *)&fl->fl4_dst, - (xfrm_address_t *)&fl->fl4_src); - case AF_INET6: - return __xfrm6_state_addr_check(x, - (xfrm_address_t *)&fl->fl6_dst, - (xfrm_address_t *)&fl->fl6_src); - } - return 0; -} - static inline int xfrm_state_kern(struct xfrm_state *x) { return atomic_read(&x->tunnel_users); } -static inline int xfrm_id_proto_match(u8 proto, u8 userproto) -{ - return (!userproto || proto == userproto || - (userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH || - proto == IPPROTO_ESP || - proto == IPPROTO_COMP))); -} - /* * xfrm algorithm information */ @@ -913,25 +902,6 @@ extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); -extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); -#ifdef CONFIG_XFRM_SUB_POLICY -extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, - int n, unsigned short family); -extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, - int n, unsigned short family); -#else -static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, - int n, unsigned short family) -{ - return -ENOSYS; -} - -static 
inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, - int n, unsigned short family) -{ - return -ENOSYS; -} -#endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); @@ -947,16 +917,12 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); -extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, - xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr); extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr); extern int xfrm6_output(struct sk_buff *skb); -extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, - u8 **prevhdr); #ifdef CONFIG_XFRM extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); @@ -981,27 +947,27 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); -struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); -void xfrm_policy_flush(u8 type); +struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); +void 
xfrm_policy_flush(void); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(u8 type); +extern void xfrm_policy_flush(void); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); +extern int xfrm_flush_bundles(void); +extern void xfrm_flush_all_bundles(void); +extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); -extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); diff --git a/trunk/kernel/taskstats.c b/trunk/kernel/taskstats.c index 2ed4040d0dc5..e78187657330 100644 --- a/trunk/kernel/taskstats.c +++ b/trunk/kernel/taskstats.c @@ -75,7 +75,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(size, GFP_KERNEL); + skb = nlmsg_new(size); if (!skb) return -ENOMEM; diff --git a/trunk/mm/page_alloc.c b/trunk/mm/page_alloc.c index 3b5358a0561f..54a4f5375bba 100644 --- a/trunk/mm/page_alloc.c +++ b/trunk/mm/page_alloc.c @@ -2363,7 +2363,7 @@ int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write, return 0; } -int hashdist = HASHDIST_DEFAULT; +__initdata int hashdist = HASHDIST_DEFAULT; #ifdef CONFIG_NUMA static int __init set_hashdist(char *str) diff --git a/trunk/net/Kconfig 
b/trunk/net/Kconfig index 6528a935622c..4959a4e1e0fe 100644 --- a/trunk/net/Kconfig +++ b/trunk/net/Kconfig @@ -249,11 +249,6 @@ source "net/ieee80211/Kconfig" config WIRELESS_EXT bool -source "net/netlabel/Kconfig" - -config FIB_RULES - bool - endif # if NET endmenu # Networking diff --git a/trunk/net/Makefile b/trunk/net/Makefile index ad4d14f4bb29..065796f5fb17 100644 --- a/trunk/net/Makefile +++ b/trunk/net/Makefile @@ -46,7 +46,6 @@ obj-$(CONFIG_IP_DCCP) += dccp/ obj-$(CONFIG_IP_SCTP) += sctp/ obj-$(CONFIG_IEEE80211) += ieee80211/ obj-$(CONFIG_TIPC) += tipc/ -obj-$(CONFIG_NETLABEL) += netlabel/ ifeq ($(CONFIG_NET),y) obj-$(CONFIG_SYSCTL) += sysctl_net.o diff --git a/trunk/net/atm/atm_sysfs.c b/trunk/net/atm/atm_sysfs.c index c0a4ae28fcfa..5df4b9a068bb 100644 --- a/trunk/net/atm/atm_sysfs.c +++ b/trunk/net/atm/atm_sysfs.c @@ -1,5 +1,6 @@ /* ATM driver model support. */ +#include #include #include #include diff --git a/trunk/net/atm/mpc.c b/trunk/net/atm/mpc.c index b87c2a88bdce..00704661e83f 100644 --- a/trunk/net/atm/mpc.c +++ b/trunk/net/atm/mpc.c @@ -98,6 +98,11 @@ static struct notifier_block mpoa_notifier = { 0 }; +#ifdef CONFIG_PROC_FS +extern int mpc_proc_init(void); +extern void mpc_proc_clean(void); +#endif + struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; static DEFINE_TIMER(mpc_timer, NULL, 0, 0); @@ -1434,8 +1439,12 @@ static __init int atm_mpoa_init(void) { register_atm_ioctl(&atm_ioctl_ops); +#ifdef CONFIG_PROC_FS if (mpc_proc_init() != 0) printk(KERN_INFO "mpoa: failed to initialize /proc/mpoa\n"); + else + printk(KERN_INFO "mpoa: /proc/mpoa initialized\n"); +#endif printk("mpc.c: " __DATE__ " " __TIME__ " initialized\n"); @@ -1448,7 +1457,9 @@ static void __exit atm_mpoa_cleanup(void) struct atm_mpoa_qos *qos, *nextqos; struct lec_priv *priv; +#ifdef CONFIG_PROC_FS mpc_proc_clean(); +#endif del_timer(&mpc_timer); unregister_netdevice_notifier(&mpoa_notifier); diff --git a/trunk/net/atm/mpc.h 
b/trunk/net/atm/mpc.h index 3c7981a229e8..863ddf6079e1 100644 --- a/trunk/net/atm/mpc.h +++ b/trunk/net/atm/mpc.h @@ -50,12 +50,4 @@ int atm_mpoa_delete_qos(struct atm_mpoa_qos *qos); struct seq_file; void atm_mpoa_disp_qos(struct seq_file *m); -#ifdef CONFIG_PROC_FS -int mpc_proc_init(void); -void mpc_proc_clean(void); -#else -#define mpc_proc_init() (0) -#define mpc_proc_clean() do { } while(0) -#endif - #endif /* _MPC_H_ */ diff --git a/trunk/net/bridge/br_forward.c b/trunk/net/bridge/br_forward.c index 191b861e5e53..864fbbc7b24d 100644 --- a/trunk/net/bridge/br_forward.c +++ b/trunk/net/bridge/br_forward.c @@ -38,10 +38,13 @@ int br_dev_queue_push_xmit(struct sk_buff *skb) if (packet_length(skb) > skb->dev->mtu && !skb_is_gso(skb)) kfree_skb(skb); else { +#ifdef CONFIG_BRIDGE_NETFILTER /* ip_refrag calls ip_fragment, doesn't copy the MAC header. */ if (nf_bridge_maybe_copy_header(skb)) kfree_skb(skb); - else { + else +#endif + { skb_push(skb, ETH_HLEN); dev_queue_xmit(skb); diff --git a/trunk/net/bridge/br_netfilter.c b/trunk/net/bridge/br_netfilter.c index ac181be13d83..05b3de888243 100644 --- a/trunk/net/bridge/br_netfilter.c +++ b/trunk/net/bridge/br_netfilter.c @@ -53,10 +53,10 @@ #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; -static int brnf_call_iptables __read_mostly = 1; -static int brnf_call_ip6tables __read_mostly = 1; -static int brnf_call_arptables __read_mostly = 1; -static int brnf_filter_vlan_tagged __read_mostly = 1; +static int brnf_call_iptables = 1; +static int brnf_call_ip6tables = 1; +static int brnf_call_arptables = 1; +static int brnf_filter_vlan_tagged = 1; #else #define brnf_filter_vlan_tagged 1 #endif @@ -127,37 +127,14 @@ static inline struct nf_bridge_info *nf_bridge_alloc(struct sk_buff *skb) static inline void nf_bridge_save_header(struct sk_buff *skb) { - int header_size = ETH_HLEN; + int header_size = 16; if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; + header_size = 18; 
memcpy(skb->nf_bridge->data, skb->data - header_size, header_size); } -/* - * When forwarding bridge frames, we save a copy of the original - * header before processing. - */ -int nf_bridge_copy_header(struct sk_buff *skb) -{ - int err; - int header_size = ETH_HLEN; - - if (skb->protocol == htons(ETH_P_8021Q)) - header_size += VLAN_HLEN; - - err = skb_cow(skb, header_size); - if (err) - return err; - - memcpy(skb->data - header_size, skb->nf_bridge->data, header_size); - - if (skb->protocol == htons(ETH_P_8021Q)) - __skb_push(skb, VLAN_HLEN); - return 0; -} - /* PF_BRIDGE/PRE_ROUTING *********************************************/ /* Undo the changes made for ip6tables PREROUTING and continue the * bridge PRE_ROUTING hook. */ @@ -718,6 +695,16 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb, else pf = PF_INET6; +#ifdef CONFIG_NETFILTER_DEBUG + /* Sometimes we get packets with NULL ->dst here (for example, + * running a dhcp client daemon triggers this). This should now + * be fixed, but let's keep the check around. */ + if (skb->dst == NULL) { + printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); + return NF_ACCEPT; + } +#endif + nf_bridge = skb->nf_bridge; nf_bridge->physoutdev = skb->dev; realindev = nf_bridge->physindev; @@ -799,7 +786,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, * keep the check just to be sure... */ if (skb->mac.raw < skb->head || skb->mac.raw + ETH_HLEN > skb->data) { printk(KERN_CRIT "br_netfilter: Argh!! 
br_nf_post_routing: " - "bad mac.raw pointer.\n"); + "bad mac.raw pointer."); goto print_error; } #endif @@ -817,7 +804,7 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, #ifdef CONFIG_NETFILTER_DEBUG if (skb->dst == NULL) { - printk(KERN_INFO "br_netfilter post_routing: skb->dst == NULL\n"); + printk(KERN_CRIT "br_netfilter: skb->dst == NULL."); goto print_error; } #endif @@ -854,7 +841,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb, } printk(" head:%p, raw:%p, data:%p\n", skb->head, skb->mac.raw, skb->data); - dump_stack(); return NF_ACCEPT; #endif } diff --git a/trunk/net/bridge/br_netlink.c b/trunk/net/bridge/br_netlink.c index 8f661195d09d..53086fb75089 100644 --- a/trunk/net/bridge/br_netlink.c +++ b/trunk/net/bridge/br_netlink.c @@ -12,7 +12,6 @@ #include #include -#include #include "br_private.h" /* @@ -77,24 +76,26 @@ static int br_fill_ifinfo(struct sk_buff *skb, const struct net_bridge_port *por void br_ifinfo_notify(int event, struct net_bridge_port *port) { struct sk_buff *skb; - int payload = sizeof(struct ifinfomsg) + 128; - int err = -ENOBUFS; + int err = -ENOMEM; pr_debug("bridge notify event=%d\n", event); - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = br_fill_ifinfo(skb, port, 0, 0, event, 0); - if (err < 0) { - kfree_skb(skb); - goto errout; - } + skb = alloc_skb(NLMSG_SPACE(sizeof(struct ifinfomsg) + 128), + GFP_ATOMIC); + if (!skb) + goto err_out; - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); -errout: + err = br_fill_ifinfo(skb, port, current->pid, 0, event, 0); if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + goto err_kfree; + + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); + return; + +err_kfree: + kfree_skb(skb); +err_out: + netlink_set_err(rtnl, 0, RTNLGRP_LINK, err); } /* diff --git a/trunk/net/bridge/netfilter/ebtables.c 
b/trunk/net/bridge/netfilter/ebtables.c index 3df55b2bd91d..3a13ed643459 100644 --- a/trunk/net/bridge/netfilter/ebtables.c +++ b/trunk/net/bridge/netfilter/ebtables.c @@ -24,7 +24,6 @@ #include #include #include -#include #include #include #include @@ -32,9 +31,36 @@ /* needed for logical [in,out]-dev filtering */ #include "../br_private.h" +/* list_named_find */ +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) +#include +#include + +#if 0 +/* use this for remote debugging + * Copyright (C) 1998 by Ori Pomerantz + * Print the string to the appropriate tty, the one + * the current task uses + */ +static void print_string(char *str) +{ + struct tty_struct *my_tty; + + /* The tty for the current task */ + my_tty = current->signal->tty; + if (my_tty != NULL) { + my_tty->driver->write(my_tty, 0, str, strlen(str)); + my_tty->driver->write(my_tty, 0, "\015\012", 2); + } +} + +#define BUGPRINT(args) print_string(args); +#else #define BUGPRINT(format, args...) printk("kernel msg: ebtables bug: please "\ "report to author: "format, ## args) /* #define BUGPRINT(format, args...) */ +#endif #define MEMPRINT(format, args...) printk("kernel msg: ebtables "\ ": out of memory: "format, ## args) /* #define MEMPRINT(format, args...) 
*/ @@ -273,22 +299,18 @@ static inline void * find_inlist_lock_noload(struct list_head *head, const char *name, int *error, struct mutex *mutex) { - struct { - struct list_head list; - char name[EBT_FUNCTION_MAXNAMELEN]; - } *e; + void *ret; *error = mutex_lock_interruptible(mutex); if (*error != 0) return NULL; - list_for_each_entry(e, head, list) { - if (strcmp(e->name, name) == 0) - return e; + ret = list_named_find(head, name); + if (!ret) { + *error = -ENOENT; + mutex_unlock(mutex); } - *error = -ENOENT; - mutex_unlock(mutex); - return NULL; + return ret; } #ifndef CONFIG_KMOD @@ -1042,19 +1064,15 @@ static int do_replace(void __user *user, unsigned int len) int ebt_register_target(struct ebt_target *target) { - struct ebt_target *t; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - list_for_each_entry(t, &ebt_targets, list) { - if (strcmp(t->name, target->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } + if (!list_named_insert(&ebt_targets, target)) { + mutex_unlock(&ebt_mutex); + return -EEXIST; } - list_add(&target->list, &ebt_targets); mutex_unlock(&ebt_mutex); return 0; @@ -1063,25 +1081,21 @@ int ebt_register_target(struct ebt_target *target) void ebt_unregister_target(struct ebt_target *target) { mutex_lock(&ebt_mutex); - list_del(&target->list); + LIST_DELETE(&ebt_targets, target); mutex_unlock(&ebt_mutex); } int ebt_register_match(struct ebt_match *match) { - struct ebt_match *m; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - list_for_each_entry(m, &ebt_matches, list) { - if (strcmp(m->name, match->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } + if (!list_named_insert(&ebt_matches, match)) { + mutex_unlock(&ebt_mutex); + return -EEXIST; } - list_add(&match->list, &ebt_matches); mutex_unlock(&ebt_mutex); return 0; @@ -1090,25 +1104,21 @@ int ebt_register_match(struct ebt_match *match) void ebt_unregister_match(struct ebt_match *match) { 
mutex_lock(&ebt_mutex); - list_del(&match->list); + LIST_DELETE(&ebt_matches, match); mutex_unlock(&ebt_mutex); } int ebt_register_watcher(struct ebt_watcher *watcher) { - struct ebt_watcher *w; int ret; ret = mutex_lock_interruptible(&ebt_mutex); if (ret != 0) return ret; - list_for_each_entry(w, &ebt_watchers, list) { - if (strcmp(w->name, watcher->name) == 0) { - mutex_unlock(&ebt_mutex); - return -EEXIST; - } + if (!list_named_insert(&ebt_watchers, watcher)) { + mutex_unlock(&ebt_mutex); + return -EEXIST; } - list_add(&watcher->list, &ebt_watchers); mutex_unlock(&ebt_mutex); return 0; @@ -1117,14 +1127,13 @@ int ebt_register_watcher(struct ebt_watcher *watcher) void ebt_unregister_watcher(struct ebt_watcher *watcher) { mutex_lock(&ebt_mutex); - list_del(&watcher->list); + LIST_DELETE(&ebt_watchers, watcher); mutex_unlock(&ebt_mutex); } int ebt_register_table(struct ebt_table *table) { struct ebt_table_info *newinfo; - struct ebt_table *t; int ret, i, countersize; if (!table || !table->table ||!table->table->entries || @@ -1170,12 +1179,10 @@ int ebt_register_table(struct ebt_table *table) if (ret != 0) goto free_chainstack; - list_for_each_entry(t, &ebt_tables, list) { - if (strcmp(t->name, table->name) == 0) { - ret = -EEXIST; - BUGPRINT("Table name already exists\n"); - goto free_unlock; - } + if (list_named_find(&ebt_tables, table->name)) { + ret = -EEXIST; + BUGPRINT("Table name already exists\n"); + goto free_unlock; } /* Hold a reference count if the chains aren't empty */ @@ -1183,7 +1190,7 @@ int ebt_register_table(struct ebt_table *table) ret = -ENOENT; goto free_unlock; } - list_add(&table->list, &ebt_tables); + list_prepend(&ebt_tables, table); mutex_unlock(&ebt_mutex); return 0; free_unlock: @@ -1209,7 +1216,7 @@ void ebt_unregister_table(struct ebt_table *table) return; } mutex_lock(&ebt_mutex); - list_del(&table->list); + LIST_DELETE(&ebt_tables, table); mutex_unlock(&ebt_mutex); vfree(table->private->entries); if (table->private->chainstack) { @@ 
-1479,7 +1486,7 @@ static int __init ebtables_init(void) int ret; mutex_lock(&ebt_mutex); - list_add(&ebt_standard_target.list, &ebt_targets); + list_named_insert(&ebt_targets, &ebt_standard_target); mutex_unlock(&ebt_mutex); if ((ret = nf_register_sockopt(&ebt_sockopts)) < 0) return ret; diff --git a/trunk/net/core/Makefile b/trunk/net/core/Makefile index 119568077dab..2645ba428d48 100644 --- a/trunk/net/core/Makefile +++ b/trunk/net/core/Makefile @@ -17,4 +17,3 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o -obj-$(CONFIG_FIB_RULES) += fib_rules.o diff --git a/trunk/net/core/datagram.c b/trunk/net/core/datagram.c index f558c61aecc7..aecddcc30401 100644 --- a/trunk/net/core/datagram.c +++ b/trunk/net/core/datagram.c @@ -417,7 +417,7 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb) sum = (u16)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)); if (likely(!sum)) { - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_HW)) netdev_rx_csum_fault(skb->dev); skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -462,7 +462,7 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb, goto fault; if ((unsigned short)csum_fold(csum)) goto csum_error; - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_HW)) netdev_rx_csum_fault(skb->dev); iov->iov_len -= chunk; iov->iov_base += chunk; diff --git a/trunk/net/core/dev.c b/trunk/net/core/dev.c index 14de297d024d..d4a1ec3bded5 100644 --- a/trunk/net/core/dev.c +++ b/trunk/net/core/dev.c @@ -640,8 +640,6 @@ int dev_valid_name(const char *name) { if (*name == '\0') return 0; - if (strlen(name) >= IFNAMSIZ) - return 0; if (!strcmp(name, ".") || !strcmp(name, "..")) return 0; @@ -1168,12 +1166,12 @@ EXPORT_SYMBOL(netif_device_attach); * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing 
path. */ -int skb_checksum_help(struct sk_buff *skb) +int skb_checksum_help(struct sk_buff *skb, int inward) { unsigned int csum; int ret = 0, offset = skb->h.raw - skb->data; - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (inward) goto out_set_summed; if (unlikely(skb_shinfo(skb)->gso_size)) { @@ -1225,7 +1223,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) skb->mac_len = skb->nh.raw - skb->data; __skb_pull(skb, skb->mac_len); - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { if (skb_header_cloned(skb) && (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) return ERR_PTR(err); @@ -1234,7 +1232,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features) rcu_read_lock(); list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) { if (ptype->type == type && !ptype->dev && ptype->gso_segment) { - if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) { + if (unlikely(skb->ip_summed != CHECKSUM_HW)) { err = ptype->gso_send_check(skb); segs = ERR_PTR(err); if (err || skb_gso_ok(skb, features)) @@ -1446,11 +1444,11 @@ int dev_queue_xmit(struct sk_buff *skb) /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. 
*/ - if (skb->ip_summed == CHECKSUM_PARTIAL && + if (skb->ip_summed == CHECKSUM_HW && (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) - if (skb_checksum_help(skb)) + if (skb_checksum_help(skb, 0)) goto out_kfree_skb; gso: @@ -3193,15 +3191,13 @@ struct net_device *alloc_netdev(int sizeof_priv, const char *name, struct net_device *dev; int alloc_size; - BUG_ON(strlen(name) >= sizeof(dev->name)); - /* ensure 32-byte alignment of both the device and private area */ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { - printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n"); + printk(KERN_ERR "alloc_dev: Unable to allocate device.\n"); return NULL; } diff --git a/trunk/net/core/dev_mcast.c b/trunk/net/core/dev_mcast.c index b22648d04d36..c57d887da2ef 100644 --- a/trunk/net/core/dev_mcast.c +++ b/trunk/net/core/dev_mcast.c @@ -21,7 +21,8 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include +#include #include #include #include diff --git a/trunk/net/core/fib_rules.c b/trunk/net/core/fib_rules.c deleted file mode 100644 index a99d87d82b7f..000000000000 --- a/trunk/net/core/fib_rules.c +++ /dev/null @@ -1,421 +0,0 @@ -/* - * net/core/fib_rules.c Generic Routing Rules - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2. 
- * - * Authors: Thomas Graf - */ - -#include -#include -#include -#include -#include - -static LIST_HEAD(rules_ops); -static DEFINE_SPINLOCK(rules_mod_lock); - -static void notify_rule_change(int event, struct fib_rule *rule, - struct fib_rules_ops *ops, struct nlmsghdr *nlh, - u32 pid); - -static struct fib_rules_ops *lookup_rules_ops(int family) -{ - struct fib_rules_ops *ops; - - rcu_read_lock(); - list_for_each_entry_rcu(ops, &rules_ops, list) { - if (ops->family == family) { - if (!try_module_get(ops->owner)) - ops = NULL; - rcu_read_unlock(); - return ops; - } - } - rcu_read_unlock(); - - return NULL; -} - -static void rules_ops_put(struct fib_rules_ops *ops) -{ - if (ops) - module_put(ops->owner); -} - -int fib_rules_register(struct fib_rules_ops *ops) -{ - int err = -EEXIST; - struct fib_rules_ops *o; - - if (ops->rule_size < sizeof(struct fib_rule)) - return -EINVAL; - - if (ops->match == NULL || ops->configure == NULL || - ops->compare == NULL || ops->fill == NULL || - ops->action == NULL) - return -EINVAL; - - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) - if (ops->family == o->family) - goto errout; - - list_add_tail_rcu(&ops->list, &rules_ops); - err = 0; -errout: - spin_unlock(&rules_mod_lock); - - return err; -} - -EXPORT_SYMBOL_GPL(fib_rules_register); - -static void cleanup_ops(struct fib_rules_ops *ops) -{ - struct fib_rule *rule, *tmp; - - list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { - list_del_rcu(&rule->list); - fib_rule_put(rule); - } -} - -int fib_rules_unregister(struct fib_rules_ops *ops) -{ - int err = 0; - struct fib_rules_ops *o; - - spin_lock(&rules_mod_lock); - list_for_each_entry(o, &rules_ops, list) { - if (o == ops) { - list_del_rcu(&o->list); - cleanup_ops(ops); - goto out; - } - } - - err = -ENOENT; -out: - spin_unlock(&rules_mod_lock); - - synchronize_rcu(); - - return err; -} - -EXPORT_SYMBOL_GPL(fib_rules_unregister); - -int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, 
- int flags, struct fib_lookup_arg *arg) -{ - struct fib_rule *rule; - int err; - - rcu_read_lock(); - - list_for_each_entry_rcu(rule, ops->rules_list, list) { - if (rule->ifindex && (rule->ifindex != fl->iif)) - continue; - - if (!ops->match(rule, fl, flags)) - continue; - - err = ops->action(rule, fl, flags, arg); - if (err != -EAGAIN) { - fib_rule_get(rule); - arg->rule = rule; - goto out; - } - } - - err = -ENETUNREACH; -out: - rcu_read_unlock(); - - return err; -} - -EXPORT_SYMBOL_GPL(fib_rules_lookup); - -int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) -{ - struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rules_ops *ops = NULL; - struct fib_rule *rule, *r, *last = NULL; - struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; - - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) - goto errout; - - ops = lookup_rules_ops(frh->family); - if (ops == NULL) { - err = EAFNOSUPPORT; - goto errout; - } - - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); - if (err < 0) - goto errout; - - rule = kzalloc(ops->rule_size, GFP_KERNEL); - if (rule == NULL) { - err = -ENOMEM; - goto errout; - } - - if (tb[FRA_PRIORITY]) - rule->pref = nla_get_u32(tb[FRA_PRIORITY]); - - if (tb[FRA_IFNAME]) { - struct net_device *dev; - - rule->ifindex = -1; - nla_strlcpy(rule->ifname, tb[FRA_IFNAME], IFNAMSIZ); - dev = __dev_get_by_name(rule->ifname); - if (dev) - rule->ifindex = dev->ifindex; - } - - rule->action = frh->action; - rule->flags = frh->flags; - rule->table = frh_get_table(frh, tb); - - if (!rule->pref && ops->default_pref) - rule->pref = ops->default_pref(); - - err = ops->configure(rule, skb, nlh, frh, tb); - if (err < 0) - goto errout_free; - - list_for_each_entry(r, ops->rules_list, list) { - if (r->pref > rule->pref) - break; - last = r; - } - - fib_rule_get(rule); - - if (last) - list_add_rcu(&rule->list, &last->list); - else - list_add_rcu(&rule->list, ops->rules_list); - - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, 
NETLINK_CB(skb).pid); - rules_ops_put(ops); - return 0; - -errout_free: - kfree(rule); -errout: - rules_ops_put(ops); - return err; -} - -int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) -{ - struct fib_rule_hdr *frh = nlmsg_data(nlh); - struct fib_rules_ops *ops = NULL; - struct fib_rule *rule; - struct nlattr *tb[FRA_MAX+1]; - int err = -EINVAL; - - if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) - goto errout; - - ops = lookup_rules_ops(frh->family); - if (ops == NULL) { - err = EAFNOSUPPORT; - goto errout; - } - - err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); - if (err < 0) - goto errout; - - list_for_each_entry(rule, ops->rules_list, list) { - if (frh->action && (frh->action != rule->action)) - continue; - - if (frh->table && (frh_get_table(frh, tb) != rule->table)) - continue; - - if (tb[FRA_PRIORITY] && - (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) - continue; - - if (tb[FRA_IFNAME] && - nla_strcmp(tb[FRA_IFNAME], rule->ifname)) - continue; - - if (!ops->compare(rule, frh, tb)) - continue; - - if (rule->flags & FIB_RULE_PERMANENT) { - err = -EPERM; - goto errout; - } - - list_del_rcu(&rule->list); - synchronize_rcu(); - notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); - fib_rule_put(rule); - rules_ops_put(ops); - return 0; - } - - err = -ENOENT; -errout: - rules_ops_put(ops); - return err; -} - -static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, - u32 pid, u32 seq, int type, int flags, - struct fib_rules_ops *ops) -{ - struct nlmsghdr *nlh; - struct fib_rule_hdr *frh; - - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); - if (nlh == NULL) - return -1; - - frh = nlmsg_data(nlh); - frh->table = rule->table; - NLA_PUT_U32(skb, FRA_TABLE, rule->table); - frh->res1 = 0; - frh->res2 = 0; - frh->action = rule->action; - frh->flags = rule->flags; - - if (rule->ifname[0]) - NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); - - if (rule->pref) - NLA_PUT_U32(skb, 
FRA_PRIORITY, rule->pref); - - if (ops->fill(rule, skb, nlh, frh) < 0) - goto nla_put_failure; - - return nlmsg_end(skb, nlh); - -nla_put_failure: - return nlmsg_cancel(skb, nlh); -} - -int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) -{ - int idx = 0; - struct fib_rule *rule; - struct fib_rules_ops *ops; - - ops = lookup_rules_ops(family); - if (ops == NULL) - return -EAFNOSUPPORT; - - rcu_read_lock(); - list_for_each_entry(rule, ops->rules_list, list) { - if (idx < cb->args[0]) - goto skip; - - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWRULE, - NLM_F_MULTI, ops) < 0) - break; -skip: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - rules_ops_put(ops); - - return skb->len; -} - -EXPORT_SYMBOL_GPL(fib_rules_dump); - -static void notify_rule_change(int event, struct fib_rule *rule, - struct fib_rules_ops *ops, struct nlmsghdr *nlh, - u32 pid) -{ - struct sk_buff *skb; - int err = -ENOBUFS; - - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = fib_nl_fill_rule(skb, rule, pid, nlh->nlmsg_seq, event, 0, ops); - if (err < 0) { - kfree_skb(skb); - goto errout; - } - - err = rtnl_notify(skb, pid, ops->nlgroup, nlh, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(ops->nlgroup, err); -} - -static void attach_rules(struct list_head *rules, struct net_device *dev) -{ - struct fib_rule *rule; - - list_for_each_entry(rule, rules, list) { - if (rule->ifindex == -1 && - strcmp(dev->name, rule->ifname) == 0) - rule->ifindex = dev->ifindex; - } -} - -static void detach_rules(struct list_head *rules, struct net_device *dev) -{ - struct fib_rule *rule; - - list_for_each_entry(rule, rules, list) - if (rule->ifindex == dev->ifindex) - rule->ifindex = -1; -} - - -static int fib_rules_event(struct notifier_block *this, unsigned long event, - void *ptr) -{ - struct net_device *dev = ptr; - struct fib_rules_ops *ops; - - ASSERT_RTNL(); - rcu_read_lock(); - - 
switch (event) { - case NETDEV_REGISTER: - list_for_each_entry(ops, &rules_ops, list) - attach_rules(ops->rules_list, dev); - break; - - case NETDEV_UNREGISTER: - list_for_each_entry(ops, &rules_ops, list) - detach_rules(ops->rules_list, dev); - break; - } - - rcu_read_unlock(); - - return NOTIFY_DONE; -} - -static struct notifier_block fib_rules_notifier = { - .notifier_call = fib_rules_event, -}; - -static int __init fib_rules_init(void) -{ - return register_netdevice_notifier(&fib_rules_notifier); -} - -subsys_initcall(fib_rules_init); diff --git a/trunk/net/core/filter.c b/trunk/net/core/filter.c index 6732782a5a40..5b4486a60cf6 100644 --- a/trunk/net/core/filter.c +++ b/trunk/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - rcu_read_lock_bh(); - old_fp = rcu_dereference(sk->sk_filter); - rcu_assign_pointer(sk->sk_filter, fp); - rcu_read_unlock_bh(); + spin_lock_bh(&sk->sk_lock.slock); + old_fp = sk->sk_filter; + sk->sk_filter = fp; + spin_unlock_bh(&sk->sk_lock.slock); fp = old_fp; } diff --git a/trunk/net/core/flow.c b/trunk/net/core/flow.c index f23e7e386543..2191af5f26ac 100644 --- a/trunk/net/core/flow.c +++ b/trunk/net/core/flow.c @@ -32,6 +32,7 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; + u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -164,7 +165,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -188,6 +189,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && + fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = 
fle->object; @@ -212,6 +214,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; + fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -223,7 +226,7 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, void *obj; atomic_t *obj_ref; - resolver(key, family, dir, &obj, &obj_ref); + resolver(key, sk_sid, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); @@ -343,8 +346,12 @@ static int __init flow_cache_init(void) flow_cachep = kmem_cache_create("flow_cache", sizeof(struct flow_cache_entry), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + + if (!flow_cachep) + panic("NET: failed to allocate flow cache slab\n"); + flow_hash_shift = 10; flow_lwm = 2 * flow_hash_size; flow_hwm = 4 * flow_hash_size; diff --git a/trunk/net/core/neighbour.c b/trunk/net/core/neighbour.c index b6c69e1463e8..fe2113f54e2b 100644 --- a/trunk/net/core/neighbour.c +++ b/trunk/net/core/neighbour.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -889,7 +888,7 @@ int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) return rc; } -static void neigh_update_hhs(struct neighbour *neigh) +static __inline__ void neigh_update_hhs(struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, struct net_device*, unsigned char *) = @@ -1339,10 +1338,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) neigh_rand_reach_time(tbl->parms.base_reachable_time); if (!tbl->kmem_cachep) - tbl->kmem_cachep = - kmem_cache_create(tbl->id, tbl->entry_size, 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, - NULL, NULL); + tbl->kmem_cachep = kmem_cache_create(tbl->id, + tbl->entry_size, + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + if (!tbl->kmem_cachep) + panic("cannot create neighbour cache"); + tbl->stats = alloc_percpu(struct neigh_statistics); if (!tbl->stats) 
panic("cannot create neighbour cache statistics"); @@ -1437,62 +1440,48 @@ int neigh_table_clear(struct neigh_table *tbl) int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm; - struct nlattr *dst_attr; + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; struct neigh_table *tbl; struct net_device *dev = NULL; - int err = -EINVAL; + int err = -ENODEV; - if (nlmsg_len(nlh) < sizeof(*ndm)) + if (ndm->ndm_ifindex && + (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) goto out; - dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST); - if (dst_attr == NULL) - goto out; - - ndm = nlmsg_data(nlh); - if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); - if (dev == NULL) { - err = -ENODEV; - goto out; - } - } - read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - struct neighbour *neigh; + struct rtattr *dst_attr = nda[NDA_DST - 1]; + struct neighbour *n; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - if (nla_len(dst_attr) < tbl->key_len) + err = -EINVAL; + if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) goto out_dev_put; if (ndm->ndm_flags & NTF_PROXY) { - err = pneigh_delete(tbl, nla_data(dst_attr), dev); + err = pneigh_delete(tbl, RTA_DATA(dst_attr), dev); goto out_dev_put; } - if (dev == NULL) - goto out_dev_put; + if (!dev) + goto out; - neigh = neigh_lookup(tbl, nla_data(dst_attr), dev); - if (neigh == NULL) { - err = -ENOENT; - goto out_dev_put; + n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); + if (n) { + err = neigh_update(n, NULL, NUD_FAILED, + NEIGH_UPDATE_F_OVERRIDE| + NEIGH_UPDATE_F_ADMIN); + neigh_release(n); } - - err = neigh_update(neigh, NULL, NUD_FAILED, - NEIGH_UPDATE_F_OVERRIDE | - NEIGH_UPDATE_F_ADMIN); - neigh_release(neigh); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; - + err = -EADDRNOTAVAIL; out_dev_put: if (dev) dev_put(dev); @@ -1502,93 +1491,76 @@ int neigh_delete(struct sk_buff 
*skb, struct nlmsghdr *nlh, void *arg) int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ndmsg *ndm; - struct nlattr *tb[NDA_MAX+1]; + struct ndmsg *ndm = NLMSG_DATA(nlh); + struct rtattr **nda = arg; struct neigh_table *tbl; struct net_device *dev = NULL; - int err; + int err = -ENODEV; - err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL); - if (err < 0) + if (ndm->ndm_ifindex && + (dev = dev_get_by_index(ndm->ndm_ifindex)) == NULL) goto out; - err = -EINVAL; - if (tb[NDA_DST] == NULL) - goto out; - - ndm = nlmsg_data(nlh); - if (ndm->ndm_ifindex) { - dev = dev_get_by_index(ndm->ndm_ifindex); - if (dev == NULL) { - err = -ENODEV; - goto out; - } - - if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len) - goto out_dev_put; - } - read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { - int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE; - struct neighbour *neigh; - void *dst, *lladdr; + struct rtattr *lladdr_attr = nda[NDA_LLADDR - 1]; + struct rtattr *dst_attr = nda[NDA_DST - 1]; + int override = 1; + struct neighbour *n; if (tbl->family != ndm->ndm_family) continue; read_unlock(&neigh_tbl_lock); - if (nla_len(tb[NDA_DST]) < tbl->key_len) + err = -EINVAL; + if (!dst_attr || RTA_PAYLOAD(dst_attr) < tbl->key_len) goto out_dev_put; - dst = nla_data(tb[NDA_DST]); - lladdr = tb[NDA_LLADDR] ? 
nla_data(tb[NDA_LLADDR]) : NULL; if (ndm->ndm_flags & NTF_PROXY) { - struct pneigh_entry *pn; - err = -ENOBUFS; - pn = pneigh_lookup(tbl, dst, dev, 1); - if (pn) { - pn->flags = ndm->ndm_flags; + if (pneigh_lookup(tbl, RTA_DATA(dst_attr), dev, 1)) err = 0; - } goto out_dev_put; } - if (dev == NULL) + err = -EINVAL; + if (!dev) + goto out; + if (lladdr_attr && RTA_PAYLOAD(lladdr_attr) < dev->addr_len) goto out_dev_put; - - neigh = neigh_lookup(tbl, dst, dev); - if (neigh == NULL) { - if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { - err = -ENOENT; - goto out_dev_put; - } - neigh = __neigh_lookup_errno(tbl, dst, dev); - if (IS_ERR(neigh)) { - err = PTR_ERR(neigh); + n = neigh_lookup(tbl, RTA_DATA(dst_attr), dev); + if (n) { + if (nlh->nlmsg_flags & NLM_F_EXCL) { + err = -EEXIST; + neigh_release(n); goto out_dev_put; } + + override = nlh->nlmsg_flags & NLM_F_REPLACE; + } else if (!(nlh->nlmsg_flags & NLM_F_CREATE)) { + err = -ENOENT; + goto out_dev_put; } else { - if (nlh->nlmsg_flags & NLM_F_EXCL) { - err = -EEXIST; - neigh_release(neigh); + n = __neigh_lookup_errno(tbl, RTA_DATA(dst_attr), dev); + if (IS_ERR(n)) { + err = PTR_ERR(n); goto out_dev_put; } - - if (!(nlh->nlmsg_flags & NLM_F_REPLACE)) - flags &= ~NEIGH_UPDATE_F_OVERRIDE; } - err = neigh_update(neigh, lladdr, ndm->ndm_state, flags); - neigh_release(neigh); + err = neigh_update(n, + lladdr_attr ? RTA_DATA(lladdr_attr) : NULL, + ndm->ndm_state, + (override ? 
NEIGH_UPDATE_F_OVERRIDE : 0) | + NEIGH_UPDATE_F_ADMIN); + + neigh_release(n); goto out_dev_put; } read_unlock(&neigh_tbl_lock); - err = -EAFNOSUPPORT; - + err = -EADDRNOTAVAIL; out_dev_put: if (dev) dev_put(dev); @@ -1598,59 +1570,56 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms) { - struct nlattr *nest; - - nest = nla_nest_start(skb, NDTA_PARMS); - if (nest == NULL) - return -ENOBUFS; + struct rtattr *nest = NULL; + + nest = RTA_NEST(skb, NDTA_PARMS); if (parms->dev) - NLA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); - - NLA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); - NLA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); - NLA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); - NLA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); - NLA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); - NLA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); - NLA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); - NLA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, + RTA_PUT_U32(skb, NDTPA_IFINDEX, parms->dev->ifindex); + + RTA_PUT_U32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)); + RTA_PUT_U32(skb, NDTPA_QUEUE_LEN, parms->queue_len); + RTA_PUT_U32(skb, NDTPA_PROXY_QLEN, parms->proxy_qlen); + RTA_PUT_U32(skb, NDTPA_APP_PROBES, parms->app_probes); + RTA_PUT_U32(skb, NDTPA_UCAST_PROBES, parms->ucast_probes); + RTA_PUT_U32(skb, NDTPA_MCAST_PROBES, parms->mcast_probes); + RTA_PUT_MSECS(skb, NDTPA_REACHABLE_TIME, parms->reachable_time); + RTA_PUT_MSECS(skb, NDTPA_BASE_REACHABLE_TIME, parms->base_reachable_time); - NLA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); - NLA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); - NLA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); - NLA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); - NLA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); - 
NLA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); + RTA_PUT_MSECS(skb, NDTPA_GC_STALETIME, parms->gc_staletime); + RTA_PUT_MSECS(skb, NDTPA_DELAY_PROBE_TIME, parms->delay_probe_time); + RTA_PUT_MSECS(skb, NDTPA_RETRANS_TIME, parms->retrans_time); + RTA_PUT_MSECS(skb, NDTPA_ANYCAST_DELAY, parms->anycast_delay); + RTA_PUT_MSECS(skb, NDTPA_PROXY_DELAY, parms->proxy_delay); + RTA_PUT_MSECS(skb, NDTPA_LOCKTIME, parms->locktime); - return nla_nest_end(skb, nest); + return RTA_NEST_END(skb, nest); -nla_put_failure: - return nla_nest_cancel(skb, nest); +rtattr_failure: + return RTA_NEST_CANCEL(skb, nest); } -static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, - u32 pid, u32 seq, int type, int flags) +static int neightbl_fill_info(struct neigh_table *tbl, struct sk_buff *skb, + struct netlink_callback *cb) { struct nlmsghdr *nlh; struct ndtmsg *ndtmsg; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); - if (nlh == NULL) - return -ENOBUFS; + nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), + NLM_F_MULTI); - ndtmsg = nlmsg_data(nlh); + ndtmsg = NLMSG_DATA(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; - NLA_PUT_STRING(skb, NDTA_NAME, tbl->id); - NLA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); - NLA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); - NLA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); - NLA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); + RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); + RTA_PUT_MSECS(skb, NDTA_GC_INTERVAL, tbl->gc_interval); + RTA_PUT_U32(skb, NDTA_THRESH1, tbl->gc_thresh1); + RTA_PUT_U32(skb, NDTA_THRESH2, tbl->gc_thresh2); + RTA_PUT_U32(skb, NDTA_THRESH3, tbl->gc_thresh3); { unsigned long now = jiffies; @@ -1669,7 +1638,7 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; - NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); + RTA_PUT(skb, NDTA_CONFIG, 
sizeof(ndc), &ndc); } { @@ -1694,50 +1663,55 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, ndst.ndts_forced_gc_runs += st->forced_gc_runs; } - NLA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); + RTA_PUT(skb, NDTA_STATS, sizeof(ndst), &ndst); } BUG_ON(tbl->parms.dev); if (neightbl_fill_parms(skb, &tbl->parms) < 0) - goto nla_put_failure; + goto rtattr_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + return NLMSG_END(skb, nlh); -nla_put_failure: +rtattr_failure: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + return NLMSG_CANCEL(skb, nlh); + +nlmsg_failure: + return -1; } -static int neightbl_fill_param_info(struct sk_buff *skb, - struct neigh_table *tbl, +static int neightbl_fill_param_info(struct neigh_table *tbl, struct neigh_parms *parms, - u32 pid, u32 seq, int type, - unsigned int flags) + struct sk_buff *skb, + struct netlink_callback *cb) { struct ndtmsg *ndtmsg; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags); - if (nlh == NULL) - return -ENOBUFS; + nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWNEIGHTBL, sizeof(struct ndtmsg), + NLM_F_MULTI); - ndtmsg = nlmsg_data(nlh); + ndtmsg = NLMSG_DATA(nlh); read_lock_bh(&tbl->lock); ndtmsg->ndtm_family = tbl->family; ndtmsg->ndtm_pad1 = 0; ndtmsg->ndtm_pad2 = 0; + RTA_PUT_STRING(skb, NDTA_NAME, tbl->id); - if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 || - neightbl_fill_parms(skb, parms) < 0) - goto errout; + if (neightbl_fill_parms(skb, parms) < 0) + goto rtattr_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); -errout: + return NLMSG_END(skb, nlh); + +rtattr_failure: read_unlock_bh(&tbl->lock); - return nlmsg_cancel(skb, nlh); + return NLMSG_CANCEL(skb, nlh); + +nlmsg_failure: + return -1; } static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, @@ -1753,61 +1727,28 @@ static inline struct neigh_parms *lookup_neigh_params(struct neigh_table *tbl, return NULL; } -static struct 
nla_policy nl_neightbl_policy[NDTA_MAX+1] __read_mostly = { - [NDTA_NAME] = { .type = NLA_STRING }, - [NDTA_THRESH1] = { .type = NLA_U32 }, - [NDTA_THRESH2] = { .type = NLA_U32 }, - [NDTA_THRESH3] = { .type = NLA_U32 }, - [NDTA_GC_INTERVAL] = { .type = NLA_U64 }, - [NDTA_PARMS] = { .type = NLA_NESTED }, -}; - -static struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] __read_mostly = { - [NDTPA_IFINDEX] = { .type = NLA_U32 }, - [NDTPA_QUEUE_LEN] = { .type = NLA_U32 }, - [NDTPA_PROXY_QLEN] = { .type = NLA_U32 }, - [NDTPA_APP_PROBES] = { .type = NLA_U32 }, - [NDTPA_UCAST_PROBES] = { .type = NLA_U32 }, - [NDTPA_MCAST_PROBES] = { .type = NLA_U32 }, - [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 }, - [NDTPA_GC_STALETIME] = { .type = NLA_U64 }, - [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 }, - [NDTPA_RETRANS_TIME] = { .type = NLA_U64 }, - [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 }, - [NDTPA_PROXY_DELAY] = { .type = NLA_U64 }, - [NDTPA_LOCKTIME] = { .type = NLA_U64 }, -}; - int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { struct neigh_table *tbl; - struct ndtmsg *ndtmsg; - struct nlattr *tb[NDTA_MAX+1]; - int err; - - err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX, - nl_neightbl_policy); - if (err < 0) - goto errout; + struct ndtmsg *ndtmsg = NLMSG_DATA(nlh); + struct rtattr **tb = arg; + int err = -EINVAL; - if (tb[NDTA_NAME] == NULL) { - err = -EINVAL; - goto errout; - } + if (!tb[NDTA_NAME - 1] || !RTA_PAYLOAD(tb[NDTA_NAME - 1])) + return -EINVAL; - ndtmsg = nlmsg_data(nlh); read_lock(&neigh_tbl_lock); for (tbl = neigh_tables; tbl; tbl = tbl->next) { if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family) continue; - if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0) + if (!rtattr_strcmp(tb[NDTA_NAME - 1], tbl->id)) break; } if (tbl == NULL) { err = -ENOENT; - goto errout_locked; + goto errout; } /* @@ -1816,178 +1757,165 @@ int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) */ write_lock_bh(&tbl->lock); - if 
(tb[NDTA_PARMS]) { - struct nlattr *tbp[NDTPA_MAX+1]; + if (tb[NDTA_THRESH1 - 1]) + tbl->gc_thresh1 = RTA_GET_U32(tb[NDTA_THRESH1 - 1]); + + if (tb[NDTA_THRESH2 - 1]) + tbl->gc_thresh2 = RTA_GET_U32(tb[NDTA_THRESH2 - 1]); + + if (tb[NDTA_THRESH3 - 1]) + tbl->gc_thresh3 = RTA_GET_U32(tb[NDTA_THRESH3 - 1]); + + if (tb[NDTA_GC_INTERVAL - 1]) + tbl->gc_interval = RTA_GET_MSECS(tb[NDTA_GC_INTERVAL - 1]); + + if (tb[NDTA_PARMS - 1]) { + struct rtattr *tbp[NDTPA_MAX]; struct neigh_parms *p; - int i, ifindex = 0; + u32 ifindex = 0; - err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS], - nl_ntbl_parm_policy); - if (err < 0) - goto errout_tbl_lock; + if (rtattr_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS - 1]) < 0) + goto rtattr_failure; - if (tbp[NDTPA_IFINDEX]) - ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]); + if (tbp[NDTPA_IFINDEX - 1]) + ifindex = RTA_GET_U32(tbp[NDTPA_IFINDEX - 1]); p = lookup_neigh_params(tbl, ifindex); if (p == NULL) { err = -ENOENT; - goto errout_tbl_lock; + goto rtattr_failure; } + + if (tbp[NDTPA_QUEUE_LEN - 1]) + p->queue_len = RTA_GET_U32(tbp[NDTPA_QUEUE_LEN - 1]); - for (i = 1; i <= NDTPA_MAX; i++) { - if (tbp[i] == NULL) - continue; + if (tbp[NDTPA_PROXY_QLEN - 1]) + p->proxy_qlen = RTA_GET_U32(tbp[NDTPA_PROXY_QLEN - 1]); - switch (i) { - case NDTPA_QUEUE_LEN: - p->queue_len = nla_get_u32(tbp[i]); - break; - case NDTPA_PROXY_QLEN: - p->proxy_qlen = nla_get_u32(tbp[i]); - break; - case NDTPA_APP_PROBES: - p->app_probes = nla_get_u32(tbp[i]); - break; - case NDTPA_UCAST_PROBES: - p->ucast_probes = nla_get_u32(tbp[i]); - break; - case NDTPA_MCAST_PROBES: - p->mcast_probes = nla_get_u32(tbp[i]); - break; - case NDTPA_BASE_REACHABLE_TIME: - p->base_reachable_time = nla_get_msecs(tbp[i]); - break; - case NDTPA_GC_STALETIME: - p->gc_staletime = nla_get_msecs(tbp[i]); - break; - case NDTPA_DELAY_PROBE_TIME: - p->delay_probe_time = nla_get_msecs(tbp[i]); - break; - case NDTPA_RETRANS_TIME: - p->retrans_time = nla_get_msecs(tbp[i]); - break; - case 
NDTPA_ANYCAST_DELAY: - p->anycast_delay = nla_get_msecs(tbp[i]); - break; - case NDTPA_PROXY_DELAY: - p->proxy_delay = nla_get_msecs(tbp[i]); - break; - case NDTPA_LOCKTIME: - p->locktime = nla_get_msecs(tbp[i]); - break; - } - } - } + if (tbp[NDTPA_APP_PROBES - 1]) + p->app_probes = RTA_GET_U32(tbp[NDTPA_APP_PROBES - 1]); + + if (tbp[NDTPA_UCAST_PROBES - 1]) + p->ucast_probes = + RTA_GET_U32(tbp[NDTPA_UCAST_PROBES - 1]); + + if (tbp[NDTPA_MCAST_PROBES - 1]) + p->mcast_probes = + RTA_GET_U32(tbp[NDTPA_MCAST_PROBES - 1]); + + if (tbp[NDTPA_BASE_REACHABLE_TIME - 1]) + p->base_reachable_time = + RTA_GET_MSECS(tbp[NDTPA_BASE_REACHABLE_TIME - 1]); - if (tb[NDTA_THRESH1]) - tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]); + if (tbp[NDTPA_GC_STALETIME - 1]) + p->gc_staletime = + RTA_GET_MSECS(tbp[NDTPA_GC_STALETIME - 1]); - if (tb[NDTA_THRESH2]) - tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]); + if (tbp[NDTPA_DELAY_PROBE_TIME - 1]) + p->delay_probe_time = + RTA_GET_MSECS(tbp[NDTPA_DELAY_PROBE_TIME - 1]); - if (tb[NDTA_THRESH3]) - tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]); + if (tbp[NDTPA_RETRANS_TIME - 1]) + p->retrans_time = + RTA_GET_MSECS(tbp[NDTPA_RETRANS_TIME - 1]); - if (tb[NDTA_GC_INTERVAL]) - tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]); + if (tbp[NDTPA_ANYCAST_DELAY - 1]) + p->anycast_delay = + RTA_GET_MSECS(tbp[NDTPA_ANYCAST_DELAY - 1]); + + if (tbp[NDTPA_PROXY_DELAY - 1]) + p->proxy_delay = + RTA_GET_MSECS(tbp[NDTPA_PROXY_DELAY - 1]); + + if (tbp[NDTPA_LOCKTIME - 1]) + p->locktime = RTA_GET_MSECS(tbp[NDTPA_LOCKTIME - 1]); + } err = 0; -errout_tbl_lock: +rtattr_failure: write_unlock_bh(&tbl->lock); -errout_locked: - read_unlock(&neigh_tbl_lock); errout: + read_unlock(&neigh_tbl_lock); return err; } int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) { - int family, tidx, nidx = 0; - int tbl_skip = cb->args[0]; - int neigh_skip = cb->args[1]; + int idx, family; + int s_idx = cb->args[0]; struct neigh_table *tbl; - family = 
((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; read_lock(&neigh_tbl_lock); - for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) { + for (tbl = neigh_tables, idx = 0; tbl; tbl = tbl->next) { struct neigh_parms *p; - if (tidx < tbl_skip || (family && tbl->family != family)) + if (idx < s_idx || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + if (neightbl_fill_info(tbl, skb, cb) <= 0) break; - for (nidx = 0, p = tbl->parms.next; p; p = p->next, nidx++) { - if (nidx < neigh_skip) + for (++idx, p = tbl->parms.next; p; p = p->next, idx++) { + if (idx < s_idx) continue; - if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWNEIGHTBL, - NLM_F_MULTI) <= 0) + if (neightbl_fill_param_info(tbl, p, skb, cb) <= 0) goto out; } - neigh_skip = 0; } out: read_unlock(&neigh_tbl_lock); - cb->args[0] = tidx; - cb->args[1] = nidx; + cb->args[0] = idx; return skb->len; } -static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, - u32 pid, u32 seq, int type, unsigned int flags) +static int neigh_fill_info(struct sk_buff *skb, struct neighbour *n, + u32 pid, u32 seq, int event, unsigned int flags) { unsigned long now = jiffies; + unsigned char *b = skb->tail; struct nda_cacheinfo ci; - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); - if (nlh == NULL) - return -ENOBUFS; + int locked = 0; + u32 probes; + struct nlmsghdr *nlh = NLMSG_NEW(skb, pid, seq, event, + sizeof(struct ndmsg), flags); + struct ndmsg *ndm = NLMSG_DATA(nlh); - ndm = nlmsg_data(nlh); - ndm->ndm_family = neigh->ops->family; + ndm->ndm_family = n->ops->family; ndm->ndm_pad1 = 0; ndm->ndm_pad2 = 0; - ndm->ndm_flags = neigh->flags; - ndm->ndm_type = neigh->type; - ndm->ndm_ifindex = 
neigh->dev->ifindex; - - NLA_PUT(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key); - - read_lock_bh(&neigh->lock); - ndm->ndm_state = neigh->nud_state; - if ((neigh->nud_state & NUD_VALID) && - nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { - read_unlock_bh(&neigh->lock); - goto nla_put_failure; - } - - ci.ndm_used = now - neigh->used; - ci.ndm_confirmed = now - neigh->confirmed; - ci.ndm_updated = now - neigh->updated; - ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1; - read_unlock_bh(&neigh->lock); - - NLA_PUT_U32(skb, NDA_PROBES, atomic_read(&neigh->probes)); - NLA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); - - return nlmsg_end(skb, nlh); + ndm->ndm_flags = n->flags; + ndm->ndm_type = n->type; + ndm->ndm_ifindex = n->dev->ifindex; + RTA_PUT(skb, NDA_DST, n->tbl->key_len, n->primary_key); + read_lock_bh(&n->lock); + locked = 1; + ndm->ndm_state = n->nud_state; + if (n->nud_state & NUD_VALID) + RTA_PUT(skb, NDA_LLADDR, n->dev->addr_len, n->ha); + ci.ndm_used = now - n->used; + ci.ndm_confirmed = now - n->confirmed; + ci.ndm_updated = now - n->updated; + ci.ndm_refcnt = atomic_read(&n->refcnt) - 1; + probes = atomic_read(&n->probes); + read_unlock_bh(&n->lock); + locked = 0; + RTA_PUT(skb, NDA_CACHEINFO, sizeof(ci), &ci); + RTA_PUT(skb, NDA_PROBES, sizeof(probes), &probes); + nlh->nlmsg_len = skb->tail - b; + return skb->len; -nla_put_failure: - return nlmsg_cancel(skb, nlh); +nlmsg_failure: +rtattr_failure: + if (locked) + read_unlock_bh(&n->lock); + skb_trim(skb, b - skb->data); + return -1; } @@ -2031,7 +1959,7 @@ int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) int t, family, s_t; read_lock(&neigh_tbl_lock); - family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family; + family = ((struct rtgenmsg *)NLMSG_DATA(cb->nlh))->rtgen_family; s_t = cb->args[0]; for (tbl = neigh_tables, t = 0; tbl; tbl = tbl->next, t++) { @@ -2410,35 +2338,41 @@ static struct file_operations neigh_stat_seq_fops = { #endif /* 
CONFIG_PROC_FS */ #ifdef CONFIG_ARPD -static void __neigh_notify(struct neighbour *n, int type, int flags) +void neigh_app_ns(struct neighbour *n) { - struct sk_buff *skb; - int err = -ENOBUFS; + struct nlmsghdr *nlh; + int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); + struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_ATOMIC); - if (skb == NULL) - goto errout; + if (!skb) + return; - err = neigh_fill_info(skb, n, 0, 0, type, flags); - if (err < 0) { + if (neigh_fill_info(skb, n, 0, 0, RTM_GETNEIGH, 0) < 0) { kfree_skb(skb); - goto errout; + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_NEIGH, err); -} - -void neigh_app_ns(struct neighbour *n) -{ - __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST); + nlh = (struct nlmsghdr *)skb->data; + nlh->nlmsg_flags = NLM_F_REQUEST; + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } static void neigh_app_notify(struct neighbour *n) { - __neigh_notify(n, RTM_NEWNEIGH, 0); + struct nlmsghdr *nlh; + int size = NLMSG_SPACE(sizeof(struct ndmsg) + 256); + struct sk_buff *skb = alloc_skb(size, GFP_ATOMIC); + + if (!skb) + return; + + if (neigh_fill_info(skb, n, 0, 0, RTM_NEWNEIGH, 0) < 0) { + kfree_skb(skb); + return; + } + nlh = (struct nlmsghdr *)skb->data; + NETLINK_CB(skb).dst_group = RTNLGRP_NEIGH; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_NEIGH, GFP_ATOMIC); } #endif /* CONFIG_ARPD */ @@ -2452,7 +2386,7 @@ static struct neigh_sysctl_table { ctl_table neigh_neigh_dir[2]; ctl_table neigh_proto_dir[2]; ctl_table neigh_root_dir[2]; -} neigh_sysctl_template __read_mostly = { +} neigh_sysctl_template = { .neigh_vars = { { .ctl_name = NET_NEIGH_MCAST_SOLICIT, @@ -2725,6 +2659,7 @@ void neigh_sysctl_unregister(struct neigh_parms *p) #endif /* CONFIG_SYSCTL */ EXPORT_SYMBOL(__neigh_event_send); +EXPORT_SYMBOL(neigh_add); EXPORT_SYMBOL(neigh_changeaddr); 
EXPORT_SYMBOL(neigh_compat_output); EXPORT_SYMBOL(neigh_connected_output); @@ -2744,8 +2679,11 @@ EXPORT_SYMBOL(neigh_table_clear); EXPORT_SYMBOL(neigh_table_init); EXPORT_SYMBOL(neigh_table_init_no_netlink); EXPORT_SYMBOL(neigh_update); +EXPORT_SYMBOL(neigh_update_hhs); EXPORT_SYMBOL(pneigh_enqueue); EXPORT_SYMBOL(pneigh_lookup); +EXPORT_SYMBOL(neightbl_dump_info); +EXPORT_SYMBOL(neightbl_set); #ifdef CONFIG_ARPD EXPORT_SYMBOL(neigh_app_ns); diff --git a/trunk/net/core/netpoll.c b/trunk/net/core/netpoll.c index ead5920c26d6..471da451cd48 100644 --- a/trunk/net/core/netpoll.c +++ b/trunk/net/core/netpoll.c @@ -110,7 +110,7 @@ static int checksum_udp(struct sk_buff *skb, struct udphdr *uh, psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0); - if (skb->ip_summed == CHECKSUM_COMPLETE && + if (skb->ip_summed == CHECKSUM_HW && !(u16)csum_fold(csum_add(psum, skb->csum))) return 0; diff --git a/trunk/net/core/pktgen.c b/trunk/net/core/pktgen.c index 72145d4a2600..6a7320b39ed0 100644 --- a/trunk/net/core/pktgen.c +++ b/trunk/net/core/pktgen.c @@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) * use ipv6_get_lladdr if/when it's get exported */ - rcu_read_lock(); + read_lock(&addrconf_lock); if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } read_unlock_bh(&idev->lock); } - rcu_read_unlock(); + read_unlock(&addrconf_lock); if (err) printk("pktgen: ERROR: IPv6 link address not availble.\n"); } diff --git a/trunk/net/core/rtnetlink.c b/trunk/net/core/rtnetlink.c index d8e25e08cb7e..30cc1ba6ed5c 100644 --- a/trunk/net/core/rtnetlink.c +++ b/trunk/net/core/rtnetlink.c @@ -35,7 +35,6 @@ #include #include #include -#include #include #include @@ -50,7 +49,6 @@ #include #include #include -#include #include #ifdef CONFIG_NET_WIRELESS_RTNETLINK #include @@ -58,7 +56,6 @@ #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ static 
DEFINE_MUTEX(rtnl_mutex); -static struct sock *rtnl; void rtnl_lock(void) { @@ -96,6 +93,8 @@ int rtattr_parse(struct rtattr *tb[], int maxattr, struct rtattr *rta, int len) return 0; } +struct sock *rtnl; + struct rtnetlink_link * rtnetlink_links[NPROTO]; static const int rtm_min[RTM_NR_FAMILIES] = @@ -103,7 +102,8 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), + [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -111,6 +111,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWPREFIX)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETMULTICAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), [RTM_FAM(RTM_GETANYCAST)] = NLMSG_LENGTH(sizeof(struct rtgenmsg)), + [RTM_FAM(RTM_NEWNEIGHTBL)] = NLMSG_LENGTH(sizeof(struct ndtmsg)), }; static const int rta_max[RTM_NR_FAMILIES] = @@ -118,11 +119,13 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, + [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, + [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, [RTM_FAM(RTM_NEWACTION)] = TCAA_MAX, + [RTM_FAM(RTM_NEWNEIGHTBL)] = NDTA_MAX, }; void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data) @@ -165,52 +168,24 @@ int rtnetlink_send(struct sk_buff *skb, u32 pid, unsigned group, int echo) return err; } -int 
rtnl_unicast(struct sk_buff *skb, u32 pid) -{ - return nlmsg_unicast(rtnl, skb, pid); -} - -int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, - struct nlmsghdr *nlh, gfp_t flags) -{ - int report = 0; - - if (nlh) - report = nlmsg_report(nlh); - - return nlmsg_notify(rtnl, skb, pid, group, report, flags); -} - -void rtnl_set_sk_err(u32 group, int error) -{ - netlink_set_err(rtnl, 0, group, error); -} - int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { - struct nlattr *mx; - int i, valid = 0; - - mx = nla_nest_start(skb, RTA_METRICS); - if (mx == NULL) - return -ENOBUFS; - - for (i = 0; i < RTAX_MAX; i++) { - if (metrics[i]) { - valid++; - NLA_PUT_U32(skb, i+1, metrics[i]); - } - } + struct rtattr *mx = (struct rtattr*)skb->tail; + int i; - if (!valid) { - nla_nest_cancel(skb, mx); - return 0; + RTA_PUT(skb, RTA_METRICS, 0, NULL); + for (i=0; irta_len = skb->tail - (u8*)mx; + if (mx->rta_len == RTA_LENGTH(0)) + skb_trim(skb, (u8*)mx - skb->data); + return 0; - return nla_nest_end(skb, mx); - -nla_put_failure: - return nla_nest_cancel(skb, mx); +rtattr_failure: + skb_trim(skb, (u8*)mx - skb->data); + return -1; } @@ -241,73 +216,41 @@ static void set_operstate(struct net_device *dev, unsigned char transition) } } -static void copy_rtnl_link_stats(struct rtnl_link_stats *a, - struct net_device_stats *b) +static int rtnetlink_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, + int type, u32 pid, u32 seq, u32 change, + unsigned int flags) { - a->rx_packets = b->rx_packets; - a->tx_packets = b->tx_packets; - a->rx_bytes = b->rx_bytes; - a->tx_bytes = b->tx_bytes; - a->rx_errors = b->rx_errors; - a->tx_errors = b->tx_errors; - a->rx_dropped = b->rx_dropped; - a->tx_dropped = b->tx_dropped; - - a->multicast = b->multicast; - a->collisions = b->collisions; - - a->rx_length_errors = b->rx_length_errors; - a->rx_over_errors = b->rx_over_errors; - a->rx_crc_errors = b->rx_crc_errors; - a->rx_frame_errors = b->rx_frame_errors; - a->rx_fifo_errors = 
b->rx_fifo_errors; - a->rx_missed_errors = b->rx_missed_errors; - - a->tx_aborted_errors = b->tx_aborted_errors; - a->tx_carrier_errors = b->tx_carrier_errors; - a->tx_fifo_errors = b->tx_fifo_errors; - a->tx_heartbeat_errors = b->tx_heartbeat_errors; - a->tx_window_errors = b->tx_window_errors; - - a->rx_compressed = b->rx_compressed; - a->tx_compressed = b->tx_compressed; -}; + struct ifinfomsg *r; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; -static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, - void *iwbuf, int iwbuflen, int type, u32 pid, - u32 seq, u32 change, unsigned int flags) -{ - struct ifinfomsg *ifm; - struct nlmsghdr *nlh; - - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); - if (nlh == NULL) - return -ENOBUFS; - - ifm = nlmsg_data(nlh); - ifm->ifi_family = AF_UNSPEC; - ifm->__ifi_pad = 0; - ifm->ifi_type = dev->type; - ifm->ifi_index = dev->ifindex; - ifm->ifi_flags = dev_get_flags(dev); - ifm->ifi_change = change; - - NLA_PUT_STRING(skb, IFLA_IFNAME, dev->name); - NLA_PUT_U32(skb, IFLA_TXQLEN, dev->tx_queue_len); - NLA_PUT_U32(skb, IFLA_WEIGHT, dev->weight); - NLA_PUT_U8(skb, IFLA_OPERSTATE, - netif_running(dev) ? 
dev->operstate : IF_OPER_DOWN); - NLA_PUT_U8(skb, IFLA_LINKMODE, dev->link_mode); - NLA_PUT_U32(skb, IFLA_MTU, dev->mtu); - - if (dev->ifindex != dev->iflink) - NLA_PUT_U32(skb, IFLA_LINK, dev->iflink); - - if (dev->master) - NLA_PUT_U32(skb, IFLA_MASTER, dev->master->ifindex); + nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*r), flags); + r = NLMSG_DATA(nlh); + r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev_get_flags(dev); + r->ifi_change = change; - if (dev->qdisc_sleeping) - NLA_PUT_STRING(skb, IFLA_QDISC, dev->qdisc_sleeping->ops->id); + RTA_PUT(skb, IFLA_IFNAME, strlen(dev->name)+1, dev->name); + + if (1) { + u32 txqlen = dev->tx_queue_len; + RTA_PUT(skb, IFLA_TXQLEN, sizeof(txqlen), &txqlen); + } + + if (1) { + u32 weight = dev->weight; + RTA_PUT(skb, IFLA_WEIGHT, sizeof(weight), &weight); + } + + if (1) { + u8 operstate = netif_running(dev)?dev->operstate:IF_OPER_DOWN; + u8 link_mode = dev->link_mode; + RTA_PUT(skb, IFLA_OPERSTATE, sizeof(operstate), &operstate); + RTA_PUT(skb, IFLA_LINKMODE, sizeof(link_mode), &link_mode); + } if (1) { struct rtnl_link_ifmap map = { @@ -318,38 +261,58 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, .dma = dev->dma, .port = dev->if_port, }; - NLA_PUT(skb, IFLA_MAP, sizeof(map), &map); + RTA_PUT(skb, IFLA_MAP, sizeof(map), &map); } if (dev->addr_len) { - NLA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); - NLA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); + RTA_PUT(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr); + RTA_PUT(skb, IFLA_BROADCAST, dev->addr_len, dev->broadcast); } - if (dev->get_stats) { - struct net_device_stats *stats = dev->get_stats(dev); - if (stats) { - struct nlattr *attr; - - attr = nla_reserve(skb, IFLA_STATS, - sizeof(struct rtnl_link_stats)); - if (attr == NULL) - goto nla_put_failure; + if (1) { + u32 mtu = dev->mtu; + RTA_PUT(skb, IFLA_MTU, sizeof(mtu), &mtu); + } - 
copy_rtnl_link_stats(nla_data(attr), stats); - } + if (dev->ifindex != dev->iflink) { + u32 iflink = dev->iflink; + RTA_PUT(skb, IFLA_LINK, sizeof(iflink), &iflink); } - if (iwbuf) - NLA_PUT(skb, IFLA_WIRELESS, iwbuflen, iwbuf); + if (dev->qdisc_sleeping) + RTA_PUT(skb, IFLA_QDISC, + strlen(dev->qdisc_sleeping->ops->id) + 1, + dev->qdisc_sleeping->ops->id); + + if (dev->master) { + u32 master = dev->master->ifindex; + RTA_PUT(skb, IFLA_MASTER, sizeof(master), &master); + } - return nlmsg_end(skb, nlh); + if (dev->get_stats) { + unsigned long *stats = (unsigned long*)dev->get_stats(dev); + if (stats) { + struct rtattr *a; + __u32 *s; + int i; + int n = sizeof(struct rtnl_link_stats)/4; + + a = __RTA_PUT(skb, IFLA_STATS, n*4); + s = RTA_DATA(a); + for (i=0; inlmsg_len = skb->tail - b; + return skb->len; -nla_put_failure: - return nlmsg_cancel(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } -static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->args[0]; @@ -359,9 +322,10 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) { if (idx < s_idx) continue; - if (rtnl_fill_ifinfo(skb, dev, NULL, 0, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, 0, NLM_F_MULTI) <= 0) + if (rtnetlink_fill_ifinfo(skb, dev, RTM_NEWLINK, + NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, 0, + NLM_F_MULTI) <= 0) break; } read_unlock(&dev_base_lock); @@ -370,70 +334,52 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static struct nla_policy ifla_policy[IFLA_MAX+1] __read_mostly = { - [IFLA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ-1 }, - [IFLA_MAP] = { .len = sizeof(struct rtnl_link_ifmap) }, - [IFLA_MTU] = { .type = NLA_U32 }, - [IFLA_TXQLEN] = { .type = NLA_U32 }, - 
[IFLA_WEIGHT] = { .type = NLA_U32 }, - [IFLA_OPERSTATE] = { .type = NLA_U8 }, - [IFLA_LINKMODE] = { .type = NLA_U8 }, -}; - -static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int do_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ifinfomsg *ifm; + struct ifinfomsg *ifm = NLMSG_DATA(nlh); + struct rtattr **ida = arg; struct net_device *dev; - int err, send_addr_notify = 0, modified = 0; - struct nlattr *tb[IFLA_MAX+1]; - char ifname[IFNAMSIZ]; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - if (tb[IFLA_IFNAME]) - nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); - else - ifname[0] = '\0'; + int err, send_addr_notify = 0; - err = -EINVAL; - ifm = nlmsg_data(nlh); if (ifm->ifi_index >= 0) dev = dev_get_by_index(ifm->ifi_index); - else if (tb[IFLA_IFNAME]) + else if (ida[IFLA_IFNAME - 1]) { + char ifname[IFNAMSIZ]; + + if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], + IFNAMSIZ) >= IFNAMSIZ) + return -EINVAL; dev = dev_get_by_name(ifname); - else - goto errout; + } else + return -EINVAL; - if (dev == NULL) { - err = -ENODEV; - goto errout; - } + if (!dev) + return -ENODEV; - if (tb[IFLA_ADDRESS] && - nla_len(tb[IFLA_ADDRESS]) < dev->addr_len) - goto errout_dev; + err = -EINVAL; - if (tb[IFLA_BROADCAST] && - nla_len(tb[IFLA_BROADCAST]) < dev->addr_len) - goto errout_dev; + if (ifm->ifi_flags) + dev_change_flags(dev, ifm->ifi_flags); - if (tb[IFLA_MAP]) { + if (ida[IFLA_MAP - 1]) { struct rtnl_link_ifmap *u_map; struct ifmap k_map; if (!dev->set_config) { err = -EOPNOTSUPP; - goto errout_dev; + goto out; } if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto out; } + + if (ida[IFLA_MAP - 1]->rta_len != RTA_LENGTH(sizeof(*u_map))) + goto out; + + u_map = RTA_DATA(ida[IFLA_MAP - 1]); - u_map = nla_data(tb[IFLA_MAP]); k_map.mem_start = (unsigned long) u_map->mem_start; k_map.mem_end = (unsigned long) u_map->mem_end; k_map.base_addr = 
(unsigned short) u_map->base_addr; @@ -442,175 +388,200 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) k_map.port = (unsigned char) u_map->port; err = dev->set_config(dev, &k_map); - if (err < 0) - goto errout_dev; - modified = 1; + if (err) + goto out; } - if (tb[IFLA_ADDRESS]) { + if (ida[IFLA_ADDRESS - 1]) { struct sockaddr *sa; int len; if (!dev->set_mac_address) { err = -EOPNOTSUPP; - goto errout_dev; + goto out; } - if (!netif_device_present(dev)) { err = -ENODEV; - goto errout_dev; + goto out; } + if (ida[IFLA_ADDRESS - 1]->rta_len != RTA_LENGTH(dev->addr_len)) + goto out; len = sizeof(sa_family_t) + dev->addr_len; sa = kmalloc(len, GFP_KERNEL); if (!sa) { err = -ENOMEM; - goto errout_dev; + goto out; } sa->sa_family = dev->type; - memcpy(sa->sa_data, nla_data(tb[IFLA_ADDRESS]), + memcpy(sa->sa_data, RTA_DATA(ida[IFLA_ADDRESS - 1]), dev->addr_len); err = dev->set_mac_address(dev, sa); kfree(sa); if (err) - goto errout_dev; + goto out; send_addr_notify = 1; - modified = 1; } - if (tb[IFLA_MTU]) { - err = dev_set_mtu(dev, nla_get_u32(tb[IFLA_MTU])); - if (err < 0) - goto errout_dev; - modified = 1; + if (ida[IFLA_BROADCAST - 1]) { + if (ida[IFLA_BROADCAST - 1]->rta_len != RTA_LENGTH(dev->addr_len)) + goto out; + memcpy(dev->broadcast, RTA_DATA(ida[IFLA_BROADCAST - 1]), + dev->addr_len); + send_addr_notify = 1; } - /* - * Interface selected by interface index but interface - * name provided implies that a name change has been - * requested. - */ - if (ifm->ifi_index >= 0 && ifname[0]) { - err = dev_change_name(dev, ifname); - if (err < 0) - goto errout_dev; - modified = 1; - } + if (ida[IFLA_MTU - 1]) { + if (ida[IFLA_MTU - 1]->rta_len != RTA_LENGTH(sizeof(u32))) + goto out; + err = dev_set_mtu(dev, *((u32 *) RTA_DATA(ida[IFLA_MTU - 1]))); + + if (err) + goto out; -#ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (tb[IFLA_WIRELESS]) { - /* Call Wireless Extensions. - * Various stuff checked in there... 
*/ - err = wireless_rtnetlink_set(dev, nla_data(tb[IFLA_WIRELESS]), - nla_len(tb[IFLA_WIRELESS])); - if (err < 0) - goto errout_dev; } -#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - if (tb[IFLA_BROADCAST]) { - nla_memcpy(dev->broadcast, tb[IFLA_BROADCAST], dev->addr_len); - send_addr_notify = 1; + if (ida[IFLA_TXQLEN - 1]) { + if (ida[IFLA_TXQLEN - 1]->rta_len != RTA_LENGTH(sizeof(u32))) + goto out; + + dev->tx_queue_len = *((u32 *) RTA_DATA(ida[IFLA_TXQLEN - 1])); } + if (ida[IFLA_WEIGHT - 1]) { + if (ida[IFLA_WEIGHT - 1]->rta_len != RTA_LENGTH(sizeof(u32))) + goto out; - if (ifm->ifi_flags) - dev_change_flags(dev, ifm->ifi_flags); + dev->weight = *((u32 *) RTA_DATA(ida[IFLA_WEIGHT - 1])); + } - if (tb[IFLA_TXQLEN]) - dev->tx_queue_len = nla_get_u32(tb[IFLA_TXQLEN]); + if (ida[IFLA_OPERSTATE - 1]) { + if (ida[IFLA_OPERSTATE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) + goto out; - if (tb[IFLA_WEIGHT]) - dev->weight = nla_get_u32(tb[IFLA_WEIGHT]); + set_operstate(dev, *((u8 *) RTA_DATA(ida[IFLA_OPERSTATE - 1]))); + } - if (tb[IFLA_OPERSTATE]) - set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE])); + if (ida[IFLA_LINKMODE - 1]) { + if (ida[IFLA_LINKMODE - 1]->rta_len != RTA_LENGTH(sizeof(u8))) + goto out; - if (tb[IFLA_LINKMODE]) { write_lock_bh(&dev_base_lock); - dev->link_mode = nla_get_u8(tb[IFLA_LINKMODE]); + dev->link_mode = *((u8 *) RTA_DATA(ida[IFLA_LINKMODE - 1])); write_unlock_bh(&dev_base_lock); } - err = 0; + if (ifm->ifi_index >= 0 && ida[IFLA_IFNAME - 1]) { + char ifname[IFNAMSIZ]; + + if (rtattr_strlcpy(ifname, ida[IFLA_IFNAME - 1], + IFNAMSIZ) >= IFNAMSIZ) + goto out; + err = dev_change_name(dev, ifname); + if (err) + goto out; + } + +#ifdef CONFIG_NET_WIRELESS_RTNETLINK + if (ida[IFLA_WIRELESS - 1]) { + + /* Call Wireless Extensions. + * Various stuff checked in there... 
*/ + err = wireless_rtnetlink_set(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len); + if (err) + goto out; + } +#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ -errout_dev: - if (err < 0 && modified && net_ratelimit()) - printk(KERN_WARNING "A link change request failed with " - "some changes comitted already. Interface %s may " - "have been left with an inconsistent configuration, " - "please check.\n", dev->name); + err = 0; +out: if (send_addr_notify) call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); dev_put(dev); -errout: return err; } -static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +#ifdef CONFIG_NET_WIRELESS_RTNETLINK +static int do_getlink(struct sk_buff *in_skb, struct nlmsghdr* in_nlh, void *arg) { - struct ifinfomsg *ifm; - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev = NULL; - struct sk_buff *nskb; - char *iw_buf = NULL, *iw = NULL; + struct ifinfomsg *ifm = NLMSG_DATA(in_nlh); + struct rtattr **ida = arg; + struct net_device *dev; + struct ifinfomsg *r; + struct nlmsghdr *nlh; + int err = -ENOBUFS; + struct sk_buff *skb; + unsigned char *b; + char *iw_buf = NULL; int iw_buf_len = 0; - int err, payload; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFLA_MAX, ifla_policy); - if (err < 0) - goto errout; - - ifm = nlmsg_data(nlh); - if (ifm->ifi_index >= 0) { + if (ifm->ifi_index >= 0) dev = dev_get_by_index(ifm->ifi_index); - if (dev == NULL) - return -ENODEV; - } else + else return -EINVAL; - + if (!dev) + return -ENODEV; #ifdef CONFIG_NET_WIRELESS_RTNETLINK - if (tb[IFLA_WIRELESS]) { + if (ida[IFLA_WIRELESS - 1]) { + /* Call Wireless Extensions. We need to know the size before * we can alloc. Various stuff checked in there... 
*/ - err = wireless_rtnetlink_get(dev, nla_data(tb[IFLA_WIRELESS]), - nla_len(tb[IFLA_WIRELESS]), - &iw_buf, &iw_buf_len); - if (err < 0) - goto errout; - - iw += IW_EV_POINT_OFF; + err = wireless_rtnetlink_get(dev, RTA_DATA(ida[IFLA_WIRELESS - 1]), ida[IFLA_WIRELESS - 1]->rta_len, &iw_buf, &iw_buf_len); + if (err) + goto out; } #endif /* CONFIG_NET_WIRELESS_RTNETLINK */ - payload = NLMSG_ALIGN(sizeof(struct ifinfomsg) + - nla_total_size(iw_buf_len)); - nskb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); - if (nskb == NULL) { - err = -ENOBUFS; - goto errout; - } - - err = rtnl_fill_ifinfo(nskb, dev, iw, iw_buf_len, RTM_NEWLINK, - NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, 0); - if (err <= 0) { - kfree_skb(nskb); - goto errout; - } - - err = rtnl_unicast(skb, NETLINK_CB(skb).pid); -errout: - kfree(iw_buf); + /* Create a skb big enough to include all the data. + * Some requests are way bigger than 4k... Jean II */ + skb = alloc_skb((NLMSG_LENGTH(sizeof(*r))) + (RTA_SPACE(iw_buf_len)), + GFP_KERNEL); + if (!skb) + goto out; + b = skb->tail; + + /* Put in the message the usual good stuff */ + nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid, in_nlh->nlmsg_seq, + RTM_NEWLINK, sizeof(*r)); + r = NLMSG_DATA(nlh); + r->ifi_family = AF_UNSPEC; + r->__ifi_pad = 0; + r->ifi_type = dev->type; + r->ifi_index = dev->ifindex; + r->ifi_flags = dev->flags; + r->ifi_change = 0; + + /* Put the wireless payload if it exist */ + if(iw_buf != NULL) + RTA_PUT(skb, IFLA_WIRELESS, iw_buf_len, + iw_buf + IW_EV_POINT_OFF); + + nlh->nlmsg_len = skb->tail - b; + + /* Needed ? 
*/ + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: + if(iw_buf != NULL) + kfree(iw_buf); dev_put(dev); - return err; + +rtattr_failure: +nlmsg_failure: + kfree_skb(skb); + goto out; } +#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ -static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) +static int rtnetlink_dump_all(struct sk_buff *skb, struct netlink_callback *cb) { int idx; int s_idx = cb->family; @@ -637,22 +608,20 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change) { struct sk_buff *skb; - int err = -ENOBUFS; + int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + + sizeof(struct rtnl_link_ifmap) + + sizeof(struct rtnl_link_stats) + 128); - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto errout; + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; - err = rtnl_fill_ifinfo(skb, dev, NULL, 0, type, 0, 0, change, 0); - if (err < 0) { + if (rtnetlink_fill_ifinfo(skb, dev, type, 0, 0, change, 0) < 0) { kfree_skb(skb); - goto errout; + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_LINK, err); + NETLINK_CB(skb).dst_group = RTNLGRP_LINK; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_KERNEL); } /* Protected by RTNL sempahore. 
*/ @@ -777,19 +746,18 @@ static void rtnetlink_rcv(struct sock *sk, int len) static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = { - [RTM_GETLINK - RTM_BASE] = { .doit = rtnl_getlink, - .dumpit = rtnl_dump_ifinfo }, - [RTM_SETLINK - RTM_BASE] = { .doit = rtnl_setlink }, - [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnl_dump_all }, - [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnl_dump_all }, + [RTM_GETLINK - RTM_BASE] = { +#ifdef CONFIG_NET_WIRELESS_RTNETLINK + .doit = do_getlink, +#endif /* CONFIG_NET_WIRELESS_RTNETLINK */ + .dumpit = rtnetlink_dump_ifinfo }, + [RTM_SETLINK - RTM_BASE] = { .doit = do_setlink }, + [RTM_GETADDR - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, + [RTM_GETROUTE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, -#ifdef CONFIG_FIB_RULES - [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, - [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, -#endif - [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnl_dump_all }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, }; @@ -849,9 +817,7 @@ EXPORT_SYMBOL(rtattr_strlcpy); EXPORT_SYMBOL(rtattr_parse); EXPORT_SYMBOL(rtnetlink_links); EXPORT_SYMBOL(rtnetlink_put_metrics); +EXPORT_SYMBOL(rtnl); EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); -EXPORT_SYMBOL(rtnl_unicast); -EXPORT_SYMBOL(rtnl_notify); -EXPORT_SYMBOL(rtnl_set_sk_err); diff --git a/trunk/net/core/skbuff.c b/trunk/net/core/skbuff.c index c448c7f6fde2..c54f3664bce5 100644 --- a/trunk/net/core/skbuff.c +++ b/trunk/net/core/skbuff.c @@ -1397,7 +1397,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) unsigned int csum; long csstart; - if (skb->ip_summed == CHECKSUM_PARTIAL) + if 
(skb->ip_summed == CHECKSUM_HW) csstart = skb->h.raw - skb->data; else csstart = skb_headlen(skb); @@ -1411,7 +1411,7 @@ void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, skb->len - csstart, 0); - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { long csstuff = csstart + skb->csum; *((unsigned short *)(to + csstuff)) = csum_fold(csum); @@ -1898,10 +1898,10 @@ int skb_append_datato_frags(struct sock *sk, struct sk_buff *skb, * @len: length of data pulled * * This function performs an skb_pull on the packet and updates - * update the CHECKSUM_COMPLETE checksum. It should be used on - * receive path processing instead of skb_pull unless you know - * that the checksum difference is zero (e.g., a valid IP header) - * or you are setting ip_summed to CHECKSUM_NONE. + * update the CHECKSUM_HW checksum. It should be used on receive + * path processing instead of skb_pull unless you know that the + * checksum difference is zero (e.g., a valid IP header) or you + * are setting ip_summed to CHECKSUM_NONE. 
*/ unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { @@ -1994,7 +1994,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, int features) frag = skb_shinfo(nskb)->frags; k = 0; - nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->ip_summed = CHECKSUM_HW; nskb->csum = skb->csum; memcpy(skb_put(nskb, hsize), skb->data + offset, hsize); @@ -2046,14 +2046,19 @@ void __init skb_init(void) skbuff_head_cache = kmem_cache_create("skbuff_head_cache", sizeof(struct sk_buff), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!skbuff_head_cache) + panic("cannot create skbuff cache"); + skbuff_fclone_cache = kmem_cache_create("skbuff_fclone_cache", (2*sizeof(struct sk_buff)) + sizeof(atomic_t), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!skbuff_fclone_cache) + panic("cannot create skbuff cache"); } EXPORT_SYMBOL(___pskb_trim); diff --git a/trunk/net/core/sock.c b/trunk/net/core/sock.c index b77e155cbe6c..51fcfbc041a7 100644 --- a/trunk/net/core/sock.c +++ b/trunk/net/core/sock.c @@ -187,13 +187,13 @@ static struct lock_class_key af_callback_keys[AF_MAX]; #define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) /* Run time adjustable parameters. 
*/ -__u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; -__u32 sysctl_rmem_max __read_mostly = SK_RMEM_MAX; -__u32 sysctl_wmem_default __read_mostly = SK_WMEM_MAX; -__u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; +__u32 sysctl_wmem_max = SK_WMEM_MAX; +__u32 sysctl_rmem_max = SK_RMEM_MAX; +__u32 sysctl_wmem_default = SK_WMEM_MAX; +__u32 sysctl_rmem_default = SK_RMEM_MAX; /* Maximal space eaten by iovec or ancilliary data plus some space */ -int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); +int sysctl_optmem_max = sizeof(unsigned long)*(2*UIO_MAXIOV + 512); static int sock_set_timeout(long *timeo_p, char __user *optval, int optlen) { @@ -247,7 +247,11 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - err = sk_filter(sk, skb); + /* It would be deadlock, if sock_queue_rcv_skb is used + with socket lock! We assume that users of this + function are lock free. + */ + err = sk_filter(sk, skb, 1); if (err) goto out; @@ -274,7 +278,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 0)) goto discard_and_relse; skb->dev = NULL; @@ -602,15 +606,15 @@ int sock_setsockopt(struct socket *sock, int level, int optname, break; case SO_DETACH_FILTER: - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); + spin_lock_bh(&sk->sk_lock.slock); + filter = sk->sk_filter; if (filter) { - rcu_assign_pointer(sk->sk_filter, NULL); + sk->sk_filter = NULL; + spin_unlock_bh(&sk->sk_lock.slock); sk_filter_release(sk, filter); - rcu_read_unlock_bh(); break; } - rcu_read_unlock_bh(); + spin_unlock_bh(&sk->sk_lock.slock); ret = -ENONET; break; @@ -880,10 +884,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = rcu_dereference(sk->sk_filter); + filter = sk->sk_filter; if (filter) { sk_filter_release(sk, filter); - rcu_assign_pointer(sk->sk_filter, NULL); + sk->sk_filter = NULL; } 
sock_disable_timestamp(sk); @@ -907,7 +911,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) if (newsk != NULL) { struct sk_filter *filter; - sock_copy(newsk, sk); + memcpy(newsk, sk, sk->sk_prot->obj_size); /* SANITY */ sk_node_init(&newsk->sk_node); diff --git a/trunk/net/core/utils.c b/trunk/net/core/utils.c index 2682490777de..e31c90e05594 100644 --- a/trunk/net/core/utils.c +++ b/trunk/net/core/utils.c @@ -4,7 +4,6 @@ * Authors: * net_random Alan Cox * net_ratelimit Andy Kleen - * in{4,6}_pton YOSHIFUJI Hideaki, Copyright (C)2006 USAGI/WIDE Project * * Created by Alexey Kuznetsov * @@ -192,215 +191,3 @@ __be32 in_aton(const char *str) } EXPORT_SYMBOL(in_aton); - -#define IN6PTON_XDIGIT 0x00010000 -#define IN6PTON_DIGIT 0x00020000 -#define IN6PTON_COLON_MASK 0x00700000 -#define IN6PTON_COLON_1 0x00100000 /* single : requested */ -#define IN6PTON_COLON_2 0x00200000 /* second : requested */ -#define IN6PTON_COLON_1_2 0x00400000 /* :: requested */ -#define IN6PTON_DOT 0x00800000 /* . 
*/ -#define IN6PTON_DELIM 0x10000000 -#define IN6PTON_NULL 0x20000000 /* first/tail */ -#define IN6PTON_UNKNOWN 0x40000000 - -static inline int digit2bin(char c, char delim) -{ - if (c == delim || c == '\0') - return IN6PTON_DELIM; - if (c == '.') - return IN6PTON_DOT; - if (c >= '0' && c <= '9') - return (IN6PTON_DIGIT | (c - '0')); - return IN6PTON_UNKNOWN; -} - -static inline int xdigit2bin(char c, char delim) -{ - if (c == delim || c == '\0') - return IN6PTON_DELIM; - if (c == ':') - return IN6PTON_COLON_MASK; - if (c == '.') - return IN6PTON_DOT; - if (c >= '0' && c <= '9') - return (IN6PTON_XDIGIT | IN6PTON_DIGIT| (c - '0')); - if (c >= 'a' && c <= 'f') - return (IN6PTON_XDIGIT | (c - 'a' + 10)); - if (c >= 'A' && c <= 'F') - return (IN6PTON_XDIGIT | (c - 'A' + 10)); - return IN6PTON_UNKNOWN; -} - -int in4_pton(const char *src, int srclen, - u8 *dst, - char delim, const char **end) -{ - const char *s; - u8 *d; - u8 dbuf[4]; - int ret = 0; - int i; - int w = 0; - - if (srclen < 0) - srclen = strlen(src); - s = src; - d = dbuf; - i = 0; - while(1) { - int c; - c = xdigit2bin(srclen > 0 ? 
*s : '\0', delim); - if (!(c & (IN6PTON_DIGIT | IN6PTON_DOT | IN6PTON_DELIM))) { - goto out; - } - if (c & (IN6PTON_DOT | IN6PTON_DELIM)) { - if (w == 0) - goto out; - *d++ = w & 0xff; - w = 0; - i++; - if (c & IN6PTON_DELIM) { - if (i != 4) - goto out; - break; - } - goto cont; - } - w = (w * 10) + c; - if ((w & 0xffff) > 255) { - goto out; - } -cont: - if (i >= 4) - goto out; - s++; - srclen--; - } - ret = 1; - memcpy(dst, dbuf, sizeof(dbuf)); -out: - if (end) - *end = s; - return ret; -} - -EXPORT_SYMBOL(in4_pton); - -int in6_pton(const char *src, int srclen, - u8 *dst, - char delim, const char **end) -{ - const char *s, *tok = NULL; - u8 *d, *dc = NULL; - u8 dbuf[16]; - int ret = 0; - int i; - int state = IN6PTON_COLON_1_2 | IN6PTON_XDIGIT | IN6PTON_NULL; - int w = 0; - - memset(dbuf, 0, sizeof(dbuf)); - - s = src; - d = dbuf; - if (srclen < 0) - srclen = strlen(src); - - while (1) { - int c; - - c = xdigit2bin(srclen > 0 ? *s : '\0', delim); - if (!(c & state)) - goto out; - if (c & (IN6PTON_DELIM | IN6PTON_COLON_MASK)) { - /* process one 16-bit word */ - if (!(state & IN6PTON_NULL)) { - *d++ = (w >> 8) & 0xff; - *d++ = w & 0xff; - } - w = 0; - if (c & IN6PTON_DELIM) { - /* We've processed last word */ - break; - } - /* - * COLON_1 => XDIGIT - * COLON_2 => XDIGIT|DELIM - * COLON_1_2 => COLON_2 - */ - switch (state & IN6PTON_COLON_MASK) { - case IN6PTON_COLON_2: - dc = d; - state = IN6PTON_XDIGIT | IN6PTON_DELIM; - if (dc - dbuf >= sizeof(dbuf)) - state |= IN6PTON_NULL; - break; - case IN6PTON_COLON_1|IN6PTON_COLON_1_2: - state = IN6PTON_XDIGIT | IN6PTON_COLON_2; - break; - case IN6PTON_COLON_1: - state = IN6PTON_XDIGIT; - break; - case IN6PTON_COLON_1_2: - state = IN6PTON_COLON_2; - break; - default: - state = 0; - } - tok = s + 1; - goto cont; - } - - if (c & IN6PTON_DOT) { - ret = in4_pton(tok ? 
tok : s, srclen + (int)(s - tok), d, delim, &s); - if (ret > 0) { - d += 4; - break; - } - goto out; - } - - w = (w << 4) | (0xff & c); - state = IN6PTON_COLON_1 | IN6PTON_DELIM; - if (!(w & 0xf000)) { - state |= IN6PTON_XDIGIT; - } - if (!dc && d + 2 < dbuf + sizeof(dbuf)) { - state |= IN6PTON_COLON_1_2; - state &= ~IN6PTON_DELIM; - } - if (d + 2 >= dbuf + sizeof(dbuf)) { - state &= ~(IN6PTON_COLON_1|IN6PTON_COLON_1_2); - } -cont: - if ((dc && d + 4 < dbuf + sizeof(dbuf)) || - d + 4 == dbuf + sizeof(dbuf)) { - state |= IN6PTON_DOT; - } - if (d >= dbuf + sizeof(dbuf)) { - state &= ~(IN6PTON_XDIGIT|IN6PTON_COLON_MASK); - } - s++; - srclen--; - } - - i = 15; d--; - - if (dc) { - while(d >= dc) - dst[i--] = *d--; - while(i >= dc - dbuf) - dst[i--] = 0; - while(i >= 0) - dst[i--] = *d--; - } else - memcpy(dst, dbuf, sizeof(dbuf)); - - ret = 1; -out: - if (end) - *end = s; - return ret; -} - -EXPORT_SYMBOL(in6_pton); diff --git a/trunk/net/core/wireless.c b/trunk/net/core/wireless.c index 3168fca312f7..de0bde4b51dd 100644 --- a/trunk/net/core/wireless.c +++ b/trunk/net/core/wireless.c @@ -72,6 +72,7 @@ /***************************** INCLUDES *****************************/ +#include /* Not needed ??? 
*/ #include #include /* off_t */ #include /* struct ifreq, dev_get_by_name() */ @@ -85,7 +86,6 @@ #include /* Pretty obvious */ #include /* New driver API */ -#include #include /* copy_to_user() */ @@ -1850,7 +1850,7 @@ static void wireless_nlevent_process(unsigned long data) struct sk_buff *skb; while ((skb = skb_dequeue(&wireless_nlevent_queue))) - rtnl_notify(skb, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC); + netlink_broadcast(rtnl, skb, 0, RTNLGRP_LINK, GFP_ATOMIC); } static DECLARE_TASKLET(wireless_nlevent_tasklet, wireless_nlevent_process, 0); diff --git a/trunk/net/dccp/ackvec.c b/trunk/net/dccp/ackvec.c index 4d176d33983f..8c211c58893b 100644 --- a/trunk/net/dccp/ackvec.c +++ b/trunk/net/dccp/ackvec.c @@ -142,13 +142,14 @@ struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority) struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority); if (av != NULL) { - av->dccpav_buf_head = DCCP_MAX_ACKVEC_LEN - 1; + av->dccpav_buf_head = + av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; av->dccpav_buf_ackno = DCCP_MAX_SEQNO + 1; av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0; av->dccpav_ack_ptr = 0; av->dccpav_time.tv_sec = 0; av->dccpav_time.tv_usec = 0; - av->dccpav_vec_len = 0; + av->dccpav_sent_len = av->dccpav_vec_len = 0; INIT_LIST_HEAD(&av->dccpav_records); } @@ -352,13 +353,11 @@ static void dccp_ackvec_throw_record(struct dccp_ackvec *av, { struct dccp_ackvec_record *next; - /* sort out vector length */ - if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr) - av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head; - else - av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1 - - av->dccpav_buf_head - + avr->dccpavr_ack_ptr; + av->dccpav_buf_tail = avr->dccpavr_ack_ptr - 1; + if (av->dccpav_buf_tail == 0) + av->dccpav_buf_tail = DCCP_MAX_ACKVEC_LEN - 1; + + av->dccpav_vec_len -= avr->dccpavr_sent_len; /* free records */ list_for_each_entry_safe_from(avr, next, &av->dccpav_records, @@ -435,7 +434,8 @@ static void dccp_ackvec_check_rcv_ackvector(struct 
dccp_ackvec *av, break; found: if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, ackno)) { - const u8 state = *vector & DCCP_ACKVEC_STATE_MASK; + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) { #ifdef CONFIG_IP_DCCP_DEBUG struct dccp_sock *dp = dccp_sk(sk); diff --git a/trunk/net/dccp/ackvec.h b/trunk/net/dccp/ackvec.h index 2424effac7f6..0adf4b56c34c 100644 --- a/trunk/net/dccp/ackvec.h +++ b/trunk/net/dccp/ackvec.h @@ -54,7 +54,9 @@ struct dccp_ackvec { struct list_head dccpav_records; struct timeval dccpav_time; u8 dccpav_buf_head; + u8 dccpav_buf_tail; u8 dccpav_ack_ptr; + u8 dccpav_sent_len; u8 dccpav_vec_len; u8 dccpav_buf_nonce; u8 dccpav_ack_nonce; @@ -105,7 +107,7 @@ extern int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb); static inline int dccp_ackvec_pending(const struct dccp_ackvec *av) { - return av->dccpav_vec_len; + return av->dccpav_sent_len != av->dccpav_vec_len; } #else /* CONFIG_IP_DCCP_ACKVEC */ static inline int dccp_ackvec_init(void) diff --git a/trunk/net/dccp/ccids/Kconfig b/trunk/net/dccp/ccids/Kconfig index 32752f750447..ca00191628f7 100644 --- a/trunk/net/dccp/ccids/Kconfig +++ b/trunk/net/dccp/ccids/Kconfig @@ -30,14 +30,6 @@ config IP_DCCP_CCID2 If in doubt, say M. -config IP_DCCP_CCID2_DEBUG - bool "CCID2 debug" - depends on IP_DCCP_CCID2 - ---help--- - Enable CCID2 debug messages. - - If in doubt, say N. - config IP_DCCP_CCID3 tristate "CCID3 (TCP-Friendly) (EXPERIMENTAL)" depends on IP_DCCP diff --git a/trunk/net/dccp/ccids/ccid2.c b/trunk/net/dccp/ccids/ccid2.c index 457dd3db7f41..e9615627dcd6 100644 --- a/trunk/net/dccp/ccids/ccid2.c +++ b/trunk/net/dccp/ccids/ccid2.c @@ -27,6 +27,7 @@ * * BUGS: * - sequence number wrapping + * - jiffies wrapping */ #include "../ccid.h" @@ -35,7 +36,8 @@ static int ccid2_debug; -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG +#undef CCID2_DEBUG +#ifdef CCID2_DEBUG #define ccid2_pr_debug(format, a...) 
\ do { if (ccid2_debug) \ printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \ @@ -44,7 +46,9 @@ static int ccid2_debug; #define ccid2_pr_debug(format, a...) #endif -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG +static const int ccid2_seq_len = 128; + +#ifdef CCID2_DEBUG static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) { int len = 0; @@ -67,8 +71,8 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) /* packets are sent sequentially */ BUG_ON(seqp->ccid2s_seq <= prev->ccid2s_seq); - BUG_ON(time_before(seqp->ccid2s_sent, - prev->ccid2s_sent)); + BUG_ON(seqp->ccid2s_sent < prev->ccid2s_sent); + BUG_ON(len > ccid2_seq_len); seqp = prev; } @@ -80,57 +84,16 @@ static void ccid2_hc_tx_check_sanity(const struct ccid2_hc_tx_sock *hctx) do { seqp = seqp->ccid2s_prev; len++; + BUG_ON(len > ccid2_seq_len); } while (seqp != hctx->ccid2hctx_seqh); + BUG_ON(len != ccid2_seq_len); ccid2_pr_debug("total len=%d\n", len); - BUG_ON(len != hctx->ccid2hctx_seqbufc * CCID2_SEQBUF_LEN); } #else #define ccid2_hc_tx_check_sanity(hctx) do {} while (0) #endif -static int ccid2_hc_tx_alloc_seq(struct ccid2_hc_tx_sock *hctx, int num, - gfp_t gfp) -{ - struct ccid2_seq *seqp; - int i; - - /* check if we have space to preserve the pointer to the buffer */ - if (hctx->ccid2hctx_seqbufc >= (sizeof(hctx->ccid2hctx_seqbuf) / - sizeof(struct ccid2_seq*))) - return -ENOMEM; - - /* allocate buffer and initialize linked list */ - seqp = kmalloc(sizeof(*seqp) * num, gfp); - if (seqp == NULL) - return -ENOMEM; - - for (i = 0; i < (num - 1); i++) { - seqp[i].ccid2s_next = &seqp[i + 1]; - seqp[i + 1].ccid2s_prev = &seqp[i]; - } - seqp[num - 1].ccid2s_next = seqp; - seqp->ccid2s_prev = &seqp[num - 1]; - - /* This is the first allocation. Initiate the head and tail. 
*/ - if (hctx->ccid2hctx_seqbufc == 0) - hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqt = seqp; - else { - /* link the existing list with the one we just created */ - hctx->ccid2hctx_seqh->ccid2s_next = seqp; - seqp->ccid2s_prev = hctx->ccid2hctx_seqh; - - hctx->ccid2hctx_seqt->ccid2s_prev = &seqp[num - 1]; - seqp[num - 1].ccid2s_next = hctx->ccid2hctx_seqt; - } - - /* store the original pointer to the buffer so we can free it */ - hctx->ccid2hctx_seqbuf[hctx->ccid2hctx_seqbufc] = seqp; - hctx->ccid2hctx_seqbufc++; - - return 0; -} - static int ccid2_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb, int len) { @@ -159,7 +122,7 @@ static int ccid2_hc_tx_send_packet(struct sock *sk, } } - return 1; /* XXX CCID should dequeue when ready instead of polling */ + return 100; /* XXX */ } static void ccid2_change_l_ack_ratio(struct sock *sk, int val) @@ -187,8 +150,10 @@ static void ccid2_change_l_ack_ratio(struct sock *sk, int val) dp->dccps_l_ack_ratio = val; } -static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) +static void ccid2_change_cwnd(struct sock *sk, int val) { + struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); + if (val == 0) val = 1; @@ -199,17 +164,6 @@ static void ccid2_change_cwnd(struct ccid2_hc_tx_sock *hctx, int val) hctx->ccid2hctx_cwnd = val; } -static void ccid2_change_srtt(struct ccid2_hc_tx_sock *hctx, long val) -{ - ccid2_pr_debug("change SRTT to %ld\n", val); - hctx->ccid2hctx_srtt = val; -} - -static void ccid2_change_pipe(struct ccid2_hc_tx_sock *hctx, long val) -{ - hctx->ccid2hctx_pipe = val; -} - static void ccid2_start_rto_timer(struct sock *sk); static void ccid2_hc_tx_rto_expire(unsigned long data) @@ -239,11 +193,11 @@ static void ccid2_hc_tx_rto_expire(unsigned long data) ccid2_start_rto_timer(sk); /* adjust pipe, cwnd etc */ - ccid2_change_pipe(hctx, 0); + hctx->ccid2hctx_pipe = 0; hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd >> 1; if (hctx->ccid2hctx_ssthresh < 2) hctx->ccid2hctx_ssthresh = 2; - 
ccid2_change_cwnd(hctx, 1); + ccid2_change_cwnd(sk, 1); /* clear state about stuff we sent */ hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; @@ -278,14 +232,13 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) { struct dccp_sock *dp = dccp_sk(sk); struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - struct ccid2_seq *next; u64 seq; ccid2_hc_tx_check_sanity(hctx); BUG_ON(!hctx->ccid2hctx_sendwait); hctx->ccid2hctx_sendwait = 0; - ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe + 1); + hctx->ccid2hctx_pipe++; BUG_ON(hctx->ccid2hctx_pipe < 0); /* There is an issue. What if another packet is sent between @@ -298,24 +251,16 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) hctx->ccid2hctx_seqh->ccid2s_seq = seq; hctx->ccid2hctx_seqh->ccid2s_acked = 0; hctx->ccid2hctx_seqh->ccid2s_sent = jiffies; - - next = hctx->ccid2hctx_seqh->ccid2s_next; - /* check if we need to alloc more space */ - if (next == hctx->ccid2hctx_seqt) { - int rc; - - ccid2_pr_debug("allocating more space in history\n"); - rc = ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_KERNEL); - BUG_ON(rc); /* XXX what do we do? */ - - next = hctx->ccid2hctx_seqh->ccid2s_next; - BUG_ON(next == hctx->ccid2hctx_seqt); - } - hctx->ccid2hctx_seqh = next; + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqh->ccid2s_next; ccid2_pr_debug("cwnd=%d pipe=%d\n", hctx->ccid2hctx_cwnd, hctx->ccid2hctx_pipe); + if (hctx->ccid2hctx_seqh == hctx->ccid2hctx_seqt) { + /* XXX allocate more space */ + WARN_ON(1); + } + hctx->ccid2hctx_sent++; /* Ack Ratio. 
Need to maintain a concept of how many windows we sent */ @@ -350,7 +295,7 @@ static void ccid2_hc_tx_packet_sent(struct sock *sk, int more, int len) if (!timer_pending(&hctx->ccid2hctx_rtotimer)) ccid2_start_rto_timer(sk); -#ifdef CONFIG_IP_DCCP_CCID2_DEBUG +#ifdef CCID2_DEBUG ccid2_pr_debug("pipe=%d\n", hctx->ccid2hctx_pipe); ccid2_pr_debug("Sent: seq=%llu\n", seq); do { @@ -453,7 +398,7 @@ static inline void ccid2_new_ack(struct sock *sk, /* increase every 2 acks */ hctx->ccid2hctx_ssacks++; if (hctx->ccid2hctx_ssacks == 2) { - ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd+1); + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); hctx->ccid2hctx_ssacks = 0; *maxincr = *maxincr - 1; } @@ -466,28 +411,26 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_acks++; if (hctx->ccid2hctx_acks >= hctx->ccid2hctx_cwnd) { - ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd + 1); + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd + 1); hctx->ccid2hctx_acks = 0; } } /* update RTO */ if (hctx->ccid2hctx_srtt == -1 || - time_after(jiffies, hctx->ccid2hctx_lastrtt + hctx->ccid2hctx_srtt)) { - unsigned long r = (long)jiffies - (long)seqp->ccid2s_sent; + (jiffies - hctx->ccid2hctx_lastrtt) >= hctx->ccid2hctx_srtt) { + unsigned long r = jiffies - seqp->ccid2s_sent; int s; /* first measurement */ if (hctx->ccid2hctx_srtt == -1) { ccid2_pr_debug("R: %lu Time=%lu seq=%llu\n", r, jiffies, seqp->ccid2s_seq); - ccid2_change_srtt(hctx, r); + hctx->ccid2hctx_srtt = r; hctx->ccid2hctx_rttvar = r >> 1; } else { /* RTTVAR */ long tmp = hctx->ccid2hctx_srtt - r; - long srtt; - if (tmp < 0) tmp *= -1; @@ -497,12 +440,10 @@ static inline void ccid2_new_ack(struct sock *sk, hctx->ccid2hctx_rttvar += tmp; /* SRTT */ - srtt = hctx->ccid2hctx_srtt; - srtt *= 7; - srtt >>= 3; + hctx->ccid2hctx_srtt *= 7; + hctx->ccid2hctx_srtt >>= 3; tmp = r >> 3; - srtt += tmp; - ccid2_change_srtt(hctx, srtt); + hctx->ccid2hctx_srtt += tmp; } s = hctx->ccid2hctx_rttvar << 2; /* clock granularity is 1 when based 
on jiffies */ @@ -538,29 +479,13 @@ static void ccid2_hc_tx_dec_pipe(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - ccid2_change_pipe(hctx, hctx->ccid2hctx_pipe-1); + hctx->ccid2hctx_pipe--; BUG_ON(hctx->ccid2hctx_pipe < 0); if (hctx->ccid2hctx_pipe == 0) ccid2_hc_tx_kill_rto_timer(sk); } -static void ccid2_congestion_event(struct ccid2_hc_tx_sock *hctx, - struct ccid2_seq *seqp) -{ - if (time_before(seqp->ccid2s_sent, hctx->ccid2hctx_last_cong)) { - ccid2_pr_debug("Multiple losses in an RTT---treating as one\n"); - return; - } - - hctx->ccid2hctx_last_cong = jiffies; - - ccid2_change_cwnd(hctx, hctx->ccid2hctx_cwnd >> 1); - hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; - if (hctx->ccid2hctx_ssthresh < 2) - hctx->ccid2hctx_ssthresh = 2; -} - static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) { struct dccp_sock *dp = dccp_sk(sk); @@ -571,6 +496,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) unsigned char veclen; int offset = 0; int done = 0; + int loss = 0; unsigned int maxincr = 0; ccid2_hc_tx_check_sanity(hctx); @@ -656,16 +582,15 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) * run length */ while (between48(seqp->ccid2s_seq,ackno_end_rl,ackno)) { - const u8 state = *vector & - DCCP_ACKVEC_STATE_MASK; + const u8 state = (*vector & + DCCP_ACKVEC_STATE_MASK) >> 6; /* new packet received or marked */ if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED && !seqp->ccid2s_acked) { if (state == DCCP_ACKVEC_STATE_ECN_MARKED) { - ccid2_congestion_event(hctx, - seqp); + loss = 1; } else ccid2_new_ack(sk, seqp, &maxincr); @@ -717,13 +642,7 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) /* check for lost packets */ while (1) { if (!seqp->ccid2s_acked) { - ccid2_pr_debug("Packet lost: %llu\n", - seqp->ccid2s_seq); - /* XXX need to traverse from tail -> head in - * order to detect multiple congestion events in - * one ack vector. 
- */ - ccid2_congestion_event(hctx, seqp); + loss = 1; ccid2_hc_tx_dec_pipe(sk); } if (seqp == hctx->ccid2hctx_seqt) @@ -742,33 +661,53 @@ static void ccid2_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb) hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqt->ccid2s_next; } + if (loss) { + /* XXX do bit shifts guarantee a 0 as the new bit? */ + ccid2_change_cwnd(sk, hctx->ccid2hctx_cwnd >> 1); + hctx->ccid2hctx_ssthresh = hctx->ccid2hctx_cwnd; + if (hctx->ccid2hctx_ssthresh < 2) + hctx->ccid2hctx_ssthresh = 2; + } + ccid2_hc_tx_check_sanity(hctx); } static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid_priv(ccid); + int seqcount = ccid2_seq_len; + int i; - ccid2_change_cwnd(hctx, 1); - /* Initialize ssthresh to infinity. This means that we will exit the - * initial slow-start after the first packet loss. This is what we - * want. - */ - hctx->ccid2hctx_ssthresh = ~0; + /* XXX init variables with proper values */ + hctx->ccid2hctx_cwnd = 1; + hctx->ccid2hctx_ssthresh = 10; hctx->ccid2hctx_numdupack = 3; - hctx->ccid2hctx_seqbufc = 0; /* XXX init ~ to window size... 
*/ - if (ccid2_hc_tx_alloc_seq(hctx, CCID2_SEQBUF_LEN, GFP_ATOMIC) != 0) + hctx->ccid2hctx_seqbuf = kmalloc(sizeof(*hctx->ccid2hctx_seqbuf) * + seqcount, gfp_any()); + if (hctx->ccid2hctx_seqbuf == NULL) return -ENOMEM; + for (i = 0; i < (seqcount - 1); i++) { + hctx->ccid2hctx_seqbuf[i].ccid2s_next = + &hctx->ccid2hctx_seqbuf[i + 1]; + hctx->ccid2hctx_seqbuf[i + 1].ccid2s_prev = + &hctx->ccid2hctx_seqbuf[i]; + } + hctx->ccid2hctx_seqbuf[seqcount - 1].ccid2s_next = + hctx->ccid2hctx_seqbuf; + hctx->ccid2hctx_seqbuf->ccid2s_prev = + &hctx->ccid2hctx_seqbuf[seqcount - 1]; + + hctx->ccid2hctx_seqh = hctx->ccid2hctx_seqbuf; + hctx->ccid2hctx_seqt = hctx->ccid2hctx_seqh; hctx->ccid2hctx_sent = 0; hctx->ccid2hctx_rto = 3 * HZ; - ccid2_change_srtt(hctx, -1); + hctx->ccid2hctx_srtt = -1; hctx->ccid2hctx_rttvar = -1; hctx->ccid2hctx_lastrtt = 0; hctx->ccid2hctx_rpdupack = -1; - hctx->ccid2hctx_last_cong = jiffies; hctx->ccid2hctx_rtotimer.function = &ccid2_hc_tx_rto_expire; hctx->ccid2hctx_rtotimer.data = (unsigned long)sk; @@ -781,13 +720,10 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) static void ccid2_hc_tx_exit(struct sock *sk) { struct ccid2_hc_tx_sock *hctx = ccid2_hc_tx_sk(sk); - int i; ccid2_hc_tx_kill_rto_timer(sk); - - for (i = 0; i < hctx->ccid2hctx_seqbufc; i++) - kfree(hctx->ccid2hctx_seqbuf[i]); - hctx->ccid2hctx_seqbufc = 0; + kfree(hctx->ccid2hctx_seqbuf); + hctx->ccid2hctx_seqbuf = NULL; } static void ccid2_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb) diff --git a/trunk/net/dccp/ccids/ccid2.h b/trunk/net/dccp/ccids/ccid2.h index 5b2ef4acb300..451a87464fa5 100644 --- a/trunk/net/dccp/ccids/ccid2.h +++ b/trunk/net/dccp/ccids/ccid2.h @@ -35,9 +35,6 @@ struct ccid2_seq { struct ccid2_seq *ccid2s_next; }; -#define CCID2_SEQBUF_LEN 256 -#define CCID2_SEQBUF_MAX 128 - /** struct ccid2_hc_tx_sock - CCID2 TX half connection * * @ccid2hctx_ssacks - ACKs recv in slow start @@ -53,11 +50,10 @@ struct ccid2_hc_tx_sock { int 
ccid2hctx_cwnd; int ccid2hctx_ssacks; int ccid2hctx_acks; - unsigned int ccid2hctx_ssthresh; + int ccid2hctx_ssthresh; int ccid2hctx_pipe; int ccid2hctx_numdupack; - struct ccid2_seq *ccid2hctx_seqbuf[CCID2_SEQBUF_MAX]; - int ccid2hctx_seqbufc; + struct ccid2_seq *ccid2hctx_seqbuf; struct ccid2_seq *ccid2hctx_seqh; struct ccid2_seq *ccid2hctx_seqt; long ccid2hctx_rto; @@ -71,7 +67,6 @@ struct ccid2_hc_tx_sock { u64 ccid2hctx_rpseq; int ccid2hctx_rpdupack; int ccid2hctx_sendwait; - unsigned long ccid2hctx_last_cong; }; struct ccid2_hc_rx_sock { diff --git a/trunk/net/dccp/ccids/ccid3.c b/trunk/net/dccp/ccids/ccid3.c index 195aa9566228..090bc39e8199 100644 --- a/trunk/net/dccp/ccids/ccid3.c +++ b/trunk/net/dccp/ccids/ccid3.c @@ -900,7 +900,7 @@ static u32 ccid3_hc_rx_calc_first_li(struct sock *sk) static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) { struct ccid3_hc_rx_sock *hcrx = ccid3_hc_rx_sk(sk); - struct dccp_li_hist_entry *head; + struct dccp_li_hist_entry *next, *head; u64 seq_temp; if (list_empty(&hcrx->ccid3hcrx_li_hist)) { @@ -908,15 +908,15 @@ static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss) &hcrx->ccid3hcrx_li_hist, seq_loss, win_loss)) return; - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); - head->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); + next = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; + next->dccplih_interval = ccid3_hc_rx_calc_first_li(sk); } else { struct dccp_li_hist_entry *entry; struct list_head *tail; - head = list_entry(hcrx->ccid3hcrx_li_hist.next, - struct dccp_li_hist_entry, dccplih_node); + head = (struct dccp_li_hist_entry *) + hcrx->ccid3hcrx_li_hist.next; /* FIXME win count check removed as was wrong */ /* should make this check with receive history */ /* and compare there as per section 10.2 of RFC4342 */ diff --git a/trunk/net/dccp/dccp.h b/trunk/net/dccp/dccp.h index 0a21be437ed3..a5c5475724c0 100644 --- 
a/trunk/net/dccp/dccp.h +++ b/trunk/net/dccp/dccp.h @@ -130,7 +130,7 @@ extern void dccp_send_delayed_ack(struct sock *sk); extern void dccp_send_sync(struct sock *sk, const u64 seq, const enum dccp_pkt_type pkt_type); -extern void dccp_write_xmit(struct sock *sk, int block); +extern int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo); extern void dccp_write_space(struct sock *sk); extern void dccp_init_xmit_timers(struct sock *sk); diff --git a/trunk/net/dccp/feat.h b/trunk/net/dccp/feat.h index cee553d416ca..b44c45504fb6 100644 --- a/trunk/net/dccp/feat.h +++ b/trunk/net/dccp/feat.h @@ -27,10 +27,5 @@ extern int dccp_feat_clone(struct sock *oldsk, struct sock *newsk); extern int dccp_feat_init(struct dccp_minisock *dmsk); extern int dccp_feat_default_sequence_window; -extern int dccp_feat_default_rx_ccid; -extern int dccp_feat_default_tx_ccid; -extern int dccp_feat_default_ack_ratio; -extern int dccp_feat_default_send_ack_vector; -extern int dccp_feat_default_send_ndp_count; #endif /* _DCCP_FEAT_H */ diff --git a/trunk/net/dccp/ipv4.c b/trunk/net/dccp/ipv4.c index 9a1a76a7dc41..7f56f7e8f571 100644 --- a/trunk/net/dccp/ipv4.c +++ b/trunk/net/dccp/ipv4.c @@ -501,9 +501,6 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; @@ -608,10 +605,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, dh->dccph_dport, - inet_iif(skb)); + nsk = __inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, ntohs(dh->dccph_dport), + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -681,7 +678,6 @@ static struct dst_entry* 
dccp_v4_route_skb(struct sock *sk, } }; - security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -925,7 +921,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, dh->dccph_dport, + skb->nh.iph->daddr, ntohs(dh->dccph_dport), inet_iif(skb)); /* diff --git a/trunk/net/dccp/ipv6.c b/trunk/net/dccp/ipv6.c index 7a47399cf31f..610c722ac27f 100644 --- a/trunk/net/dccp/ipv6.c +++ b/trunk/net/dccp/ipv6.c @@ -201,7 +201,6 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; - security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -231,7 +230,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -323,7 +322,6 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; - security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -424,7 +422,6 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -569,7 +566,6 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_skb_classify_flow(rxskb, &fl); 
/* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -626,7 +622,6 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -709,9 +704,6 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); @@ -850,7 +842,6 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; @@ -872,7 +863,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. 
-acme */ - __ip6_dst_store(newsk, dst, NULL, NULL); + __ip6_dst_store(newsk, dst, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; @@ -970,7 +961,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 0)) goto discard; /* diff --git a/trunk/net/dccp/output.c b/trunk/net/dccp/output.c index 7102e3aed4ca..58669beee132 100644 --- a/trunk/net/dccp/output.c +++ b/trunk/net/dccp/output.c @@ -198,7 +198,7 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, while (1) { prepare_to_wait(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE); - if (sk->sk_err) + if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; if (!*timeo) goto do_nonblock; @@ -234,72 +234,37 @@ static int dccp_wait_for_ccid(struct sock *sk, struct sk_buff *skb, goto out; } -static void dccp_write_xmit_timer(unsigned long data) { - struct sock *sk = (struct sock *)data; - struct dccp_sock *dp = dccp_sk(sk); - - bh_lock_sock(sk); - if (sock_owned_by_user(sk)) - sk_reset_timer(sk, &dp->dccps_xmit_timer, jiffies+1); - else - dccp_write_xmit(sk, 0); - bh_unlock_sock(sk); - sock_put(sk); -} - -void dccp_write_xmit(struct sock *sk, int block) +int dccp_write_xmit(struct sock *sk, struct sk_buff *skb, long *timeo) { - struct dccp_sock *dp = dccp_sk(sk); - struct sk_buff *skb; - long timeo = 30000; /* If a packet is taking longer than 2 secs - we have other issues */ - - while ((skb = skb_peek(&sk->sk_write_queue))) { - int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, + const struct dccp_sock *dp = dccp_sk(sk); + int err = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk, skb, skb->len); - if (err > 0) { - if (!block) { - sk_reset_timer(sk, &dp->dccps_xmit_timer, - msecs_to_jiffies(err)+jiffies); - break; - } else - err = dccp_wait_for_ccid(sk, skb, &timeo); - if (err) { - 
printk(KERN_CRIT "%s:err at dccp_wait_for_ccid" - " %d\n", __FUNCTION__, err); - dump_stack(); - } - } + if (err > 0) + err = dccp_wait_for_ccid(sk, skb, timeo); - skb_dequeue(&sk->sk_write_queue); - if (err == 0) { - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - const int len = skb->len; + if (err == 0) { + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + const int len = skb->len; - if (sk->sk_state == DCCP_PARTOPEN) { - /* See 8.1.5. Handshake Completion */ - inet_csk_schedule_ack(sk); - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, + if (sk->sk_state == DCCP_PARTOPEN) { + /* See 8.1.5. Handshake Completion */ + inet_csk_schedule_ack(sk); + inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, DCCP_RTO_MAX); - dcb->dccpd_type = DCCP_PKT_DATAACK; - } else if (dccp_ack_pending(sk)) - dcb->dccpd_type = DCCP_PKT_DATAACK; - else - dcb->dccpd_type = DCCP_PKT_DATA; - - err = dccp_transmit_skb(sk, skb); - ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); - if (err) { - printk(KERN_CRIT "%s:err from " - "ccid_hc_tx_packet_sent %d\n", - __FUNCTION__, err); - dump_stack(); - } - } else - kfree(skb); - } + dcb->dccpd_type = DCCP_PKT_DATAACK; + } else if (dccp_ack_pending(sk)) + dcb->dccpd_type = DCCP_PKT_DATAACK; + else + dcb->dccpd_type = DCCP_PKT_DATA; + + err = dccp_transmit_skb(sk, skb); + ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len); + } else + kfree_skb(skb); + + return err; } int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb) @@ -461,9 +426,6 @@ static inline void dccp_connect_init(struct sock *sk) dccp_set_seqno(&dp->dccps_awl, max48(dp->dccps_awl, dp->dccps_iss)); icsk->icsk_retransmits = 0; - init_timer(&dp->dccps_xmit_timer); - dp->dccps_xmit_timer.data = (unsigned long)sk; - dp->dccps_xmit_timer.function = dccp_write_xmit_timer; } int dccp_connect(struct sock *sk) @@ -598,10 +560,8 @@ void dccp_send_close(struct sock *sk, const int active) DCCP_PKT_CLOSE : DCCP_PKT_CLOSEREQ; if (active) { - dccp_write_xmit(sk, 1); 
dccp_skb_entail(sk, skb); dccp_transmit_skb(sk, skb_clone(skb, prio)); - /* FIXME do we need a retransmit timer here? */ } else dccp_transmit_skb(sk, skb); } diff --git a/trunk/net/dccp/proto.c b/trunk/net/dccp/proto.c index 962df0ea31aa..6f14bb5a28d4 100644 --- a/trunk/net/dccp/proto.c +++ b/trunk/net/dccp/proto.c @@ -662,8 +662,17 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - skb_queue_tail(&sk->sk_write_queue, skb); - dccp_write_xmit(sk,0); + rc = dccp_write_xmit(sk, skb, &timeo); + /* + * XXX we don't use sk_write_queue, so just discard the packet. + * Current plan however is to _use_ sk_write_queue with + * an algorith similar to tcp_sendmsg, where the main difference + * is that in DCCP we have to respect packet boundaries, so + * no coalescing of skbs. + * + * This bug was _quickly_ found & fixed by just looking at an OSTRA + * generated callgraph 8) -acme + */ out_release: release_sock(sk); return rc ? : len; @@ -837,7 +846,6 @@ static int dccp_close_state(struct sock *sk) void dccp_close(struct sock *sk, long timeout) { - struct dccp_sock *dp = dccp_sk(sk); struct sk_buff *skb; int state; @@ -854,8 +862,6 @@ void dccp_close(struct sock *sk, long timeout) goto adjudge_to_death; } - sk_stop_timer(sk, &dp->dccps_xmit_timer); - /* * We need to flush the recv. buffs. 
We do this only on the * descriptor close, not protocol-sourced closes, because the diff --git a/trunk/net/dccp/sysctl.c b/trunk/net/dccp/sysctl.c index 38bc157876f3..c1ba9451bc3d 100644 --- a/trunk/net/dccp/sysctl.c +++ b/trunk/net/dccp/sysctl.c @@ -11,12 +11,18 @@ #include #include -#include "feat.h" #ifndef CONFIG_SYSCTL #error This file should not be compiled without CONFIG_SYSCTL defined #endif +extern int dccp_feat_default_sequence_window; +extern int dccp_feat_default_rx_ccid; +extern int dccp_feat_default_tx_ccid; +extern int dccp_feat_default_ack_ratio; +extern int dccp_feat_default_send_ack_vector; +extern int dccp_feat_default_send_ndp_count; + static struct ctl_table dccp_default_table[] = { { .ctl_name = NET_DCCP_DEFAULT_SEQ_WINDOW, diff --git a/trunk/net/decnet/Kconfig b/trunk/net/decnet/Kconfig index 36e72cb145b0..92f2ec46fd22 100644 --- a/trunk/net/decnet/Kconfig +++ b/trunk/net/decnet/Kconfig @@ -27,7 +27,6 @@ config DECNET config DECNET_ROUTER bool "DECnet: router support (EXPERIMENTAL)" depends on DECNET && EXPERIMENTAL - select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. 
If you diff --git a/trunk/net/decnet/af_decnet.c b/trunk/net/decnet/af_decnet.c index 70e027375682..5486247735f6 100644 --- a/trunk/net/decnet/af_decnet.c +++ b/trunk/net/decnet/af_decnet.c @@ -130,7 +130,6 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include #include #include #include diff --git a/trunk/net/decnet/dn_dev.c b/trunk/net/decnet/dn_dev.c index 01861feb608d..476455fbdb03 100644 --- a/trunk/net/decnet/dn_dev.c +++ b/trunk/net/decnet/dn_dev.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -46,7 +45,6 @@ #include #include #include -#include #include #include #include @@ -746,23 +744,20 @@ static int dn_dev_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, static void rtmsg_ifa(int event, struct dn_ifaddr *ifa) { struct sk_buff *skb; - int payload = sizeof(struct ifaddrmsg) + 128; - int err = -ENOBUFS; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg)+128); - skb = alloc_skb(nlmsg_total_size(payload), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) { + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, ENOBUFS); + return; + } + if (dn_dev_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_DECnet_IFADDR, EINVAL); + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_DECnet_IFADDR, NULL, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_IFADDR, err); + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_DECnet_IFADDR, GFP_KERNEL); } static int dn_dev_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1422,6 +1417,8 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, 
+ [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, + [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, diff --git a/trunk/net/decnet/dn_fib.c b/trunk/net/decnet/dn_fib.c index 1cf010124ec5..fa20e2efcfc1 100644 --- a/trunk/net/decnet/dn_fib.c +++ b/trunk/net/decnet/dn_fib.c @@ -34,7 +34,6 @@ #include #include #include -#include #include #include #include @@ -55,9 +54,11 @@ #define endfor_nexthops(fi) } +extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); + static DEFINE_SPINLOCK(dn_fib_multipath_lock); static struct dn_fib_info *dn_fib_info_list; -static DEFINE_SPINLOCK(dn_fib_info_lock); +static DEFINE_RWLOCK(dn_fib_info_lock); static struct { @@ -78,9 +79,6 @@ static struct [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, }; -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); -static int dn_fib_sync_up(struct net_device *dev); - void dn_fib_free_info(struct dn_fib_info *fi) { if (fi->fib_dead == 0) { @@ -98,7 +96,7 @@ void dn_fib_free_info(struct dn_fib_info *fi) void dn_fib_release_info(struct dn_fib_info *fi) { - spin_lock(&dn_fib_info_lock); + write_lock(&dn_fib_info_lock); if (fi && --fi->fib_treeref == 0) { if (fi->fib_next) fi->fib_next->fib_prev = fi->fib_prev; @@ -109,7 +107,7 @@ void dn_fib_release_info(struct dn_fib_info *fi) fi->fib_dead = 1; dn_fib_info_put(fi); } - spin_unlock(&dn_fib_info_lock); + write_unlock(&dn_fib_info_lock); } static inline int dn_fib_nh_comp(const struct dn_fib_info *fi, const struct dn_fib_info *ofi) @@ -380,13 +378,13 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct dn_kern_rta fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - spin_lock(&dn_fib_info_lock); + write_lock(&dn_fib_info_lock); fi->fib_next = dn_fib_info_list; fi->fib_prev = NULL; if (dn_fib_info_list) dn_fib_info_list->fib_prev = 
fi; dn_fib_info_list = fi; - spin_unlock(&dn_fib_info_lock); + write_unlock(&dn_fib_info_lock); return fi; err_inval: @@ -492,8 +490,7 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) + if (i != RTA_MULTIPATH && i != RTA_METRICS) rta[i-1] = (struct rtattr *)RTA_DATA(attr); } } @@ -510,7 +507,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); + tb = dn_fib_get_table(r->rtm_table, 0); if (tb) return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); @@ -526,13 +523,46 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); + tb = dn_fib_get_table(r->rtm_table, 1); if (tb) return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); return -ENOBUFS; } + +int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + int t; + int s_t; + struct dn_fib_table *tb; + + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return dn_cache_dump(skb, cb); + + s_t = cb->args[0]; + if (s_t == 0) + s_t = cb->args[0] = RT_MIN_TABLE; + + for(t = s_t; t <= RT_TABLE_MAX; t++) { + if (t < s_t) + continue; + if (t > s_t) + memset(&cb->args[1], 0, + sizeof(cb->args) - sizeof(cb->args[0])); + tb = dn_fib_get_table(t, 0); + if (tb == NULL) + continue; + if (tb->dump(tb, skb, cb) < 0) + break; + } + + cb->args[0] = t; + + return skb->len; +} + static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) { struct dn_fib_table *tb; @@ -652,7 +682,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned 
long event, return NOTIFY_DONE; } -static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) +int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -696,7 +726,7 @@ static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) } -static int dn_fib_sync_up(struct net_device *dev) +int dn_fib_sync_up(struct net_device *dev) { int ret = 0; @@ -730,6 +760,22 @@ static int dn_fib_sync_up(struct net_device *dev) return ret; } +void dn_fib_flush(void) +{ + int flushed = 0; + struct dn_fib_table *tb; + int id; + + for(id = RT_TABLE_MAX; id > 0; id--) { + if ((tb = dn_fib_get_table(id, 0)) == NULL) + continue; + flushed += tb->flush(tb); + } + + if (flushed) + dn_rt_cache_flush(-1); +} + static struct notifier_block dn_fib_dnaddr_notifier = { .notifier_call = dn_fib_dnaddr_event, }; diff --git a/trunk/net/decnet/dn_nsp_in.c b/trunk/net/decnet/dn_nsp_in.c index 72ecc6e62ec4..86f7f3b28e70 100644 --- a/trunk/net/decnet/dn_nsp_in.c +++ b/trunk/net/decnet/dn_nsp_in.c @@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb); + err = sk_filter(sk, skb, 0); if (err) goto out; diff --git a/trunk/net/decnet/dn_route.c b/trunk/net/decnet/dn_route.c index dd0761e3d280..743e9fcf7c5a 100644 --- a/trunk/net/decnet/dn_route.c +++ b/trunk/net/decnet/dn_route.c @@ -80,7 +80,6 @@ #include #include #include -#include #include #include #include @@ -1285,7 +1284,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = fl.fld_src; /* no NAT support for now */ + src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { @@ -1486,7 +1485,6 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 0; r->rtm_tos = 0; r->rtm_table = RT_TABLE_MAIN; - RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 
r->rtm_type = rt->rt_type; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; r->rtm_scope = RT_SCOPE_UNIVERSE; @@ -1611,7 +1609,9 @@ int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) goto out_free; } - return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + + return err; out_free: kfree_skb(skb); @@ -1781,9 +1781,14 @@ void __init dn_route_init(void) { int i, goal, order; - dn_dst_ops.kmem_cachep = - kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", + sizeof(struct dn_route), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + if (!dn_dst_ops.kmem_cachep) + panic("DECnet: Failed to allocate dn_dst_cache\n"); + init_timer(&dn_route_timer); dn_route_timer.function = dn_dst_check_expire; dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; diff --git a/trunk/net/decnet/dn_rules.c b/trunk/net/decnet/dn_rules.c index 3e0c882c90bf..6986be754ef2 100644 --- a/trunk/net/decnet/dn_rules.c +++ b/trunk/net/decnet/dn_rules.c @@ -11,213 +11,259 @@ * * * Changes: - * Steve Whitehouse - * Updated for Thomas Graf's generic rules * */ +#include #include +#include +#include #include +#include #include #include +#include #include +#include #include +#include #include #include +#include +#include #include #include #include -#include #include #include #include #include -static struct fib_rules_ops dn_fib_rules_ops; - struct dn_fib_rule { - struct fib_rule common; - unsigned char dst_len; - unsigned char src_len; - __le16 src; - __le16 srcmask; - __le16 dst; - __le16 dstmask; - __le16 srcmap; - u8 flags; + struct hlist_node r_hlist; + atomic_t r_clntref; + u32 r_preference; + unsigned char r_table; + unsigned char r_action; + unsigned char r_dst_len; + unsigned char r_src_len; + __le16 r_src; + __le16 r_srcmask; + __le16 r_dst; + __le16 r_dstmask; + __le16 
r_srcmap; + u8 r_flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; + u32 r_fwmark; #endif + int r_ifindex; + char r_ifname[IFNAMSIZ]; + int r_dead; + struct rcu_head rcu; }; static struct dn_fib_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7fff, - .table = RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, + .r_clntref = ATOMIC_INIT(2), + .r_preference = 0x7fff, + .r_table = RT_TABLE_MAIN, + .r_action = RTN_UNICAST }; -static LIST_HEAD(dn_fib_rules); - +static struct hlist_head dn_fib_rules; -int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) +int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct fib_lookup_arg arg = { - .result = res, - }; - int err; - - err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); - res->r = arg.rule; + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct dn_fib_rule *r; + struct hlist_node *node; + int err = -ESRCH; + + hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { + if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && + (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && +#ifdef CONFIG_DECNET_ROUTE_FWMARK + (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && +#endif + (!rtm->rtm_type || rtm->rtm_type == r->r_action) && + (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && + (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { + + err = -EPERM; + if (r == &default_rule) + break; + + hlist_del_rcu(&r->r_hlist); + r->r_dead = 1; + dn_fib_rule_put(r); + err = 0; + break; + } + } return err; } -static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) +static inline void 
dn_fib_rule_put_rcu(struct rcu_head *head) { - int err = -EAGAIN; - struct dn_fib_table *tbl; - - switch(rule->action) { - case FR_ACT_TO_TBL: - break; - - case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; - - case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); + kfree(r); +} - case FR_ACT_BLACKHOLE: - default: - err = -EINVAL; - goto errout; +void dn_fib_rule_put(struct dn_fib_rule *r) +{ + if (atomic_dec_and_test(&r->r_clntref)) { + if (r->r_dead) + call_rcu(&r->rcu, dn_fib_rule_put_rcu); + else + printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); } - - tbl = dn_fib_get_table(rule->table, 0); - if (tbl == NULL) - goto errout; - - err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); - if (err > 0) - err = -EAGAIN; -errout: - return err; } -static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, - [FRA_SRC] = { .type = NLA_U16 }, - [FRA_DST] = { .type = NLA_U16 }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, -}; -static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - u16 daddr = fl->fld_dst; - u16 saddr = fl->fld_src; - - if (((saddr ^ r->src) & r->srcmask) || - ((daddr ^ r->dst) & r->dstmask)) - return 0; + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct dn_fib_rule *r, *new_r, *last = NULL; + struct hlist_node *node = NULL; + unsigned char table_id; + + if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) + return -EINVAL; + + if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) + return -EINVAL; + + if (rtm->rtm_type == RTN_NAT) + return -EINVAL; + + table_id = rtm->rtm_table; + if 
(table_id == RT_TABLE_UNSPEC) { + struct dn_fib_table *tb; + if (rtm->rtm_type == RTN_UNICAST) { + if ((tb = dn_fib_empty_table()) == NULL) + return -ENOBUFS; + table_id = tb->n; + } + } + new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; + + if (rta[RTA_SRC-1]) + memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); + if (rta[RTA_DST-1]) + memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); + if (rta[RTA_GATEWAY-1]) + memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); + new_r->r_src_len = rtm->rtm_src_len; + new_r->r_dst_len = rtm->rtm_dst_len; + new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); + new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if ((r->fwmark ^ fl->fld_fwmark) & r->fwmask) - return 0; + if (rta[RTA_PROTOINFO-1]) + memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); #endif - - return 1; -} - -static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - int err = -EINVAL; - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - - if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) - goto errout; - - if (rule->table == RT_TABLE_UNSPEC) { - if (rule->action == FR_ACT_TO_TBL) { - struct dn_fib_table *table; - - table = dn_fib_empty_table(); - if (table == NULL) { - err = -ENOBUFS; - goto errout; - } - - rule->table = table->n; + new_r->r_action = rtm->rtm_type; + new_r->r_flags = rtm->rtm_flags; + if (rta[RTA_PRIORITY-1]) + memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + new_r->r_table = table_id; + if (rta[RTA_IIF-1]) { + struct net_device *dev; + rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); + new_r->r_ifindex = -1; + dev = dev_get_by_name(new_r->r_ifname); + if (dev) { + new_r->r_ifindex = dev->ifindex; + dev_put(dev); } } - if (tb[FRA_SRC]) - r->src = nla_get_u16(tb[FRA_SRC]); - - if (tb[FRA_DST]) - r->dst = nla_get_u16(tb[FRA_DST]); - 
-#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - r->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (r->fwmark) - /* compatibility: if the mark value is non-zero all bits - * are compared unless a mask is explicitly specified. - */ - r->fwmask = 0xFFFFFFFF; + r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); + if (!new_r->r_preference) { + if (r && r->r_hlist.next != NULL) { + r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); + if (r->r_preference) + new_r->r_preference = r->r_preference - 1; + } } - if (tb[FRA_FWMASK]) - r->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif + hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { + if (r->r_preference > new_r->r_preference) + break; + last = r; + } + atomic_inc(&new_r->r_clntref); - r->src_len = frh->src_len; - r->srcmask = dnet_make_mask(r->src_len); - r->dst_len = frh->dst_len; - r->dstmask = dnet_make_mask(r->dst_len); - err = 0; -errout: - return err; + if (last) + hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); + else + hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); + return 0; } -static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; - if (frh->src_len && (r->src_len != frh->src_len)) - return 0; +int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) +{ + struct dn_fib_rule *r, *policy; + struct dn_fib_table *tb; + __le16 saddr = flp->fld_src; + __le16 daddr = flp->fld_dst; + struct hlist_node *node; + int err; - if (frh->dst_len && (r->dst_len != frh->dst_len)) - return 0; + rcu_read_lock(); + hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { + if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (r->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; + (r->r_fwmark && 
r->r_fwmark != flp->fld_fwmark) || #endif + (r->r_ifindex && r->r_ifindex != flp->iif)) + continue; + + switch(r->r_action) { + case RTN_UNICAST: + case RTN_NAT: + policy = r; + break; + case RTN_UNREACHABLE: + rcu_read_unlock(); + return -ENETUNREACH; + default: + case RTN_BLACKHOLE: + rcu_read_unlock(); + return -EINVAL; + case RTN_PROHIBIT: + rcu_read_unlock(); + return -EACCES; + } - if (tb[FRA_SRC] && (r->src != nla_get_u16(tb[FRA_SRC]))) - return 0; - - if (tb[FRA_DST] && (r->dst != nla_get_u16(tb[FRA_DST]))) - return 0; + if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) + continue; + err = tb->lookup(tb, flp, res); + if (err == 0) { + res->r = policy; + if (policy) + atomic_inc(&policy->r_clntref); + rcu_read_unlock(); + return 0; + } + if (err < 0 && err != -EAGAIN) { + rcu_read_unlock(); + return err; + } + } - return 1; + rcu_read_unlock(); + return -ESRCH; } unsigned dnet_addr_type(__le16 addr) @@ -225,7 +271,7 @@ unsigned dnet_addr_type(__le16 addr) struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); + struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL]; res.r = NULL; @@ -238,79 +284,142 @@ unsigned dnet_addr_type(__le16 addr) return ret; } -static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) { - struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + struct dn_fib_rule *r = res->r; - frh->family = AF_DECnet; - frh->dst_len = r->dst_len; - frh->src_len = r->src_len; - frh->tos = 0; + if (r->r_action == RTN_NAT) { + int addrtype = dnet_addr_type(r->r_srcmap); -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); - if (r->fwmask || r->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, r->fwmask); -#endif - if (r->dst_len) - NLA_PUT_U16(skb, FRA_DST, 
r->dst); - if (r->src_len) - NLA_PUT_U16(skb, FRA_SRC, r->src); + if (addrtype == RTN_NAT) { + saddr = (saddr&~r->r_srcmask)|r->r_srcmap; + *flags |= RTCF_SNAT; + } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { + saddr = r->r_srcmap; + *flags |= RTCF_MASQ; + } + } + return saddr; +} - return 0; +static void dn_fib_rules_detach(struct net_device *dev) +{ + struct hlist_node *node; + struct dn_fib_rule *r; -nla_put_failure: - return -ENOBUFS; + hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { + if (r->r_ifindex == dev->ifindex) + r->r_ifindex = -1; + } } -static u32 dn_fib_rule_default_pref(void) +static void dn_fib_rules_attach(struct net_device *dev) { - struct list_head *pos; - struct fib_rule *rule; - - if (!list_empty(&dn_fib_rules)) { - pos = dn_fib_rules.next; - if (pos->next != &dn_fib_rules) { - rule = list_entry(pos->next, struct fib_rule, list); - if (rule->pref) - return rule->pref - 1; - } - } + struct hlist_node *node; + struct dn_fib_rule *r; - return 0; + hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) + r->r_ifindex = dev->ifindex; + } } -int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) { - return fib_rules_dump(skb, cb, AF_DECnet); + struct net_device *dev = ptr; + + switch(event) { + case NETDEV_UNREGISTER: + dn_fib_rules_detach(dev); + dn_fib_sync_down(0, dev, 1); + case NETDEV_REGISTER: + dn_fib_rules_attach(dev); + dn_fib_sync_up(dev); + } + + return NOTIFY_DONE; } -static struct fib_rules_ops dn_fib_rules_ops = { - .family = AF_DECnet, - .rule_size = sizeof(struct dn_fib_rule), - .action = dn_fib_rule_action, - .match = dn_fib_rule_match, - .configure = dn_fib_rule_configure, - .compare = dn_fib_rule_compare, - .fill = dn_fib_rule_fill, - .default_pref = dn_fib_rule_default_pref, - .nlgroup = RTNLGRP_DECnet_RULE, - .policy = dn_fib_rule_policy, 
- .rules_list = &dn_fib_rules, - .owner = THIS_MODULE, + +static struct notifier_block dn_fib_rules_notifier = { + .notifier_call = dn_fib_rules_event, }; +static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, + struct netlink_callback *cb, unsigned int flags) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + + nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_DECnet; + rtm->rtm_dst_len = r->r_dst_len; + rtm->rtm_src_len = r->r_src_len; + rtm->rtm_tos = 0; +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (r->r_fwmark) + RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); +#endif + rtm->rtm_table = r->r_table; + rtm->rtm_protocol = 0; + rtm->rtm_scope = 0; + rtm->rtm_type = r->r_action; + rtm->rtm_flags = r->r_flags; + + if (r->r_dst_len) + RTA_PUT(skb, RTA_DST, 2, &r->r_dst); + if (r->r_src_len) + RTA_PUT(skb, RTA_SRC, 2, &r->r_src); + if (r->r_ifname[0]) + RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); + if (r->r_preference) + RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); + if (r->r_srcmap) + RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +{ + int idx = 0; + int s_idx = cb->args[0]; + struct dn_fib_rule *r; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { + if (idx < s_idx) + goto next; + if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) + break; +next: + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + + return skb->len; +} + void __init dn_fib_rules_init(void) { - list_add_tail(&default_rule.common.list, &dn_fib_rules); - fib_rules_register(&dn_fib_rules_ops); + INIT_HLIST_HEAD(&dn_fib_rules); + hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); + 
register_netdevice_notifier(&dn_fib_rules_notifier); } void __exit dn_fib_rules_cleanup(void) { - fib_rules_unregister(&dn_fib_rules_ops); + unregister_netdevice_notifier(&dn_fib_rules_notifier); } diff --git a/trunk/net/decnet/dn_table.c b/trunk/net/decnet/dn_table.c index 317904bb5896..e926c952e363 100644 --- a/trunk/net/decnet/dn_table.c +++ b/trunk/net/decnet/dn_table.c @@ -30,7 +30,6 @@ #include #include #include -#include #include #include #include @@ -75,9 +74,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) #define RT_TABLE_MIN 1 -#define DN_FIB_TABLE_HASHSZ 256 -static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; + static DEFINE_RWLOCK(dn_fib_tables_lock); +struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; @@ -264,7 +263,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern } static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; @@ -278,7 +277,6 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = 0; rtm->rtm_table = tb_id; - RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; rtm->rtm_type = type; @@ -328,29 +326,29 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, } -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, +static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; u32 pid = req ? 
req->pid : 0; - int err = -ENOBUFS; + int size = NLMSG_SPACE(sizeof(struct rtmsg) + 256); - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto errout; + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, - f->fn_type, f->fn_scope, &f->fn_key, z, - DN_FIB_INFO(f), 0); - if (err < 0) { + if (dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + f->fn_type, f->fn_scope, &f->fn_key, z, + DN_FIB_INFO(f), 0) < 0) { kfree_skb(skb); - goto errout; + return; } - - err = rtnl_notify(skb, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_DECnet_ROUTE, err); + NETLINK_CB(skb).dst_group = RTNLGRP_DECnet_ROUTE; + if (nlh->nlmsg_flags & NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_DECnet_ROUTE, GFP_KERNEL); + if (nlh->nlmsg_flags & NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, @@ -361,7 +359,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, { int i, s_i; - s_i = cb->args[4]; + s_i = cb->args[3]; for(i = 0; f; i++, f = f->fn_next) { if (i < s_i) continue; @@ -374,11 +372,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, (f->fn_state & DN_S_ZOMBIE) ? 
0 : f->fn_type, f->fn_scope, &f->fn_key, dz->dz_order, f->fn_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; + cb->args[3] = i; return -1; } } - cb->args[4] = i; + cb->args[3] = i; return skb->len; } @@ -389,20 +387,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, { int h, s_h; - s_h = cb->args[3]; + s_h = cb->args[2]; for(h = 0; h < dz->dz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) continue; if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[3] = h; + cb->args[2] = h; return -1; } } - cb->args[3] = h; + cb->args[2] = h; return skb->len; } @@ -413,63 +411,26 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, struct dn_zone *dz; struct dn_hash *table = (struct dn_hash *)tb->data; - s_m = cb->args[2]; + s_m = cb->args[1]; read_lock(&dn_fib_tables_lock); for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[2] = m; + cb->args[1] = m; read_unlock(&dn_fib_tables_lock); return -1; } } read_unlock(&dn_fib_tables_lock); - cb->args[2] = m; + cb->args[1] = m; return skb->len; } -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - unsigned int h, s_h; - unsigned int e = 0, s_e; - struct dn_fib_table *tb; - struct hlist_node *node; - int dumped = 0; - - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_h = cb->args[0]; - s_e = cb->args[1]; - - for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { - e = 0; - hlist_for_each_entry(tb, node, 
&dn_fib_table_hash[h], hlist) { - if (e < s_e) - goto next; - if (dumped) - memset(&cb->args[2], 0, sizeof(cb->args) - - 2 * sizeof(cb->args[0])); - if (tb->dump(tb, skb, cb) < 0) - goto out; - dumped = 1; -next: - e++; - } - } -out: - cb->args[1] = e; - cb->args[0] = h; - - return skb->len; -} - static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; @@ -778,11 +739,9 @@ static int dn_fib_table_lookup(struct dn_fib_table *tb, const struct flowi *flp, } -struct dn_fib_table *dn_fib_get_table(u32 n, int create) +struct dn_fib_table *dn_fib_get_table(int n, int create) { struct dn_fib_table *t; - struct hlist_node *node; - unsigned int h; if (n < RT_TABLE_MIN) return NULL; @@ -790,15 +749,8 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) if (n > RT_TABLE_MAX) return NULL; - h = n & (DN_FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); - hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { - if (t->n == n) { - rcu_read_unlock(); - return t; - } - } - rcu_read_unlock(); + if (dn_fib_tables[n]) + return dn_fib_tables[n]; if (!create) return NULL; @@ -819,35 +771,31 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) t->flush = dn_fib_table_flush; t->dump = dn_fib_table_dump; memset(t->data, 0, sizeof(struct dn_hash)); - hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); + dn_fib_tables[n] = t; return t; } -struct dn_fib_table *dn_fib_empty_table(void) +static void dn_fib_del_tree(int n) { - u32 id; + struct dn_fib_table *t; - for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_get_table(id, 0) == NULL) - return dn_fib_get_table(id, 1); - return NULL; + write_lock(&dn_fib_tables_lock); + t = dn_fib_tables[n]; + dn_fib_tables[n] = NULL; + write_unlock(&dn_fib_tables_lock); + + kfree(t); } -void dn_fib_flush(void) +struct dn_fib_table *dn_fib_empty_table(void) { - int flushed = 0; - struct 
dn_fib_table *tb; - struct hlist_node *node; - unsigned int h; - - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) - flushed += tb->flush(tb); - } + int id; - if (flushed) - dn_rt_cache_flush(-1); + for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) + if (dn_fib_tables[id] == NULL) + return dn_fib_get_table(id, 1); + return NULL; } void __init dn_fib_table_init(void) @@ -860,17 +808,10 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { - struct dn_fib_table *t; - struct hlist_node *node, *next; - unsigned int h; + int i; - write_lock(&dn_fib_tables_lock); - for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], - hlist) { - hlist_del(&t->hlist); - kfree(t); - } - } - write_unlock(&dn_fib_tables_lock); + for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i) + dn_fib_del_tree(i); + + return; } diff --git a/trunk/net/ethernet/eth.c b/trunk/net/ethernet/eth.c index 43863933f27f..387c71c584ee 100644 --- a/trunk/net/ethernet/eth.c +++ b/trunk/net/ethernet/eth.c @@ -64,79 +64,81 @@ __setup("ether=", netdev_boot_setup); -/** - * eth_header - create the Ethernet header - * @skb: buffer to alter - * @dev: source device - * @type: Ethernet type field - * @daddr: destination address (NULL leave destination address) - * @saddr: source address (NULL use device source address) - * @len: packet length (<= skb->len) - * +/* + * Create the Ethernet MAC header for an arbitrary protocol layer * - * Set the protocol type. For a packet of type ETH_P_802_3 we put the length - * in here instead. It is up to the 802.2 layer to carry protocol information. 
+ * saddr=NULL means use device source address + * daddr=NULL means leave destination address (eg unresolved arp) */ + int eth_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, - void *daddr, void *saddr, unsigned len) + void *daddr, void *saddr, unsigned len) { - struct ethhdr *eth = (struct ethhdr *)skb_push(skb, ETH_HLEN); + struct ethhdr *eth = (struct ethhdr *)skb_push(skb,ETH_HLEN); - if (type != ETH_P_802_3) + /* + * Set the protocol type. For a packet of type ETH_P_802_3 we put the length + * in here instead. It is up to the 802.2 layer to carry protocol information. + */ + + if(type!=ETH_P_802_3) eth->h_proto = htons(type); else eth->h_proto = htons(len); /* - * Set the source hardware address. + * Set the source hardware address. */ - - if (!saddr) + + if(!saddr) saddr = dev->dev_addr; - memcpy(eth->h_source, saddr, dev->addr_len); + memcpy(eth->h_source,saddr,dev->addr_len); - if (daddr) { - memcpy(eth->h_dest, daddr, dev->addr_len); + if(daddr) + { + memcpy(eth->h_dest,daddr,dev->addr_len); return ETH_HLEN; } - + /* - * Anyway, the loopback-device should never use this function... + * Anyway, the loopback-device should never use this function... */ - if (dev->flags & (IFF_LOOPBACK | IFF_NOARP)) { + if (dev->flags & (IFF_LOOPBACK|IFF_NOARP)) + { memset(eth->h_dest, 0, dev->addr_len); return ETH_HLEN; } - + return -ETH_HLEN; } -/** - * eth_rebuild_header- rebuild the Ethernet MAC header. - * @skb: socket buffer to update - * - * This is called after an ARP or IPV6 ndisc it's resolution on this - * sk_buff. We now let protocol (ARP) fill in the other fields. + +/* + * Rebuild the Ethernet MAC header. This is called after an ARP + * (or in future other address resolution) has completed on this + * sk_buff. We now let ARP fill in the other fields. * - * This routine CANNOT use cached dst->neigh! - * Really, it is used only when dst->neigh is wrong. + * This routine CANNOT use cached dst->neigh! 
+ * Really, it is used only when dst->neigh is wrong. */ + int eth_rebuild_header(struct sk_buff *skb) { struct ethhdr *eth = (struct ethhdr *)skb->data; struct net_device *dev = skb->dev; - switch (eth->h_proto) { + switch (eth->h_proto) + { #ifdef CONFIG_INET case __constant_htons(ETH_P_IP): - return arp_find(eth->h_dest, skb); -#endif + return arp_find(eth->h_dest, skb); +#endif default: printk(KERN_DEBUG - "%s: unable to resolve type %X addresses.\n", + "%s: unable to resolve type %X addresses.\n", dev->name, (int)eth->h_proto); - + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); break; } @@ -144,70 +146,62 @@ int eth_rebuild_header(struct sk_buff *skb) return 0; } -/** - * eth_type_trans - determine the packet's protocol ID. - * @skb: received socket data - * @dev: receiving network device - * - * The rule here is that we - * assume 802.3 if the type field is short enough to be a length. - * This is normal practice and works for any 'now in use' protocol. + +/* + * Determine the packet's protocol ID. The rule here is that we + * assume 802.3 if the type field is short enough to be a length. + * This is normal practice and works for any 'now in use' protocol. */ + __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) { struct ethhdr *eth; unsigned char *rawp; - + skb->mac.raw = skb->data; - skb_pull(skb, ETH_HLEN); + skb_pull(skb,ETH_HLEN); eth = eth_hdr(skb); - + if (is_multicast_ether_addr(eth->h_dest)) { if (!compare_ether_addr(eth->h_dest, dev->broadcast)) skb->pkt_type = PACKET_BROADCAST; else skb->pkt_type = PACKET_MULTICAST; } - + /* - * This ALLMULTI check should be redundant by 1.4 - * so don't forget to remove it. + * This ALLMULTI check should be redundant by 1.4 + * so don't forget to remove it. * - * Seems, you forgot to remove it. All silly devices - * seems to set IFF_PROMISC. + * Seems, you forgot to remove it. All silly devices + * seems to set IFF_PROMISC. 
*/ - - else if (1 /*dev->flags&IFF_PROMISC */ ) { + + else if(1 /*dev->flags&IFF_PROMISC*/) { if (unlikely(compare_ether_addr(eth->h_dest, dev->dev_addr))) skb->pkt_type = PACKET_OTHERHOST; } - + if (ntohs(eth->h_proto) >= 1536) return eth->h_proto; - + rawp = skb->data; - + /* - * This is a magic hack to spot IPX packets. Older Novell breaks - * the protocol design and runs IPX over 802.3 without an 802.2 LLC - * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This - * won't work for fault tolerant netware but does for the rest. + * This is a magic hack to spot IPX packets. Older Novell breaks + * the protocol design and runs IPX over 802.3 without an 802.2 LLC + * layer. We look for FFFF which isn't a used 802.2 SSAP/DSAP. This + * won't work for fault tolerant netware but does for the rest. */ if (*(unsigned short *)rawp == 0xFFFF) return htons(ETH_P_802_3); - + /* - * Real 802.2 LLC + * Real 802.2 LLC */ return htons(ETH_P_802_2); } -EXPORT_SYMBOL(eth_type_trans); -/** - * eth_header_parse - extract hardware address from packet - * @skb: packet to extract header from - * @haddr: destination buffer - */ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) { struct ethhdr *eth = eth_hdr(skb); @@ -215,20 +209,14 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) return ETH_ALEN; } -/** - * eth_header_cache - fill cache entry from neighbour - * @neigh: source neighbour - * @hh: destination cache entry - * Create an Ethernet header template from the neighbour. 
- */ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) { unsigned short type = hh->hh_type; struct ethhdr *eth; struct net_device *dev = neigh->dev; - eth = (struct ethhdr *) - (((u8 *) hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); + eth = (struct ethhdr*) + (((u8*)hh->hh_data) + (HH_DATA_OFF(sizeof(*eth)))); if (type == __constant_htons(ETH_P_802_3)) return -1; @@ -240,47 +228,27 @@ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) return 0; } -/** - * eth_header_cache_update - update cache entry - * @hh: destination cache entry - * @dev: network device - * @haddr: new hardware address - * +/* * Called by Address Resolution module to notify changes in address. */ -void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, - unsigned char *haddr) + +void eth_header_cache_update(struct hh_cache *hh, struct net_device *dev, unsigned char * haddr) { - memcpy(((u8 *) hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), + memcpy(((u8*)hh->hh_data) + HH_DATA_OFF(sizeof(struct ethhdr)), haddr, dev->addr_len); } -/** - * eth_mac_addr - set new Ethernet hardware address - * @dev: network device - * @p: socket address - * Change hardware address of device. - * - * This doesn't change hardware matching, so needs to be overridden - * for most real devices. - */ +EXPORT_SYMBOL(eth_type_trans); + static int eth_mac_addr(struct net_device *dev, void *p) { - struct sockaddr *addr = p; + struct sockaddr *addr=p; if (netif_running(dev)) return -EBUSY; - memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + memcpy(dev->dev_addr, addr->sa_data,dev->addr_len); return 0; } -/** - * eth_change_mtu - set new MTU size - * @dev: network device - * @new_mtu: new Maximum Transfer Unit - * - * Allow changing MTU size. Needs to be overridden for devices - * supporting jumbo frames. 
- */ static int eth_change_mtu(struct net_device *dev, int new_mtu) { if (new_mtu < 68 || new_mtu > ETH_DATA_LEN) @@ -289,10 +257,8 @@ static int eth_change_mtu(struct net_device *dev, int new_mtu) return 0; } -/** - * ether_setup - setup Ethernet network device - * @dev: network device - * Fill in the fields of the device structure with Ethernet-generic values. +/* + * Fill in the fields of the device structure with ethernet-generic values. */ void ether_setup(struct net_device *dev) { @@ -311,21 +277,21 @@ void ether_setup(struct net_device *dev) dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; - memset(dev->broadcast, 0xFF, ETH_ALEN); + memset(dev->broadcast,0xFF, ETH_ALEN); } EXPORT_SYMBOL(ether_setup); /** - * alloc_etherdev - Allocates and sets up an Ethernet device + * alloc_etherdev - Allocates and sets up an ethernet device * @sizeof_priv: Size of additional driver-private structure to be allocated - * for this Ethernet device + * for this ethernet device * - * Fill in the fields of the device structure with Ethernet-generic + * Fill in the fields of the device structure with ethernet-generic * values. Basically does everything except registering the device. * * Constructs a new net device, complete with a private data area of - * size (sizeof_priv). A 32-byte (not bit) alignment is enforced for + * size @sizeof_priv. A 32-byte (not bit) alignment is enforced for * this private data area. */ diff --git a/trunk/net/ipv4/Kconfig b/trunk/net/ipv4/Kconfig index 1650b64415aa..3b5d504a74be 100644 --- a/trunk/net/ipv4/Kconfig +++ b/trunk/net/ipv4/Kconfig @@ -88,7 +88,6 @@ config IP_FIB_HASH config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER - select FIB_RULES ---help--- Normally, a router decides what to do with a received packet based solely on the packet's final destination address. 
If you say Y here, diff --git a/trunk/net/ipv4/Makefile b/trunk/net/ipv4/Makefile index f66049e28aeb..4878fc5be85f 100644 --- a/trunk/net/ipv4/Makefile +++ b/trunk/net/ipv4/Makefile @@ -47,7 +47,6 @@ obj-$(CONFIG_TCP_CONG_VEGAS) += tcp_vegas.o obj-$(CONFIG_TCP_CONG_VENO) += tcp_veno.o obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o -obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ xfrm4_output.o diff --git a/trunk/net/ipv4/af_inet.c b/trunk/net/ipv4/af_inet.c index fdd89e37b9aa..c84a32070f8d 100644 --- a/trunk/net/ipv4/af_inet.c +++ b/trunk/net/ipv4/af_inet.c @@ -67,6 +67,7 @@ * 2 of the License, or (at your option) any later version. */ +#include #include #include #include @@ -391,7 +392,7 @@ int inet_release(struct socket *sock) } /* It is off by default, see below. */ -int sysctl_ip_nonlocal_bind __read_mostly; +int sysctl_ip_nonlocal_bind; int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { @@ -987,7 +988,7 @@ void inet_unregister_protosw(struct inet_protosw *p) * Shall we try to damage output packets if routing dev changes? 
*/ -int sysctl_ip_dynaddr __read_mostly; +int sysctl_ip_dynaddr; static int inet_sk_reselect_saddr(struct sock *sk) { @@ -1073,7 +1074,6 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; - security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) @@ -1254,7 +1254,10 @@ static int __init inet_init(void) struct list_head *r; int rc = -EINVAL; - BUILD_BUG_ON(sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)); + if (sizeof(struct inet_skb_parm) > sizeof(dummy_skb->cb)) { + printk(KERN_CRIT "%s: panic\n", __FUNCTION__); + goto out; + } rc = proto_register(&tcp_prot, 1); if (rc) diff --git a/trunk/net/ipv4/ah4.c b/trunk/net/ipv4/ah4.c index 99542977e47e..2b98943e6b02 100644 --- a/trunk/net/ipv4/ah4.c +++ b/trunk/net/ipv4/ah4.c @@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case IPOPT_CIPSO: + case 0x86: /* Another "Commercial Security" crap. */ case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; @@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/trunk/net/ipv4/cipso_ipv4.c b/trunk/net/ipv4/cipso_ipv4.c deleted file mode 100644 index 80a2a0911b49..000000000000 --- a/trunk/net/ipv4/cipso_ipv4.c +++ /dev/null @@ -1,1607 +0,0 @@ -/* - * CIPSO - Commercial IP Security Option - * - * This is an implementation of the CIPSO 2.2 protocol as specified in - * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in - * FIPS-188, copies of both documents can be found in the Documentation - * directory. 
While CIPSO never became a full IETF RFC standard many vendors - * have chosen to adopt the protocol and over the years it has become a - * de-facto standard for labeled networking. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -struct cipso_v4_domhsh_entry { - char *domain; - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - -/* List of available DOI definitions */ -/* XXX - Updates should be minimal so having a single lock for the - * cipso_v4_doi_list and the cipso_v4_doi_list->dom_list should be - * okay. */ -/* XXX - This currently assumes a minimal number of different DOIs in use, - * if in practice there are a lot of different DOIs this list should - * probably be turned into a hash table or something similar so we - * can do quick lookups. 
*/ -static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); -static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); - -/* Label mapping cache */ -int cipso_v4_cache_enabled = 1; -int cipso_v4_cache_bucketsize = 10; -#define CIPSO_V4_CACHE_BUCKETBITS 7 -#define CIPSO_V4_CACHE_BUCKETS (1 << CIPSO_V4_CACHE_BUCKETBITS) -#define CIPSO_V4_CACHE_REORDERLIMIT 10 -struct cipso_v4_map_cache_bkt { - spinlock_t lock; - u32 size; - struct list_head list; -}; -struct cipso_v4_map_cache_entry { - u32 hash; - unsigned char *key; - size_t key_len; - - struct netlbl_lsm_cache lsm_data; - - u32 activity; - struct list_head list; -}; -static struct cipso_v4_map_cache_bkt *cipso_v4_cache = NULL; - -/* Restricted bitmap (tag #1) flags */ -int cipso_v4_rbm_optfmt = 0; -int cipso_v4_rbm_strictvalid = 1; - -/* - * Helper Functions - */ - -/** - * cipso_v4_bitmap_walk - Walk a bitmap looking for a bit - * @bitmap: the bitmap - * @bitmap_len: length in bits - * @offset: starting offset - * @state: if non-zero, look for a set (1) bit else look for a cleared (0) bit - * - * Description: - * Starting at @offset, walk the bitmap from left to right until either the - * desired bit is found or we reach the end. Return the bit offset, -1 if - * not found, or -2 if error. 
- */ -static int cipso_v4_bitmap_walk(const unsigned char *bitmap, - u32 bitmap_len, - u32 offset, - u8 state) -{ - u32 bit_spot; - u32 byte_offset; - unsigned char bitmask; - unsigned char byte; - - /* gcc always rounds to zero when doing integer division */ - byte_offset = offset / 8; - byte = bitmap[byte_offset]; - bit_spot = offset; - bitmask = 0x80 >> (offset % 8); - - while (bit_spot < bitmap_len) { - if ((state && (byte & bitmask) == bitmask) || - (state == 0 && (byte & bitmask) == 0)) - return bit_spot; - - bit_spot++; - bitmask >>= 1; - if (bitmask == 0) { - byte = bitmap[++byte_offset]; - bitmask = 0x80; - } - } - - return -1; -} - -/** - * cipso_v4_bitmap_setbit - Sets a single bit in a bitmap - * @bitmap: the bitmap - * @bit: the bit - * @state: if non-zero, set the bit (1) else clear the bit (0) - * - * Description: - * Set a single bit in the bitmask. Returns zero on success, negative values - * on error. - */ -static void cipso_v4_bitmap_setbit(unsigned char *bitmap, - u32 bit, - u8 state) -{ - u32 byte_spot; - u8 bitmask; - - /* gcc always rounds to zero when doing integer division */ - byte_spot = bit / 8; - bitmask = 0x80 >> (bit % 8); - if (state) - bitmap[byte_spot] |= bitmask; - else - bitmap[byte_spot] &= ~bitmask; -} - -/** - * cipso_v4_doi_domhsh_free - Frees a domain list entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a domain list entry can be released - * safely. - * - */ -static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) -{ - struct cipso_v4_domhsh_entry *ptr; - - ptr = container_of(entry, struct cipso_v4_domhsh_entry, rcu); - kfree(ptr->domain); - kfree(ptr); -} - -/** - * cipso_v4_cache_entry_free - Frees a cache entry - * @entry: the entry to free - * - * Description: - * This function frees the memory associated with a cache entry. 
- * - */ -static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) -{ - if (entry->lsm_data.free) - entry->lsm_data.free(entry->lsm_data.data); - kfree(entry->key); - kfree(entry); -} - -/** - * cipso_v4_map_cache_hash - Hashing function for the CIPSO cache - * @key: the hash key - * @key_len: the length of the key in bytes - * - * Description: - * The CIPSO tag hashing function. Returns a 32-bit hash value. - * - */ -static u32 cipso_v4_map_cache_hash(const unsigned char *key, u32 key_len) -{ - return jhash(key, key_len, 0); -} - -/* - * Label Mapping Cache Functions - */ - -/** - * cipso_v4_cache_init - Initialize the CIPSO cache - * - * Description: - * Initializes the CIPSO label mapping cache, this function should be called - * before any of the other functions defined in this file. Returns zero on - * success, negative values on error. - * - */ -static int cipso_v4_cache_init(void) -{ - u32 iter; - - cipso_v4_cache = kcalloc(CIPSO_V4_CACHE_BUCKETS, - sizeof(struct cipso_v4_map_cache_bkt), - GFP_KERNEL); - if (cipso_v4_cache == NULL) - return -ENOMEM; - - for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { - spin_lock_init(&cipso_v4_cache[iter].lock); - cipso_v4_cache[iter].size = 0; - INIT_LIST_HEAD(&cipso_v4_cache[iter].list); - } - - return 0; -} - -/** - * cipso_v4_cache_invalidate - Invalidates the current CIPSO cache - * - * Description: - * Invalidates and frees any entries in the CIPSO cache. Returns zero on - * success and negative values on failure. 
- * - */ -void cipso_v4_cache_invalidate(void) -{ - struct cipso_v4_map_cache_entry *entry, *tmp_entry; - u32 iter; - - for (iter = 0; iter < CIPSO_V4_CACHE_BUCKETS; iter++) { - spin_lock(&cipso_v4_cache[iter].lock); - list_for_each_entry_safe(entry, - tmp_entry, - &cipso_v4_cache[iter].list, list) { - list_del(&entry->list); - cipso_v4_cache_entry_free(entry); - } - cipso_v4_cache[iter].size = 0; - spin_unlock(&cipso_v4_cache[iter].lock); - } - - return; -} - -/** - * cipso_v4_cache_check - Check the CIPSO cache for a label mapping - * @key: the buffer to check - * @key_len: buffer length in bytes - * @secattr: the security attribute struct to use - * - * Description: - * This function checks the cache to see if a label mapping already exists for - * the given key. If there is a match then the cache is adjusted and the - * @secattr struct is populated with the correct LSM security attributes. The - * cache is adjusted in the following manner if the entry is not already the - * first in the cache bucket: - * - * 1. The cache entry's activity counter is incremented - * 2. The previous (higher ranking) entry's activity counter is decremented - * 3. If the difference between the two activity counters is geater than - * CIPSO_V4_CACHE_REORDERLIMIT the two entries are swapped - * - * Returns zero on success, -ENOENT for a cache miss, and other negative values - * on error. 
- * - */ -static int cipso_v4_cache_check(const unsigned char *key, - u32 key_len, - struct netlbl_lsm_secattr *secattr) -{ - u32 bkt; - struct cipso_v4_map_cache_entry *entry; - struct cipso_v4_map_cache_entry *prev_entry = NULL; - u32 hash; - - if (!cipso_v4_cache_enabled) - return -ENOENT; - - hash = cipso_v4_map_cache_hash(key, key_len); - bkt = hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); - list_for_each_entry(entry, &cipso_v4_cache[bkt].list, list) { - if (entry->hash == hash && - entry->key_len == key_len && - memcmp(entry->key, key, key_len) == 0) { - entry->activity += 1; - secattr->cache.free = entry->lsm_data.free; - secattr->cache.data = entry->lsm_data.data; - if (prev_entry == NULL) { - spin_unlock(&cipso_v4_cache[bkt].lock); - return 0; - } - - if (prev_entry->activity > 0) - prev_entry->activity -= 1; - if (entry->activity > prev_entry->activity && - entry->activity - prev_entry->activity > - CIPSO_V4_CACHE_REORDERLIMIT) { - __list_del(entry->list.prev, entry->list.next); - __list_add(&entry->list, - prev_entry->list.prev, - &prev_entry->list); - } - - spin_unlock(&cipso_v4_cache[bkt].lock); - return 0; - } - prev_entry = entry; - } - spin_unlock(&cipso_v4_cache[bkt].lock); - - return -ENOENT; -} - -/** - * cipso_v4_cache_add - Add an entry to the CIPSO cache - * @skb: the packet - * @secattr: the packet's security attributes - * - * Description: - * Add a new entry into the CIPSO label mapping cache. Add the new entry to - * head of the cache bucket's list, if the cache bucket is out of room remove - * the last entry in the list first. It is important to note that there is - * currently no checking for duplicate keys. Returns zero on success, - * negative values on failure. 
- * - */ -int cipso_v4_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr) -{ - int ret_val = -EPERM; - u32 bkt; - struct cipso_v4_map_cache_entry *entry = NULL; - struct cipso_v4_map_cache_entry *old_entry = NULL; - unsigned char *cipso_ptr; - u32 cipso_ptr_len; - - if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0) - return 0; - - cipso_ptr = CIPSO_V4_OPTPTR(skb); - cipso_ptr_len = cipso_ptr[1]; - - entry = kzalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) - return -ENOMEM; - entry->key = kmalloc(cipso_ptr_len, GFP_ATOMIC); - if (entry->key == NULL) { - ret_val = -ENOMEM; - goto cache_add_failure; - } - memcpy(entry->key, cipso_ptr, cipso_ptr_len); - entry->key_len = cipso_ptr_len; - entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - entry->lsm_data.free = secattr->cache.free; - entry->lsm_data.data = secattr->cache.data; - - bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); - spin_lock(&cipso_v4_cache[bkt].lock); - if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) { - list_add(&entry->list, &cipso_v4_cache[bkt].list); - cipso_v4_cache[bkt].size += 1; - } else { - old_entry = list_entry(cipso_v4_cache[bkt].list.prev, - struct cipso_v4_map_cache_entry, list); - list_del(&old_entry->list); - list_add(&entry->list, &cipso_v4_cache[bkt].list); - cipso_v4_cache_entry_free(old_entry); - } - spin_unlock(&cipso_v4_cache[bkt].lock); - - return 0; - -cache_add_failure: - if (entry) - cipso_v4_cache_entry_free(entry); - return ret_val; -} - -/* - * DOI List Functions - */ - -/** - * cipso_v4_doi_search - Searches for a DOI definition - * @doi: the DOI to search for - * - * Description: - * Search the DOI definition list for a DOI definition with a DOI value that - * matches @doi. The caller is responsibile for calling rcu_read_[un]lock(). - * Returns a pointer to the DOI definition on success and NULL on failure. 
- */ -static struct cipso_v4_doi *cipso_v4_doi_search(u32 doi) -{ - struct cipso_v4_doi *iter; - - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->doi == doi && iter->valid) - return iter; - return NULL; -} - -/** - * cipso_v4_doi_add - Add a new DOI to the CIPSO protocol engine - * @doi_def: the DOI structure - * - * Description: - * The caller defines a new DOI for use by the CIPSO engine and calls this - * function to add it to the list of acceptable domains. The caller must - * ensure that the mapping table specified in @doi_def->map meets all of the - * requirements of the mapping type (see cipso_ipv4.h for details). Returns - * zero on success and non-zero on failure. - * - */ -int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) -{ - if (doi_def == NULL || doi_def->doi == CIPSO_V4_DOI_UNKNOWN) - return -EINVAL; - - doi_def->valid = 1; - INIT_RCU_HEAD(&doi_def->rcu); - INIT_LIST_HEAD(&doi_def->dom_list); - - rcu_read_lock(); - if (cipso_v4_doi_search(doi_def->doi) != NULL) - goto doi_add_failure_rlock; - spin_lock(&cipso_v4_doi_list_lock); - if (cipso_v4_doi_search(doi_def->doi) != NULL) - goto doi_add_failure_slock; - list_add_tail_rcu(&doi_def->list, &cipso_v4_doi_list); - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - - return 0; - -doi_add_failure_slock: - spin_unlock(&cipso_v4_doi_list_lock); -doi_add_failure_rlock: - rcu_read_unlock(); - return -EEXIST; -} - -/** - * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine - * @doi: the DOI value - * @callback: the DOI cleanup/free callback - * - * Description: - * Removes a DOI definition from the CIPSO engine, @callback is called to - * free any memory. The NetLabel routines will be called to release their own - * LSM domain mappings as well as our own domain list. Returns zero on - * success and negative values on failure. 
- * - */ -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) -{ - struct cipso_v4_doi *doi_def; - struct cipso_v4_domhsh_entry *dom_iter; - - rcu_read_lock(); - if (cipso_v4_doi_search(doi) != NULL) { - spin_lock(&cipso_v4_doi_list_lock); - doi_def = cipso_v4_doi_search(doi); - if (doi_def == NULL) { - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - return -ENOENT; - } - doi_def->valid = 0; - list_del_rcu(&doi_def->list); - spin_unlock(&cipso_v4_doi_list_lock); - list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) - if (dom_iter->valid) - netlbl_domhsh_remove(dom_iter->domain); - cipso_v4_cache_invalidate(); - rcu_read_unlock(); - - call_rcu(&doi_def->rcu, callback); - return 0; - } - rcu_read_unlock(); - - return -ENOENT; -} - -/** - * cipso_v4_doi_getdef - Returns a pointer to a valid DOI definition - * @doi: the DOI value - * - * Description: - * Searches for a valid DOI definition and if one is found it is returned to - * the caller. Otherwise NULL is returned. The caller must ensure that - * rcu_read_lock() is held while accessing the returned definition. - * - */ -struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) -{ - return cipso_v4_doi_search(doi); -} - -/** - * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Dump a list of all the configured DOI values into a sk_buff. The returned - * sk_buff has room at the front of the sk_buff for @headroom bytes. See - * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This - * function may fail if another process is changing the DOI list at the same - * time. Returns a pointer to a sk_buff on success, NULL on error. 
- * - */ -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 doi_cnt = 0; - ssize_t buf_len; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - doi_cnt += 1; - buf_len += 2 * NETLBL_LEN_U32; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_all_failure; - - if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) - goto doi_dump_all_failure; - buf_len -= NETLBL_LEN_U32; - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - if (buf_len < 2 * NETLBL_LEN_U32) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_all_failure; - buf_len -= 2 * NETLBL_LEN_U32; - } - rcu_read_unlock(); - - return skb; - -doi_dump_all_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff - * @doi: the DOI value - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Lookup the DOI definition matching @doi and dump it's contents into a - * sk_buff. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message - * format. This function may fail if another process is changing the DOI list - * at the same time. Returns a pointer to a sk_buff on success, NULL on error. 
- * - */ -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 tag_cnt = 0; - u32 lvl_cnt = 0; - u32 cat_cnt = 0; - ssize_t buf_len; - ssize_t tmp; - - rcu_read_lock(); - iter = cipso_v4_doi_getdef(doi); - if (iter == NULL) - goto doi_dump_failure; - buf_len = NETLBL_LEN_U32; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - buf_len += NETLBL_LEN_U32; - while(tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - buf_len += 3 * NETLBL_LEN_U32; - while (tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - lvl_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - cat_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_failure; - - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - if (iter != cipso_v4_doi_getdef(doi)) - goto doi_dump_failure; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, 
lvl_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) - goto doi_dump_failure; - buf_len -= 3 * NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u8(skb, - NLA_U8, - iter->map.std->lvl.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u16(skb, - NLA_U16, - iter->map.std->cat.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - rcu_read_unlock(); - - return skb; - -doi_dump_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_domhsh_add - Adds a domain entry to a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to add - * - * Description: - * Adds the @domain to the the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). This function - * does allocate memory. Returns zero on success, negative values on failure. 
- * - */ -int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - struct cipso_v4_domhsh_entry *new_dom; - - new_dom = kzalloc(sizeof(*new_dom), GFP_KERNEL); - if (new_dom == NULL) - return -ENOMEM; - if (domain) { - new_dom->domain = kstrdup(domain, GFP_KERNEL); - if (new_dom->domain == NULL) { - kfree(new_dom); - return -ENOMEM; - } - } - new_dom->valid = 1; - INIT_RCU_HEAD(&new_dom->rcu); - - rcu_read_lock(); - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry_rcu(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - kfree(new_dom->domain); - kfree(new_dom); - return -EEXIST; - } - list_add_tail_rcu(&new_dom->list, &doi_def->dom_list); - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - - return 0; -} - -/** - * cipso_v4_doi_domhsh_remove - Removes a domain entry from a DOI definition - * @doi_def: the DOI definition - * @domain: the domain to remove - * - * Description: - * Removes the @domain from the DOI specified by @doi_def, this function - * should only be called by external functions (i.e. NetLabel). Returns zero - * on success and negative values on error. 
- * - */ -int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, - const char *domain) -{ - struct cipso_v4_domhsh_entry *iter; - - rcu_read_lock(); - spin_lock(&cipso_v4_doi_list_lock); - list_for_each_entry_rcu(iter, &doi_def->dom_list, list) - if (iter->valid && - ((domain != NULL && iter->domain != NULL && - strcmp(iter->domain, domain) == 0) || - (domain == NULL && iter->domain == NULL))) { - iter->valid = 0; - list_del_rcu(&iter->list); - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - call_rcu(&iter->rcu, cipso_v4_doi_domhsh_free); - - return 0; - } - spin_unlock(&cipso_v4_doi_list_lock); - rcu_read_unlock(); - - return -ENOENT; -} - -/* - * Label Mapping Functions - */ - -/** - * cipso_v4_map_lvl_valid - Checks to see if the given level is understood - * @doi_def: the DOI definition - * @level: the level to check - * - * Description: - * Checks the given level against the given DOI definition and returns a - * negative value if the level does not have a valid mapping and a zero value - * if the level is defined by the DOI. - * - */ -static int cipso_v4_map_lvl_valid(const struct cipso_v4_doi *doi_def, u8 level) -{ - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - return 0; - case CIPSO_V4_MAP_STD: - if (doi_def->map.std->lvl.cipso[level] < CIPSO_V4_INV_LVL) - return 0; - break; - } - - return -EFAULT; -} - -/** - * cipso_v4_map_lvl_hton - Perform a level mapping from the host to the network - * @doi_def: the DOI definition - * @host_lvl: the host MLS level - * @net_lvl: the network/CIPSO MLS level - * - * Description: - * Perform a label mapping to translate a local MLS level to the correct - * CIPSO level using the given DOI definition. Returns zero on success, - * negative values otherwise. 
- * - */ -static int cipso_v4_map_lvl_hton(const struct cipso_v4_doi *doi_def, - u32 host_lvl, - u32 *net_lvl) -{ - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - *net_lvl = host_lvl; - return 0; - case CIPSO_V4_MAP_STD: - if (host_lvl < doi_def->map.std->lvl.local_size) { - *net_lvl = doi_def->map.std->lvl.local[host_lvl]; - return 0; - } - break; - } - - return -EINVAL; -} - -/** - * cipso_v4_map_lvl_ntoh - Perform a level mapping from the network to the host - * @doi_def: the DOI definition - * @net_lvl: the network/CIPSO MLS level - * @host_lvl: the host MLS level - * - * Description: - * Perform a label mapping to translate a CIPSO level to the correct local MLS - * level using the given DOI definition. Returns zero on success, negative - * values otherwise. - * - */ -static int cipso_v4_map_lvl_ntoh(const struct cipso_v4_doi *doi_def, - u32 net_lvl, - u32 *host_lvl) -{ - struct cipso_v4_std_map_tbl *map_tbl; - - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - *host_lvl = net_lvl; - return 0; - case CIPSO_V4_MAP_STD: - map_tbl = doi_def->map.std; - if (net_lvl < map_tbl->lvl.cipso_size && - map_tbl->lvl.cipso[net_lvl] < CIPSO_V4_INV_LVL) { - *host_lvl = doi_def->map.std->lvl.cipso[net_lvl]; - return 0; - } - break; - } - - return -EINVAL; -} - -/** - * cipso_v4_map_cat_rbm_valid - Checks to see if the category bitmap is valid - * @doi_def: the DOI definition - * @bitmap: category bitmap - * @bitmap_len: bitmap length in bytes - * - * Description: - * Checks the given category bitmap against the given DOI definition and - * returns a negative value if any of the categories in the bitmap do not have - * a valid mapping and a zero value if all of the categories are valid. 
- * - */ -static int cipso_v4_map_cat_rbm_valid(const struct cipso_v4_doi *doi_def, - const unsigned char *bitmap, - u32 bitmap_len) -{ - int cat = -1; - u32 bitmap_len_bits = bitmap_len * 8; - u32 cipso_cat_size = doi_def->map.std->cat.cipso_size; - u32 *cipso_array = doi_def->map.std->cat.cipso; - - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - return 0; - case CIPSO_V4_MAP_STD: - for (;;) { - cat = cipso_v4_bitmap_walk(bitmap, - bitmap_len_bits, - cat + 1, - 1); - if (cat < 0) - break; - if (cat >= cipso_cat_size || - cipso_array[cat] >= CIPSO_V4_INV_CAT) - return -EFAULT; - } - - if (cat == -1) - return 0; - break; - } - - return -EFAULT; -} - -/** - * cipso_v4_map_cat_rbm_hton - Perform a category mapping from host to network - * @doi_def: the DOI definition - * @host_cat: the category bitmap in host format - * @host_cat_len: the length of the host's category bitmap in bytes - * @net_cat: the zero'd out category bitmap in network/CIPSO format - * @net_cat_len: the length of the CIPSO bitmap in bytes - * - * Description: - * Perform a label mapping to translate a local MLS category bitmap to the - * correct CIPSO bitmap using the given DOI definition. Returns the minimum - * size in bytes of the network bitmap on success, negative values otherwise. 
- * - */ -static int cipso_v4_map_cat_rbm_hton(const struct cipso_v4_doi *doi_def, - const unsigned char *host_cat, - u32 host_cat_len, - unsigned char *net_cat, - u32 net_cat_len) -{ - int host_spot = -1; - u32 net_spot; - u32 net_spot_max = 0; - u32 host_clen_bits = host_cat_len * 8; - u32 net_clen_bits = net_cat_len * 8; - u32 host_cat_size = doi_def->map.std->cat.local_size; - u32 *host_cat_array = doi_def->map.std->cat.local; - - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - net_spot_max = host_cat_len - 1; - while (net_spot_max > 0 && host_cat[net_spot_max] == 0) - net_spot_max--; - if (net_spot_max > net_cat_len) - return -EINVAL; - memcpy(net_cat, host_cat, net_spot_max); - return net_spot_max; - case CIPSO_V4_MAP_STD: - for (;;) { - host_spot = cipso_v4_bitmap_walk(host_cat, - host_clen_bits, - host_spot + 1, - 1); - if (host_spot < 0) - break; - if (host_spot >= host_cat_size) - return -EPERM; - - net_spot = host_cat_array[host_spot]; - if (net_spot >= net_clen_bits) - return -ENOSPC; - cipso_v4_bitmap_setbit(net_cat, net_spot, 1); - - if (net_spot > net_spot_max) - net_spot_max = net_spot; - } - - if (host_spot == -2) - return -EFAULT; - - if (++net_spot_max % 8) - return net_spot_max / 8 + 1; - return net_spot_max / 8; - } - - return -EINVAL; -} - -/** - * cipso_v4_map_cat_rbm_ntoh - Perform a category mapping from network to host - * @doi_def: the DOI definition - * @net_cat: the category bitmap in network/CIPSO format - * @net_cat_len: the length of the CIPSO bitmap in bytes - * @host_cat: the zero'd out category bitmap in host format - * @host_cat_len: the length of the host's category bitmap in bytes - * - * Description: - * Perform a label mapping to translate a CIPSO bitmap to the correct local - * MLS category bitmap using the given DOI definition. Returns the minimum - * size in bytes of the host bitmap on success, negative values otherwise. 
- * - */ -static int cipso_v4_map_cat_rbm_ntoh(const struct cipso_v4_doi *doi_def, - const unsigned char *net_cat, - u32 net_cat_len, - unsigned char *host_cat, - u32 host_cat_len) -{ - u32 host_spot; - u32 host_spot_max = 0; - int net_spot = -1; - u32 net_clen_bits = net_cat_len * 8; - u32 host_clen_bits = host_cat_len * 8; - u32 net_cat_size = doi_def->map.std->cat.cipso_size; - u32 *net_cat_array = doi_def->map.std->cat.cipso; - - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - if (net_cat_len > host_cat_len) - return -EINVAL; - memcpy(host_cat, net_cat, net_cat_len); - return net_cat_len; - case CIPSO_V4_MAP_STD: - for (;;) { - net_spot = cipso_v4_bitmap_walk(net_cat, - net_clen_bits, - net_spot + 1, - 1); - if (net_spot < 0) - break; - if (net_spot >= net_cat_size || - net_cat_array[net_spot] >= CIPSO_V4_INV_CAT) - return -EPERM; - - host_spot = net_cat_array[net_spot]; - if (host_spot >= host_clen_bits) - return -ENOSPC; - cipso_v4_bitmap_setbit(host_cat, host_spot, 1); - - if (host_spot > host_spot_max) - host_spot_max = host_spot; - } - - if (net_spot == -2) - return -EFAULT; - - if (++host_spot_max % 8) - return host_spot_max / 8 + 1; - return host_spot_max / 8; - } - - return -EINVAL; -} - -/* - * Protocol Handling Functions - */ - -#define CIPSO_V4_HDR_LEN 6 - -/** - * cipso_v4_gentag_hdr - Generate a CIPSO option header - * @doi_def: the DOI definition - * @len: the total tag length in bytes - * @buf: the CIPSO option buffer - * - * Description: - * Write a CIPSO header into the beginning of @buffer. Return zero on success, - * negative values on failure. 
- * - */ -static int cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def, - u32 len, - unsigned char *buf) -{ - if (CIPSO_V4_HDR_LEN + len > 40) - return -ENOSPC; - - buf[0] = IPOPT_CIPSO; - buf[1] = CIPSO_V4_HDR_LEN + len; - *(u32 *)&buf[2] = htonl(doi_def->doi); - - return 0; -} - -#define CIPSO_V4_TAG1_CAT_LEN 30 - -/** - * cipso_v4_gentag_rbm - Generate a CIPSO restricted bitmap tag (type #1) - * @doi_def: the DOI definition - * @secattr: the security attributes - * @buffer: the option buffer - * @buffer_len: length of buffer in bytes - * - * Description: - * Generate a CIPSO option using the restricted bitmap tag, tag type #1. The - * actual buffer length may be larger than the indicated size due to - * translation between host and network category bitmaps. Returns zero on - * success, negative values on failure. - * - */ -static int cipso_v4_gentag_rbm(const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr, - unsigned char **buffer, - u32 *buffer_len) -{ - int ret_val = -EPERM; - unsigned char *buf = NULL; - u32 buf_len; - u32 level; - - if (secattr->mls_cat) { - buf = kzalloc(CIPSO_V4_HDR_LEN + 4 + CIPSO_V4_TAG1_CAT_LEN, - GFP_ATOMIC); - if (buf == NULL) - return -ENOMEM; - - ret_val = cipso_v4_map_cat_rbm_hton(doi_def, - secattr->mls_cat, - secattr->mls_cat_len, - &buf[CIPSO_V4_HDR_LEN + 4], - CIPSO_V4_TAG1_CAT_LEN); - if (ret_val < 0) - goto gentag_failure; - - /* This will send packets using the "optimized" format when - * possibile as specified in section 3.4.2.6 of the - * CIPSO draft. 
*/ - if (cipso_v4_rbm_optfmt && (ret_val > 0 && ret_val < 10)) - ret_val = 10; - - buf_len = 4 + ret_val; - } else { - buf = kzalloc(CIPSO_V4_HDR_LEN + 4, GFP_ATOMIC); - if (buf == NULL) - return -ENOMEM; - buf_len = 4; - } - - ret_val = cipso_v4_map_lvl_hton(doi_def, secattr->mls_lvl, &level); - if (ret_val != 0) - goto gentag_failure; - - ret_val = cipso_v4_gentag_hdr(doi_def, buf_len, buf); - if (ret_val != 0) - goto gentag_failure; - - buf[CIPSO_V4_HDR_LEN] = 0x01; - buf[CIPSO_V4_HDR_LEN + 1] = buf_len; - buf[CIPSO_V4_HDR_LEN + 3] = level; - - *buffer = buf; - *buffer_len = CIPSO_V4_HDR_LEN + buf_len; - - return 0; - -gentag_failure: - kfree(buf); - return ret_val; -} - -/** - * cipso_v4_parsetag_rbm - Parse a CIPSO restricted bitmap tag - * @doi_def: the DOI definition - * @tag: the CIPSO tag - * @secattr: the security attributes - * - * Description: - * Parse a CIPSO restricted bitmap tag (tag type #1) and return the security - * attributes in @secattr. Return zero on success, negatives values on - * failure. 
- * - */ -static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, - const unsigned char *tag, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val; - u8 tag_len = tag[1]; - u32 level; - - ret_val = cipso_v4_map_lvl_ntoh(doi_def, tag[3], &level); - if (ret_val != 0) - return ret_val; - secattr->mls_lvl = level; - secattr->mls_lvl_vld = 1; - - if (tag_len > 4) { - switch (doi_def->type) { - case CIPSO_V4_MAP_PASS: - secattr->mls_cat_len = tag_len - 4; - break; - case CIPSO_V4_MAP_STD: - secattr->mls_cat_len = - doi_def->map.std->cat.local_size; - break; - } - secattr->mls_cat = kzalloc(secattr->mls_cat_len, GFP_ATOMIC); - if (secattr->mls_cat == NULL) - return -ENOMEM; - - ret_val = cipso_v4_map_cat_rbm_ntoh(doi_def, - &tag[4], - tag_len - 4, - secattr->mls_cat, - secattr->mls_cat_len); - if (ret_val < 0) { - kfree(secattr->mls_cat); - return ret_val; - } - secattr->mls_cat_len = ret_val; - } - - return 0; -} - -/** - * cipso_v4_validate - Validate a CIPSO option - * @option: the start of the option, on error it is set to point to the error - * - * Description: - * This routine is called to validate a CIPSO option, it checks all of the - * fields to ensure that they are at least valid, see the draft snippet below - * for details. If the option is valid then a zero value is returned and - * the value of @option is unchanged. If the option is invalid then a - * non-zero value is returned and @option is adjusted to point to the - * offending portion of the option. From the IETF draft ... - * - * "If any field within the CIPSO options, such as the DOI identifier, is not - * recognized the IP datagram is discarded and an ICMP 'parameter problem' - * (type 12) is generated and returned. The ICMP code field is set to 'bad - * parameter' (code 0) and the pointer is set to the start of the CIPSO field - * that is unrecognized." 
- * - */ -int cipso_v4_validate(unsigned char **option) -{ - unsigned char *opt = *option; - unsigned char *tag; - unsigned char opt_iter; - unsigned char err_offset = 0; - u8 opt_len; - u8 tag_len; - struct cipso_v4_doi *doi_def = NULL; - u32 tag_iter; - - /* caller already checks for length values that are too large */ - opt_len = opt[1]; - if (opt_len < 8) { - err_offset = 1; - goto validate_return; - } - - rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(ntohl(*((u32 *)&opt[2]))); - if (doi_def == NULL) { - err_offset = 2; - goto validate_return_locked; - } - - opt_iter = 6; - tag = opt + opt_iter; - while (opt_iter < opt_len) { - for (tag_iter = 0; doi_def->tags[tag_iter] != tag[0];) - if (doi_def->tags[tag_iter] == CIPSO_V4_TAG_INVALID || - ++tag_iter == CIPSO_V4_TAG_MAXCNT) { - err_offset = opt_iter; - goto validate_return_locked; - } - - tag_len = tag[1]; - if (tag_len > (opt_len - opt_iter)) { - err_offset = opt_iter + 1; - goto validate_return_locked; - } - - switch (tag[0]) { - case CIPSO_V4_TAG_RBITMAP: - if (tag_len < 4) { - err_offset = opt_iter + 1; - goto validate_return_locked; - } - - /* We are already going to do all the verification - * necessary at the socket layer so from our point of - * view it is safe to turn these checks off (and less - * work), however, the CIPSO draft says we should do - * all the CIPSO validations here but it doesn't - * really specify _exactly_ what we need to validate - * ... so, just make it a sysctl tunable. 
*/ - if (cipso_v4_rbm_strictvalid) { - if (cipso_v4_map_lvl_valid(doi_def, - tag[3]) < 0) { - err_offset = opt_iter + 3; - goto validate_return_locked; - } - if (tag_len > 4 && - cipso_v4_map_cat_rbm_valid(doi_def, - &tag[4], - tag_len - 4) < 0) { - err_offset = opt_iter + 4; - goto validate_return_locked; - } - } - break; - default: - err_offset = opt_iter; - goto validate_return_locked; - } - - tag += tag_len; - opt_iter += tag_len; - } - -validate_return_locked: - rcu_read_unlock(); -validate_return: - *option = opt + err_offset; - return err_offset; -} - -/** - * cipso_v4_error - Send the correct reponse for a bad packet - * @skb: the packet - * @error: the error code - * @gateway: CIPSO gateway flag - * - * Description: - * Based on the error code given in @error, send an ICMP error message back to - * the originating host. From the IETF draft ... - * - * "If the contents of the CIPSO [option] are valid but the security label is - * outside of the configured host or port label range, the datagram is - * discarded and an ICMP 'destination unreachable' (type 3) is generated and - * returned. The code field of the ICMP is set to 'communication with - * destination network administratively prohibited' (code 9) or to - * 'communication with destination host administratively prohibited' - * (code 10). The value of the code is dependent on whether the originator - * of the ICMP message is acting as a CIPSO host or a CIPSO gateway. The - * recipient of the ICMP message MUST be able to handle either value. The - * same procedure is performed if a CIPSO [option] can not be added to an - * IP packet because it is too large to fit in the IP options area." - * - * "If the error is triggered by receipt of an ICMP message, the message is - * discarded and no response is permitted (consistent with general ICMP - * processing rules)." 
- * - */ -void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway) -{ - if (skb->nh.iph->protocol == IPPROTO_ICMP || error != -EACCES) - return; - - if (gateway) - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0); - else - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0); -} - -/** - * cipso_v4_socket_setattr - Add a CIPSO option to a socket - * @sock: the socket - * @doi_def: the CIPSO DOI to use - * @secattr: the specific security attributes of the socket - * - * Description: - * Set the CIPSO option on the given socket using the DOI definition and - * security attributes passed to the function. This function requires - * exclusive access to @sock->sk, which means it either needs to be in the - * process of being created or locked via lock_sock(sock->sk). Returns zero on - * success and negative values on failure. - * - */ -int cipso_v4_socket_setattr(const struct socket *sock, - const struct cipso_v4_doi *doi_def, - const struct netlbl_lsm_secattr *secattr) -{ - int ret_val = -EPERM; - u32 iter; - unsigned char *buf = NULL; - u32 buf_len = 0; - u32 opt_len; - struct ip_options *opt = NULL; - struct sock *sk; - struct inet_sock *sk_inet; - struct inet_connection_sock *sk_conn; - - /* In the case of sock_create_lite(), the sock->sk field is not - * defined yet but it is not a problem as the only users of these - * "lite" PF_INET sockets are functions which do an accept() call - * afterwards so we will label the socket as part of the accept(). */ - sk = sock->sk; - if (sk == NULL) - return 0; - - /* XXX - This code assumes only one tag per CIPSO option which isn't - * really a good assumption to make but since we only support the MAC - * tags right now it is a safe assumption. 
*/ - iter = 0; - do { - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_gentag_rbm(doi_def, - secattr, - &buf, - &buf_len); - break; - default: - ret_val = -EPERM; - goto socket_setattr_failure; - } - - iter++; - } while (ret_val != 0 && - iter < CIPSO_V4_TAG_MAXCNT && - doi_def->tags[iter] != CIPSO_V4_TAG_INVALID); - if (ret_val != 0) - goto socket_setattr_failure; - - /* We can't use ip_options_get() directly because it makes a call to - * ip_options_get_alloc() which allocates memory with GFP_KERNEL and - * we can't block here. */ - opt_len = (buf_len + 3) & ~3; - opt = kzalloc(sizeof(*opt) + opt_len, GFP_ATOMIC); - if (opt == NULL) { - ret_val = -ENOMEM; - goto socket_setattr_failure; - } - memcpy(opt->__data, buf, buf_len); - opt->optlen = opt_len; - opt->is_data = 1; - kfree(buf); - buf = NULL; - ret_val = ip_options_compile(opt, NULL); - if (ret_val != 0) - goto socket_setattr_failure; - - sk_inet = inet_sk(sk); - if (sk_inet->is_icsk) { - sk_conn = inet_csk(sk); - if (sk_inet->opt) - sk_conn->icsk_ext_hdr_len -= sk_inet->opt->optlen; - sk_conn->icsk_ext_hdr_len += opt->optlen; - sk_conn->icsk_sync_mss(sk, sk_conn->icsk_pmtu_cookie); - } - opt = xchg(&sk_inet->opt, opt); - kfree(opt); - - return 0; - -socket_setattr_failure: - kfree(buf); - kfree(opt); - return ret_val; -} - -/** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket - * @secattr: the security attributes - * - * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. 
- * - */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val = -ENOMSG; - struct sock *sk; - struct inet_sock *sk_inet; - unsigned char *cipso_ptr; - u32 doi; - struct cipso_v4_doi *doi_def; - - sk = sock->sk; - lock_sock(sk); - sk_inet = inet_sk(sk); - if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - goto socket_getattr_return; - cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - - sizeof(struct iphdr); - ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); - if (ret_val == 0) - goto socket_getattr_return; - - doi = ntohl(*(u32 *)&cipso_ptr[2]); - rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(doi); - if (doi_def == NULL) { - rcu_read_unlock(); - goto socket_getattr_return; - } - switch (cipso_ptr[6]) { - case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); - break; - } - rcu_read_unlock(); - -socket_getattr_return: - release_sock(sk); - return ret_val; -} - -/** - * cipso_v4_skbuff_getattr - Get the security attributes from the CIPSO option - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Parse the given packet's CIPSO option and return the security attributes. - * Returns zero on success and negative values on failure. 
- * - */ -int cipso_v4_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val = -ENOMSG; - unsigned char *cipso_ptr; - u32 doi; - struct cipso_v4_doi *doi_def; - - if (!CIPSO_V4_OPTEXIST(skb)) - return -ENOMSG; - cipso_ptr = CIPSO_V4_OPTPTR(skb); - if (cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr) == 0) - return 0; - - doi = ntohl(*(u32 *)&cipso_ptr[2]); - rcu_read_lock(); - doi_def = cipso_v4_doi_getdef(doi); - if (doi_def == NULL) - goto skbuff_getattr_return; - switch (cipso_ptr[6]) { - case CIPSO_V4_TAG_RBITMAP: - ret_val = cipso_v4_parsetag_rbm(doi_def, - &cipso_ptr[6], - secattr); - break; - } - -skbuff_getattr_return: - rcu_read_unlock(); - return ret_val; -} - -/* - * Setup Functions - */ - -/** - * cipso_v4_init - Initialize the CIPSO module - * - * Description: - * Initialize the CIPSO module and prepare it for use. Returns zero on success - * and negative values on failure. - * - */ -static int __init cipso_v4_init(void) -{ - int ret_val; - - ret_val = cipso_v4_cache_init(); - if (ret_val != 0) - panic("Failed to initialize the CIPSO/IPv4 cache (%d)\n", - ret_val); - - return 0; -} - -subsys_initcall(cipso_v4_init); diff --git a/trunk/net/ipv4/devinet.c b/trunk/net/ipv4/devinet.c index 8e8d1f17d77a..a6cc31d911eb 100644 --- a/trunk/net/ipv4/devinet.c +++ b/trunk/net/ipv4/devinet.c @@ -43,7 +43,6 @@ #include #include #include -#include #include #include #include @@ -63,7 +62,6 @@ #include #include #include -#include struct ipv4_devconf ipv4_devconf = { .accept_redirects = 1, @@ -80,15 +78,7 @@ static struct ipv4_devconf ipv4_devconf_dflt = { .accept_source_route = 1, }; -static struct nla_policy ifa_ipv4_policy[IFA_MAX+1] __read_mostly = { - [IFA_LOCAL] = { .type = NLA_U32 }, - [IFA_ADDRESS] = { .type = NLA_U32 }, - [IFA_BROADCAST] = { .type = NLA_U32 }, - [IFA_ANYCAST] = { .type = NLA_U32 }, - [IFA_LABEL] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, -}; - -static void rtmsg_ifa(int event, struct 
in_ifaddr *, struct nlmsghdr *, u32); +static void rtmsg_ifa(int event, struct in_ifaddr *); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, @@ -239,8 +229,8 @@ int inet_addr_onlink(struct in_device *in_dev, u32 a, u32 b) return 0; } -static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) +static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, + int destroy) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -273,7 +263,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, if (!do_promote) { *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -298,7 +288,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. 
*/ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -310,7 +300,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) { @@ -329,14 +319,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } } -static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy) -{ - __inet_del_ifa(in_dev, ifap, destroy, NULL, 0); -} - -static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) +static int inet_insert_ifa(struct in_ifaddr *ifa) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -381,17 +364,12 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. 
Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; } -static int inet_insert_ifa(struct in_ifaddr *ifa) -{ - return __inet_insert_ifa(ifa, NULL, 0); -} - static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) { struct in_device *in_dev = __in_dev_get_rtnl(dev); @@ -443,134 +421,87 @@ struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, u32 prefix, static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct nlattr *tb[IFA_MAX+1]; + struct rtattr **rta = arg; struct in_device *in_dev; - struct ifaddrmsg *ifm; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in_ifaddr *ifa, **ifap; - int err = -EINVAL; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); - if (err < 0) - goto errout; - - ifm = nlmsg_data(nlh); - in_dev = inetdev_by_index(ifm->ifa_index); - if (in_dev == NULL) { - err = -ENODEV; - goto errout; - } - + if ((in_dev = inetdev_by_index(ifm->ifa_index)) == NULL) + goto out; __in_dev_put(in_dev); for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL; ifap = &ifa->ifa_next) { - if (tb[IFA_LOCAL] && - ifa->ifa_local != nla_get_u32(tb[IFA_LOCAL])) - continue; - - if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label)) - continue; - - if (tb[IFA_ADDRESS] && - (ifm->ifa_prefixlen != ifa->ifa_prefixlen || - !inet_ifa_match(nla_get_u32(tb[IFA_ADDRESS]), ifa))) + if ((rta[IFA_LOCAL - 1] && + memcmp(RTA_DATA(rta[IFA_LOCAL - 1]), + &ifa->ifa_local, 4)) || + (rta[IFA_LABEL - 1] && + rtattr_strcmp(rta[IFA_LABEL - 1], ifa->ifa_label)) || + (rta[IFA_ADDRESS - 1] && + (ifm->ifa_prefixlen != ifa->ifa_prefixlen || + !inet_ifa_match(*(u32*)RTA_DATA(rta[IFA_ADDRESS - 1]), + ifa)))) continue; - - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + inet_del_ifa(in_dev, ifap, 1); return 0; } 
- - err = -EADDRNOTAVAIL; -errout: - return err; +out: + return -EADDRNOTAVAIL; } -static struct in_ifaddr *rtm_to_ifaddr(struct nlmsghdr *nlh) +static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct nlattr *tb[IFA_MAX+1]; - struct in_ifaddr *ifa; - struct ifaddrmsg *ifm; + struct rtattr **rta = arg; struct net_device *dev; struct in_device *in_dev; - int err = -EINVAL; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); + struct in_ifaddr *ifa; + int rc = -EINVAL; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy); - if (err < 0) - goto errout; + ASSERT_RTNL(); - ifm = nlmsg_data(nlh); - if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL) - goto errout; + if (ifm->ifa_prefixlen > 32 || !rta[IFA_LOCAL - 1]) + goto out; - dev = __dev_get_by_index(ifm->ifa_index); - if (dev == NULL) { - err = -ENODEV; - goto errout; - } + rc = -ENODEV; + if ((dev = __dev_get_by_index(ifm->ifa_index)) == NULL) + goto out; - in_dev = __in_dev_get_rtnl(dev); - if (in_dev == NULL) { + rc = -ENOBUFS; + if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) { in_dev = inetdev_init(dev); - if (in_dev == NULL) { - err = -ENOBUFS; - goto errout; - } - } - - ifa = inet_alloc_ifa(); - if (ifa == NULL) { - /* - * A potential indev allocation can be left alive, it stays - * assigned to its device and is destroy with it. 
- */ - err = -ENOBUFS; - goto errout; + if (!in_dev) + goto out; } - in_dev_hold(in_dev); - - if (tb[IFA_ADDRESS] == NULL) - tb[IFA_ADDRESS] = tb[IFA_LOCAL]; + if ((ifa = inet_alloc_ifa()) == NULL) + goto out; + if (!rta[IFA_ADDRESS - 1]) + rta[IFA_ADDRESS - 1] = rta[IFA_LOCAL - 1]; + memcpy(&ifa->ifa_local, RTA_DATA(rta[IFA_LOCAL - 1]), 4); + memcpy(&ifa->ifa_address, RTA_DATA(rta[IFA_ADDRESS - 1]), 4); ifa->ifa_prefixlen = ifm->ifa_prefixlen; ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen); + if (rta[IFA_BROADCAST - 1]) + memcpy(&ifa->ifa_broadcast, + RTA_DATA(rta[IFA_BROADCAST - 1]), 4); + if (rta[IFA_ANYCAST - 1]) + memcpy(&ifa->ifa_anycast, RTA_DATA(rta[IFA_ANYCAST - 1]), 4); ifa->ifa_flags = ifm->ifa_flags; ifa->ifa_scope = ifm->ifa_scope; - ifa->ifa_dev = in_dev; - - ifa->ifa_local = nla_get_u32(tb[IFA_LOCAL]); - ifa->ifa_address = nla_get_u32(tb[IFA_ADDRESS]); - - if (tb[IFA_BROADCAST]) - ifa->ifa_broadcast = nla_get_u32(tb[IFA_BROADCAST]); - - if (tb[IFA_ANYCAST]) - ifa->ifa_anycast = nla_get_u32(tb[IFA_ANYCAST]); - - if (tb[IFA_LABEL]) - nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ); + in_dev_hold(in_dev); + ifa->ifa_dev = in_dev; + if (rta[IFA_LABEL - 1]) + rtattr_strlcpy(ifa->ifa_label, rta[IFA_LABEL - 1], IFNAMSIZ); else memcpy(ifa->ifa_label, dev->name, IFNAMSIZ); - return ifa; - -errout: - return ERR_PTR(err); -} - -static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) -{ - struct in_ifaddr *ifa; - - ASSERT_RTNL(); - - ifa = rtm_to_ifaddr(nlh); - if (IS_ERR(ifa)) - return PTR_ERR(ifa); - - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + rc = inet_insert_ifa(ifa); +out: + return rc; } /* @@ -1125,37 +1056,32 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; + unsigned char *b = skb->tail; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); - if (nlh == NULL) - return -ENOBUFS; - - ifm = nlmsg_data(nlh); + nlh = 
NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); + ifm = NLMSG_DATA(nlh); ifm->ifa_family = AF_INET; ifm->ifa_prefixlen = ifa->ifa_prefixlen; ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT; ifm->ifa_scope = ifa->ifa_scope; ifm->ifa_index = ifa->ifa_dev->dev->ifindex; - if (ifa->ifa_address) - NLA_PUT_U32(skb, IFA_ADDRESS, ifa->ifa_address); - + RTA_PUT(skb, IFA_ADDRESS, 4, &ifa->ifa_address); if (ifa->ifa_local) - NLA_PUT_U32(skb, IFA_LOCAL, ifa->ifa_local); - + RTA_PUT(skb, IFA_LOCAL, 4, &ifa->ifa_local); if (ifa->ifa_broadcast) - NLA_PUT_U32(skb, IFA_BROADCAST, ifa->ifa_broadcast); - + RTA_PUT(skb, IFA_BROADCAST, 4, &ifa->ifa_broadcast); if (ifa->ifa_anycast) - NLA_PUT_U32(skb, IFA_ANYCAST, ifa->ifa_anycast); - + RTA_PUT(skb, IFA_ANYCAST, 4, &ifa->ifa_anycast); if (ifa->ifa_label[0]) - NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label); - - return nlmsg_end(skb, nlh); + RTA_PUT(skb, IFA_LABEL, IFNAMSIZ, &ifa->ifa_label); + nlh->nlmsg_len = skb->tail - b; + return skb->len; -nla_put_failure: - return nlmsg_cancel(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) @@ -1201,27 +1127,19 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh, - u32 pid) +static void rtmsg_ifa(int event, struct in_ifaddr* ifa) { - struct sk_buff *skb; - u32 seq = nlh ? 
nlh->nlmsg_seq : 0; - int err = -ENOBUFS; - - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) - goto errout; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); - if (err < 0) { + if (!skb) + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, ENOBUFS); + else if (inet_fill_ifaddr(skb, ifa, 0, 0, event, 0) < 0) { kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_IFADDR, EINVAL); + } else { + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_IFADDR, GFP_KERNEL); } - - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_IFADDR, err); } static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { @@ -1233,7 +1151,9 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, + [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, + [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, #endif }; diff --git a/trunk/net/ipv4/esp4.c b/trunk/net/ipv4/esp4.c index 13b29360d102..b428489f6ccd 100644 --- a/trunk/net/ipv4/esp4.c +++ b/trunk/net/ipv4/esp4.c @@ -95,13 +95,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) { - if (unlikely(!esp->conf.ivinitted)) { - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); - esp->conf.ivinitted = 1; - } + if (esp->conf.ivlen) crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -253,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, 
* section 3.1.2 */ - if (x->props.mode == XFRM_MODE_TRANSPORT) + if (!x->props.mode) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -272,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -383,12 +378,12 @@ static int esp_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - esp->conf.ivinitted = 0; + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/trunk/net/ipv4/fib_frontend.c b/trunk/net/ipv4/fib_frontend.c index cfb527c060e4..ba2a70745a63 100644 --- a/trunk/net/ipv4/fib_frontend.c +++ b/trunk/net/ipv4/fib_frontend.c @@ -32,12 +32,10 @@ #include #include #include -#include #include #include #include #include -#include #include #include @@ -52,67 +50,48 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES +#define RT_TABLE_MIN RT_TABLE_MAIN + struct fib_table *ip_fib_local_table; struct fib_table *ip_fib_main_table; -#define FIB_TABLE_HASHSZ 1 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; - #else -#define FIB_TABLE_HASHSZ 256 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; +#define RT_TABLE_MIN 1 -struct fib_table *fib_new_table(u32 id) +struct fib_table *fib_tables[RT_TABLE_MAX+1]; + +struct fib_table *__fib_new_table(int id) { struct fib_table *tb; - unsigned int h; - if (id == 0) - id = RT_TABLE_MAIN; - tb = fib_get_table(id); - if (tb) - return tb; tb = fib_hash_init(id); if (!tb) return NULL; - h = 
id & (FIB_TABLE_HASHSZ - 1); - hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); + fib_tables[id] = tb; return tb; } -struct fib_table *fib_get_table(u32 id) -{ - struct fib_table *tb; - struct hlist_node *node; - unsigned int h; - if (id == 0) - id = RT_TABLE_MAIN; - h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); - return tb; - } - } - rcu_read_unlock(); - return NULL; -} #endif /* CONFIG_IP_MULTIPLE_TABLES */ + static void fib_flush(void) { int flushed = 0; +#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - struct hlist_node *node; - unsigned int h; + int id; - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) - flushed += tb->tb_flush(tb); + for (id = RT_TABLE_MAX; id>0; id--) { + if ((tb = fib_get_table(id))==NULL) + continue; + flushed += tb->tb_flush(tb); } +#else /* CONFIG_IP_MULTIPLE_TABLES */ + flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); + flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); +#endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) rt_cache_flush(-1); @@ -253,190 +232,42 @@ int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, #ifndef CONFIG_IP_NOSIOCRT -static inline u32 sk_extract_addr(struct sockaddr *addr) -{ - return ((struct sockaddr_in *) addr)->sin_addr.s_addr; -} - -static int put_rtax(struct nlattr *mx, int len, int type, u32 value) -{ - struct nlattr *nla; - - nla = (struct nlattr *) ((char *) mx + len); - nla->nla_type = type; - nla->nla_len = nla_attr_size(4); - *(u32 *) nla_data(nla) = value; - - return len + nla_total_size(4); -} - -static int rtentry_to_fib_config(int cmd, struct rtentry *rt, - struct fib_config *cfg) -{ - u32 addr; - int plen; - - memset(cfg, 0, sizeof(*cfg)); - - if (rt->rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - - /* - * Check mask for validity: - * a) it must be contiguous. 
- * b) destination must have all host bits clear. - * c) if application forgot to set correct family (AF_INET), - * reject request unless it is absolutely clear i.e. - * both family and mask are zero. - */ - plen = 32; - addr = sk_extract_addr(&rt->rt_dst); - if (!(rt->rt_flags & RTF_HOST)) { - u32 mask = sk_extract_addr(&rt->rt_genmask); - - if (rt->rt_genmask.sa_family != AF_INET) { - if (mask || rt->rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - - if (bad_mask(mask, addr)) - return -EINVAL; - - plen = inet_mask_len(mask); - } - - cfg->fc_dst_len = plen; - cfg->fc_dst = addr; - - if (cmd != SIOCDELRT) { - cfg->fc_nlflags = NLM_F_CREATE; - cfg->fc_protocol = RTPROT_BOOT; - } - - if (rt->rt_metric) - cfg->fc_priority = rt->rt_metric - 1; - - if (rt->rt_flags & RTF_REJECT) { - cfg->fc_scope = RT_SCOPE_HOST; - cfg->fc_type = RTN_UNREACHABLE; - return 0; - } - - cfg->fc_scope = RT_SCOPE_NOWHERE; - cfg->fc_type = RTN_UNICAST; - - if (rt->rt_dev) { - char *colon; - struct net_device *dev; - char devname[IFNAMSIZ]; - - if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) - return -EFAULT; - - devname[IFNAMSIZ-1] = 0; - colon = strchr(devname, ':'); - if (colon) - *colon = 0; - dev = __dev_get_by_name(devname); - if (!dev) - return -ENODEV; - cfg->fc_oif = dev->ifindex; - if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - return -ENODEV; - *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) - if (strcmp(ifa->ifa_label, devname) == 0) - break; - if (ifa == NULL) - return -ENODEV; - cfg->fc_prefsrc = ifa->ifa_local; - } - } - - addr = sk_extract_addr(&rt->rt_gateway); - if (rt->rt_gateway.sa_family == AF_INET && addr) { - cfg->fc_gw = addr; - if (rt->rt_flags & RTF_GATEWAY && - inet_addr_type(addr) == RTN_UNICAST) - cfg->fc_scope = RT_SCOPE_UNIVERSE; - } - - if (cmd == SIOCDELRT) - return 0; - - if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) - return -EINVAL; - - if (cfg->fc_scope == 
RT_SCOPE_NOWHERE) - cfg->fc_scope = RT_SCOPE_LINK; - - if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { - struct nlattr *mx; - int len = 0; - - mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); - if (mx == NULL) - return -ENOMEM; - - if (rt->rt_flags & RTF_MTU) - len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); - - if (rt->rt_flags & RTF_WINDOW) - len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); - - if (rt->rt_flags & RTF_IRTT) - len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); - - cfg->fc_mx = mx; - cfg->fc_mx_len = len; - } - - return 0; -} - /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void __user *arg) { - struct fib_config cfg; - struct rtentry rt; int err; + struct kern_rta rta; + struct rtentry r; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - - if (copy_from_user(&rt, arg, sizeof(rt))) + if (copy_from_user(&r, arg, sizeof(struct rtentry))) return -EFAULT; - rtnl_lock(); - err = rtentry_to_fib_config(cmd, &rt, &cfg); + err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); if (err == 0) { - struct fib_table *tb; - if (cmd == SIOCDELRT) { - tb = fib_get_table(cfg.fc_table); + struct fib_table *tb = fib_get_table(req.rtm.rtm_table); + err = -ESRCH; if (tb) - err = tb->tb_delete(tb, &cfg); - else - err = -ESRCH; + err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); } else { - tb = fib_new_table(cfg.fc_table); + struct fib_table *tb = fib_new_table(req.rtm.rtm_table); + err = -ENOBUFS; if (tb) - err = tb->tb_insert(tb, &cfg); - else - err = -ENOBUFS; + err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); } - - /* allocated by rtentry_to_fib_config() */ - kfree(cfg.fc_mx); + kfree(rta.rta_mx); } rtnl_unlock(); return err; @@ -453,169 +284,77 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -struct 
nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { - [RTA_DST] = { .type = NLA_U32 }, - [RTA_SRC] = { .type = NLA_U32 }, - [RTA_IIF] = { .type = NLA_U32 }, - [RTA_OIF] = { .type = NLA_U32 }, - [RTA_GATEWAY] = { .type = NLA_U32 }, - [RTA_PRIORITY] = { .type = NLA_U32 }, - [RTA_PREFSRC] = { .type = NLA_U32 }, - [RTA_METRICS] = { .type = NLA_NESTED }, - [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, - [RTA_PROTOINFO] = { .type = NLA_U32 }, - [RTA_FLOW] = { .type = NLA_U32 }, - [RTA_MP_ALGO] = { .type = NLA_U32 }, -}; - -static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, - struct fib_config *cfg) +static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) { - struct nlattr *attr; - int err, remaining; - struct rtmsg *rtm; - - err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); - if (err < 0) - goto errout; - - memset(cfg, 0, sizeof(*cfg)); - - rtm = nlmsg_data(nlh); - cfg->fc_family = rtm->rtm_family; - cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_src_len = rtm->rtm_src_len; - cfg->fc_tos = rtm->rtm_tos; - cfg->fc_table = rtm->rtm_table; - cfg->fc_protocol = rtm->rtm_protocol; - cfg->fc_scope = rtm->rtm_scope; - cfg->fc_type = rtm->rtm_type; - cfg->fc_flags = rtm->rtm_flags; - cfg->fc_nlflags = nlh->nlmsg_flags; - - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; - cfg->fc_nlinfo.nlh = nlh; - - nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { - switch (attr->nla_type) { - case RTA_DST: - cfg->fc_dst = nla_get_u32(attr); - break; - case RTA_SRC: - cfg->fc_src = nla_get_u32(attr); - break; - case RTA_OIF: - cfg->fc_oif = nla_get_u32(attr); - break; - case RTA_GATEWAY: - cfg->fc_gw = nla_get_u32(attr); - break; - case RTA_PRIORITY: - cfg->fc_priority = nla_get_u32(attr); - break; - case RTA_PREFSRC: - cfg->fc_prefsrc = nla_get_u32(attr); - break; - case RTA_METRICS: - cfg->fc_mx = nla_data(attr); - cfg->fc_mx_len = nla_len(attr); - break; - case RTA_MULTIPATH: - cfg->fc_mp = nla_data(attr); - 
cfg->fc_mp_len = nla_len(attr); - break; - case RTA_FLOW: - cfg->fc_flow = nla_get_u32(attr); - break; - case RTA_MP_ALGO: - cfg->fc_mp_alg = nla_get_u32(attr); - break; - case RTA_TABLE: - cfg->fc_table = nla_get_u32(attr); - break; + int i; + + for (i=1; i<=RTA_MAX; i++, rta++) { + struct rtattr *attr = *rta; + if (attr) { + if (RTA_PAYLOAD(attr) < 4) + return -EINVAL; + if (i != RTA_MULTIPATH && i != RTA_METRICS) + *rta = (struct rtattr*)RTA_DATA(attr); } } - return 0; -errout: - return err; } int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_config cfg; - struct fib_table *tb; - int err; - - err = rtm_to_fib_config(skb, nlh, &cfg); - if (err < 0) - goto errout; + struct fib_table * tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); - tb = fib_get_table(cfg.fc_table); - if (tb == NULL) { - err = -ESRCH; - goto errout; - } + if (inet_check_attr(r, rta)) + return -EINVAL; - err = tb->tb_delete(tb, &cfg); -errout: - return err; + tb = fib_get_table(r->rtm_table); + if (tb) + return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); + return -ESRCH; } int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_config cfg; - struct fib_table *tb; - int err; - - err = rtm_to_fib_config(skb, nlh, &cfg); - if (err < 0) - goto errout; + struct fib_table * tb; + struct rtattr **rta = arg; + struct rtmsg *r = NLMSG_DATA(nlh); - tb = fib_new_table(cfg.fc_table); - if (tb == NULL) { - err = -ENOBUFS; - goto errout; - } + if (inet_check_attr(r, rta)) + return -EINVAL; - err = tb->tb_insert(tb, &cfg); -errout: - return err; + tb = fib_new_table(r->rtm_table); + if (tb) + return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); + return -ENOBUFS; } int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - unsigned int h, s_h; - unsigned int e = 0, s_e; + int t; + int s_t; struct fib_table *tb; - struct hlist_node *node; - int dumped = 0; - 
if (nlmsg_len(cb->nlh) >= sizeof(struct rtmsg) && - ((struct rtmsg *) nlmsg_data(cb->nlh))->rtm_flags & RTM_F_CLONED) + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return ip_rt_dump(skb, cb); - s_h = cb->args[0]; - s_e = cb->args[1]; - - for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { - e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { - if (e < s_e) - goto next; - if (dumped) - memset(&cb->args[2], 0, sizeof(cb->args) - - 2 * sizeof(cb->args[0])); - if (tb->tb_dump(tb, skb, cb) < 0) - goto out; - dumped = 1; -next: - e++; - } + s_t = cb->args[0]; + if (s_t == 0) + s_t = cb->args[0] = RT_TABLE_MIN; + + for (t=s_t; t<=RT_TABLE_MAX; t++) { + if (t < s_t) continue; + if (t > s_t) + memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); + if ((tb = fib_get_table(t))==NULL) + continue; + if (tb->tb_dump(tb, skb, cb) < 0) + break; } -out: - cb->args[1] = e; - cb->args[0] = h; + + cb->args[0] = t; return skb->len; } @@ -627,19 +366,17 @@ int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) only when netlink is already locked. 
*/ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, - struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) { - struct fib_table *tb; - struct fib_config cfg = { - .fc_protocol = RTPROT_KERNEL, - .fc_type = type, - .fc_dst = dst, - .fc_dst_len = dst_len, - .fc_prefsrc = ifa->ifa_local, - .fc_oif = ifa->ifa_dev->dev->ifindex, - .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, - }; + struct fib_table * tb; + struct { + struct nlmsghdr nlh; + struct rtmsg rtm; + } req; + struct kern_rta rta; + + memset(&req.rtm, 0, sizeof(req.rtm)); + memset(&rta, 0, sizeof(rta)); if (type == RTN_UNICAST) tb = fib_new_table(RT_TABLE_MAIN); @@ -649,17 +386,26 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, if (tb == NULL) return; - cfg.fc_table = tb->tb_id; + req.nlh.nlmsg_len = sizeof(req); + req.nlh.nlmsg_type = cmd; + req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; + req.nlh.nlmsg_pid = 0; + req.nlh.nlmsg_seq = 0; - if (type != RTN_LOCAL) - cfg.fc_scope = RT_SCOPE_LINK; - else - cfg.fc_scope = RT_SCOPE_HOST; + req.rtm.rtm_dst_len = dst_len; + req.rtm.rtm_table = tb->tb_id; + req.rtm.rtm_protocol = RTPROT_KERNEL; + req.rtm.rtm_scope = (type != RTN_LOCAL ? 
RT_SCOPE_LINK : RT_SCOPE_HOST); + req.rtm.rtm_type = type; + + rta.rta_dst = &dst; + rta.rta_prefsrc = &ifa->ifa_local; + rta.rta_oif = &ifa->ifa_dev->dev->ifindex; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &cfg); + tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); else - tb->tb_delete(tb, &cfg); + tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); } void fib_add_ifaddr(struct in_ifaddr *ifa) @@ -906,17 +652,11 @@ static struct notifier_block fib_netdev_notifier = { void __init ip_fib_init(void) { - unsigned int i; - - for (i = 0; i < FIB_TABLE_HASHSZ; i++) - INIT_HLIST_HEAD(&fib_table_hash[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); - hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); - hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); #else - fib4_rules_init(); + fib_rules_init(); #endif register_netdevice_notifier(&fib_netdev_notifier); diff --git a/trunk/net/ipv4/fib_hash.c b/trunk/net/ipv4/fib_hash.c index 88133b383dc5..72c633b357cf 100644 --- a/trunk/net/ipv4/fib_hash.c +++ b/trunk/net/ipv4/fib_hash.c @@ -379,39 +379,42 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) +static int +fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f, *f; struct fib_alias *fa, *new_fa; struct fn_zone *fz; struct fib_info *fi; - u8 tos = cfg->fc_tos; + int z = r->rtm_dst_len; + int type = r->rtm_type; + u8 tos = r->rtm_tos; u32 key; int err; - if (cfg->fc_dst_len > 32) + if (z > 32) return -EINVAL; - - fz = table->fn_zones[cfg->fc_dst_len]; - if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) + fz = table->fn_zones[z]; + if (!fz && !(fz = fn_new_zone(table, z))) return 
-ENOBUFS; key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(cfg->fc_dst, fz); + key = fz_key(dst, fz); } - fi = fib_create_info(cfg); - if (IS_ERR(fi)) - return PTR_ERR(fi); + if ((fi = fib_create_info(r, rta, n, &err)) == NULL) + return err; if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && - (cfg->fc_dst_len == 32 || - (1 << cfg->fc_dst_len) > fz->fz_divisor)) + (z==32 || (1< fz->fz_divisor)) fn_rehash_zone(fz); f = fib_find_node(fz, key); @@ -437,18 +440,18 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) struct fib_alias *fa_orig; err = -EEXIST; - if (cfg->fc_nlflags & NLM_F_EXCL) + if (n->nlmsg_flags & NLM_F_EXCL) goto out; - if (cfg->fc_nlflags & NLM_F_REPLACE) { + if (n->nlmsg_flags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; - fa->fa_type = cfg->fc_type; - fa->fa_scope = cfg->fc_scope; + fa->fa_type = type; + fa->fa_scope = r->rtm_scope; state = fa->fa_state; fa->fa_state &= ~FA_S_ACCESSED; fib_hash_genid++; @@ -471,17 +474,17 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && + if (fa->fa_type == type && + fa->fa_scope == r->rtm_scope && fa->fa_info == fi) goto out; } - if (!(cfg->fc_nlflags & NLM_F_APPEND)) + if (!(n->nlmsg_flags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(cfg->fc_nlflags & NLM_F_CREATE)) + if (!(n->nlmsg_flags&NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -503,8 +506,8 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; + new_fa->fa_type = type; + new_fa->fa_scope = 
r->rtm_scope; new_fa->fa_state = 0; /* @@ -523,8 +526,7 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, - &cfg->fc_nlinfo); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); return 0; out_free_new_fa: @@ -535,25 +537,30 @@ static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) } -static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) +static int +fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; + int z = r->rtm_dst_len; struct fn_zone *fz; u32 key; + u8 tos = r->rtm_tos; - if (cfg->fc_dst_len > 32) + if (z > 32) return -EINVAL; - - if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) + if ((fz = table->fn_zones[z]) == NULL) return -ESRCH; key = 0; - if (cfg->fc_dst) { - if (cfg->fc_dst & ~FZ_MASK(fz)) + if (rta->rta_dst) { + u32 dst; + memcpy(&dst, rta->rta_dst, 4); + if (dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(cfg->fc_dst, fz); + key = fz_key(dst, fz); } f = fib_find_node(fz, key); @@ -561,7 +568,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) if (!f) fa = NULL; else - fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); + fa = fib_find_alias(&f->fn_alias, tos, 0); if (!fa) return -ESRCH; @@ -570,16 +577,16 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != cfg->fc_tos) + if (fa->fa_tos != tos) break; - if ((!cfg->fc_type || - fa->fa_type == cfg->fc_type) && - (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && - (!cfg->fc_protocol || - fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi) == 0) { + 
if ((!r->rtm_type || + fa->fa_type == r->rtm_type) && + (r->rtm_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == r->rtm_scope) && + (!r->rtm_protocol || + fi->fib_protocol == r->rtm_protocol) && + fib_nh_match(r, n, rta, fi) == 0) { fa_to_delete = fa; break; } @@ -589,8 +596,7 @@ static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, - tb->tb_id, &cfg->fc_nlinfo); + rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); kill_fn = 0; write_lock_bh(&fib_hash_lock); @@ -678,7 +684,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, struct fib_node *f; int i, s_i; - s_i = cb->args[4]; + s_i = cb->args[3]; i = 0; hlist_for_each_entry(f, node, head, fn_hash) { struct fib_alias *fa; @@ -693,19 +699,19 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, tb->tb_id, fa->fa_type, fa->fa_scope, - f->fn_key, + &f->fn_key, fz->fz_order, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[4] = i; + cb->args[3] = i; return -1; } next: i++; } } - cb->args[4] = i; + cb->args[3] = i; return skb->len; } @@ -716,21 +722,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, { int h, s_h; - s_h = cb->args[3]; + s_h = cb->args[2]; for (h=0; h < fz->fz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[4], 0, - sizeof(cb->args) - 4*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fz->fz_hash == NULL || hlist_empty(&fz->fz_hash[h])) continue; if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { - cb->args[3] = h; + cb->args[2] = h; return -1; } } - cb->args[3] = h; + cb->args[2] = h; return skb->len; } @@ -740,28 +746,28 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin struct fn_zone *fz; struct fn_hash *table = (struct fn_hash*)tb->tb_data; - s_m = cb->args[2]; + s_m = cb->args[1]; read_lock(&fib_hash_lock); for 
(fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[2], 0, + sizeof(cb->args) - 2*sizeof(cb->args[0])); if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[2] = m; + cb->args[1] = m; read_unlock(&fib_hash_lock); return -1; } } read_unlock(&fib_hash_lock); - cb->args[2] = m; + cb->args[1] = m; return skb->len; } #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(u32 id) +struct fib_table * fib_hash_init(int id) #else -struct fib_table * __init fib_hash_init(u32 id) +struct fib_table * __init fib_hash_init(int id) #endif { struct fib_table *tb; diff --git a/trunk/net/ipv4/fib_lookup.h b/trunk/net/ipv4/fib_lookup.h index fd6f7769f8ab..ef6609ea0eb7 100644 --- a/trunk/net/ipv4/fib_lookup.h +++ b/trunk/net/ipv4/fib_lookup.h @@ -23,14 +23,19 @@ extern int fib_semantic_match(struct list_head *head, struct fib_result *res, __u32 zone, __u32 mask, int prefixlen); extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(struct fib_config *cfg); -extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); +extern struct fib_info *fib_create_info(const struct rtmsg *r, + struct kern_rta *rta, + const struct nlmsghdr *, + int *err); +extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, + struct kern_rta *rta, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, u32 dst, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info); + int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git 
a/trunk/net/ipv4/fib_rules.c b/trunk/net/ipv4/fib_rules.c index 52b2adae4f22..79b04718bdfd 100644 --- a/trunk/net/ipv4/fib_rules.c +++ b/trunk/net/ipv4/fib_rules.c @@ -5,8 +5,9 @@ * * IPv4 Forwarding Information Base: policy rules. * + * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ + * * Authors: Alexey Kuznetsov, - * Thomas Graf * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,350 +19,463 @@ * Marc Boucher : routing by fwmark */ +#include +#include +#include #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include +#include #include -#include #include #include #include + #include +#include #include #include +#include #include -#include -static struct fib_rules_ops fib4_rules_ops; +#define FRprintk(a...) -struct fib4_rule +struct fib_rule { - struct fib_rule common; - u8 dst_len; - u8 src_len; - u8 tos; - u32 src; - u32 srcmask; - u32 dst; - u32 dstmask; + struct hlist_node hlist; + atomic_t r_clntref; + u32 r_preference; + unsigned char r_table; + unsigned char r_action; + unsigned char r_dst_len; + unsigned char r_src_len; + u32 r_src; + u32 r_srcmask; + u32 r_dst; + u32 r_dstmask; + u32 r_srcmap; + u8 r_flags; + u8 r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; + u32 r_fwmark; #endif + int r_ifindex; #ifdef CONFIG_NET_CLS_ROUTE - u32 tclassid; + __u32 r_tclassid; #endif + char r_ifname[IFNAMSIZ]; + int r_dead; + struct rcu_head rcu; }; -static struct fib4_rule default_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFF, - .table = RT_TABLE_DEFAULT, - .action = FR_ACT_TO_TBL, - }, +static struct fib_rule default_rule = { + .r_clntref = ATOMIC_INIT(2), + .r_preference = 0x7FFF, + .r_table = RT_TABLE_DEFAULT, + .r_action = RTN_UNICAST, }; -static struct fib4_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .table = 
RT_TABLE_MAIN, - .action = FR_ACT_TO_TBL, - }, +static struct fib_rule main_rule = { + .r_clntref = ATOMIC_INIT(2), + .r_preference = 0x7FFE, + .r_table = RT_TABLE_MAIN, + .r_action = RTN_UNICAST, }; -static struct fib4_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .table = RT_TABLE_LOCAL, - .action = FR_ACT_TO_TBL, - .flags = FIB_RULE_PERMANENT, - }, +static struct fib_rule local_rule = { + .r_clntref = ATOMIC_INIT(2), + .r_table = RT_TABLE_LOCAL, + .r_action = RTN_UNICAST, }; -static LIST_HEAD(fib4_rules); +static struct hlist_head fib_rules; -#ifdef CONFIG_NET_CLS_ROUTE -u32 fib_rules_tclass(struct fib_result *res) -{ - return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; -} -#endif - -int fib_lookup(struct flowi *flp, struct fib_result *res) -{ - struct fib_lookup_arg arg = { - .result = res, - }; - int err; +/* writer func called from netlink -- rtnl_sem hold*/ - err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); - res->r = arg.rule; +static void rtmsg_rule(int, struct fib_rule *); +int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r; + struct hlist_node *node; + int err = -ESRCH; + + hlist_for_each_entry(r, node, &fib_rules, hlist) { + if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && + rtm->rtm_src_len == r->r_src_len && + rtm->rtm_dst_len == r->r_dst_len && + (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && + rtm->rtm_tos == r->r_tos && +#ifdef CONFIG_IP_ROUTE_FWMARK + (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && +#endif + (!rtm->rtm_type || rtm->rtm_type == r->r_action) && + (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && + (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && + (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { + err = 
-EPERM; + if (r == &local_rule) + break; + + hlist_del_rcu(&r->hlist); + r->r_dead = 1; + rtmsg_rule(RTM_DELRULE, r); + fib_rule_put(r); + err = 0; + break; + } + } return err; } -static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) -{ - int err = -EAGAIN; - struct fib_table *tbl; +/* Allocate new unique table id */ - switch (rule->action) { - case FR_ACT_TO_TBL: - break; +static struct fib_table *fib_empty_table(void) +{ + int id; - case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + for (id = 1; id <= RT_TABLE_MAX; id++) + if (fib_tables[id] == NULL) + return __fib_new_table(id); + return NULL; +} - case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; +static inline void fib_rule_put_rcu(struct rcu_head *head) +{ + struct fib_rule *r = container_of(head, struct fib_rule, rcu); + kfree(r); +} - case FR_ACT_BLACKHOLE: - default: - err = -EINVAL; - goto errout; +void fib_rule_put(struct fib_rule *r) +{ + if (atomic_dec_and_test(&r->r_clntref)) { + if (r->r_dead) + call_rcu(&r->rcu, fib_rule_put_rcu); + else + printk("Freeing alive rule %p\n", r); } - - if ((tbl = fib_get_table(rule->table)) == NULL) - goto errout; - - err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); - if (err > 0) - err = -EAGAIN; -errout: - return err; } +/* writer func called from netlink -- rtnl_sem hold*/ -void fib_select_default(const struct flowi *flp, struct fib_result *res) +int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - if (res->r && res->r->action == FR_ACT_TO_TBL && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(res->r->table)) != NULL) - tb->tb_select_default(tb, flp, res); + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); + struct fib_rule *r, *new_r, *last = NULL; + struct hlist_node *node = NULL; + unsigned char table_id; + + if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || 
+ (rtm->rtm_tos & ~IPTOS_TOS_MASK)) + return -EINVAL; + + if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) + return -EINVAL; + + table_id = rtm->rtm_table; + if (table_id == RT_TABLE_UNSPEC) { + struct fib_table *table; + if (rtm->rtm_type == RTN_UNICAST) { + if ((table = fib_empty_table()) == NULL) + return -ENOBUFS; + table_id = table->tb_id; + } } -} -static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) -{ - struct fib4_rule *r = (struct fib4_rule *) rule; - u32 daddr = fl->fl4_dst; - u32 saddr = fl->fl4_src; + new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); + if (!new_r) + return -ENOMEM; + + if (rta[RTA_SRC-1]) + memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); + if (rta[RTA_DST-1]) + memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); + if (rta[RTA_GATEWAY-1]) + memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); + new_r->r_src_len = rtm->rtm_src_len; + new_r->r_dst_len = rtm->rtm_dst_len; + new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); + new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); + new_r->r_tos = rtm->rtm_tos; +#ifdef CONFIG_IP_ROUTE_FWMARK + if (rta[RTA_PROTOINFO-1]) + memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); +#endif + new_r->r_action = rtm->rtm_type; + new_r->r_flags = rtm->rtm_flags; + if (rta[RTA_PRIORITY-1]) + memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + new_r->r_table = table_id; + if (rta[RTA_IIF-1]) { + struct net_device *dev; + rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); + new_r->r_ifindex = -1; + dev = __dev_get_by_name(new_r->r_ifname); + if (dev) + new_r->r_ifindex = dev->ifindex; + } +#ifdef CONFIG_NET_CLS_ROUTE + if (rta[RTA_FLOW-1]) + memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); +#endif + r = container_of(fib_rules.first, struct fib_rule, hlist); - if (((saddr ^ r->src) & r->srcmask) || - ((daddr ^ r->dst) & r->dstmask)) - return 0; + if (!new_r->r_preference) { + if (r && r->hlist.next != NULL) { + r = 
container_of(r->hlist.next, struct fib_rule, hlist); + if (r->r_preference) + new_r->r_preference = r->r_preference - 1; + } + } - if (r->tos && (r->tos != fl->fl4_tos)) - return 0; + hlist_for_each_entry(r, node, &fib_rules, hlist) { + if (r->r_preference > new_r->r_preference) + break; + last = r; + } + atomic_inc(&new_r->r_clntref); -#ifdef CONFIG_IP_ROUTE_FWMARK - if ((r->fwmark ^ fl->fl4_fwmark) & r->fwmask) - return 0; -#endif + if (last) + hlist_add_after_rcu(&last->hlist, &new_r->hlist); + else + hlist_add_before_rcu(&new_r->hlist, &r->hlist); - return 1; + rtmsg_rule(RTM_NEWRULE, new_r); + return 0; } -static struct fib_table *fib_empty_table(void) +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) { - u32 id; - - for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_get_table(id) == NULL) - return fib_new_table(id); - return NULL; + if (res->r) + return res->r->r_tclassid; + return 0; } +#endif -static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, - [FRA_SRC] = { .type = NLA_U32 }, - [FRA_DST] = { .type = NLA_U32 }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_FLOW] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, -}; +/* callers should hold rtnl semaphore */ -static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, - struct nlattr **tb) +static void fib_rules_detach(struct net_device *dev) { - int err = -EINVAL; - struct fib4_rule *rule4 = (struct fib4_rule *) rule; - - if (frh->src_len > 32 || frh->dst_len > 32 || - (frh->tos & ~IPTOS_TOS_MASK)) - goto errout; - - if (rule->table == RT_TABLE_UNSPEC) { - if (rule->action == FR_ACT_TO_TBL) { - struct fib_table *table; + struct hlist_node *node; + struct fib_rule *r; - table = fib_empty_table(); - if (table == NULL) { - err = -ENOBUFS; - goto errout; - } 
+ hlist_for_each_entry(r, node, &fib_rules, hlist) { + if (r->r_ifindex == dev->ifindex) + r->r_ifindex = -1; - rule->table = table->tb_id; - } } +} - if (tb[FRA_SRC]) - rule4->src = nla_get_u32(tb[FRA_SRC]); +/* callers should hold rtnl semaphore */ - if (tb[FRA_DST]) - rule4->dst = nla_get_u32(tb[FRA_DST]); +static void fib_rules_attach(struct net_device *dev) +{ + struct hlist_node *node; + struct fib_rule *r; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (rule4->fwmark) - /* compatibility: if the mark value is non-zero all bits - * are compared unless a mask is explicitly specified. - */ - rule4->fwmask = 0xFFFFFFFF; + hlist_for_each_entry(r, node, &fib_rules, hlist) { + if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) + r->r_ifindex = dev->ifindex; } - - if (tb[FRA_FWMASK]) - rule4->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif - -#ifdef CONFIG_NET_CLS_ROUTE - if (tb[FRA_FLOW]) - rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); -#endif - - rule4->src_len = frh->src_len; - rule4->srcmask = inet_make_mask(rule4->src_len); - rule4->dst_len = frh->dst_len; - rule4->dstmask = inet_make_mask(rule4->dst_len); - rule4->tos = frh->tos; - - err = 0; -errout: - return err; } -static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, - struct nlattr **tb) +int fib_lookup(const struct flowi *flp, struct fib_result *res) { - struct fib4_rule *rule4 = (struct fib4_rule *) rule; + int err; + struct fib_rule *r, *policy; + struct fib_table *tb; + struct hlist_node *node; - if (frh->src_len && (rule4->src_len != frh->src_len)) - return 0; + u32 daddr = flp->fl4_dst; + u32 saddr = flp->fl4_src; - if (frh->dst_len && (rule4->dst_len != frh->dst_len)) - return 0; +FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", + NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); - if (frh->tos && (rule4->tos != frh->tos)) - return 0; + rcu_read_lock(); + hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { + 
if (((saddr^r->r_src) & r->r_srcmask) || + ((daddr^r->r_dst) & r->r_dstmask) || + (r->r_tos && r->r_tos != flp->fl4_tos) || #ifdef CONFIG_IP_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (rule4->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; + (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || #endif + (r->r_ifindex && r->r_ifindex != flp->iif)) + continue; + +FRprintk("tb %d r %d ", r->r_table, r->r_action); + switch (r->r_action) { + case RTN_UNICAST: + policy = r; + break; + case RTN_UNREACHABLE: + rcu_read_unlock(); + return -ENETUNREACH; + default: + case RTN_BLACKHOLE: + rcu_read_unlock(); + return -EINVAL; + case RTN_PROHIBIT: + rcu_read_unlock(); + return -EACCES; + } -#ifdef CONFIG_NET_CLS_ROUTE - if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) - return 0; -#endif + if ((tb = fib_get_table(r->r_table)) == NULL) + continue; + err = tb->tb_lookup(tb, flp, res); + if (err == 0) { + res->r = policy; + if (policy) + atomic_inc(&policy->r_clntref); + rcu_read_unlock(); + return 0; + } + if (err < 0 && err != -EAGAIN) { + rcu_read_unlock(); + return err; + } + } +FRprintk("FAILURE\n"); + rcu_read_unlock(); + return -ENETUNREACH; +} - if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC]))) - return 0; +void fib_select_default(const struct flowi *flp, struct fib_result *res) +{ + if (res->r && res->r->r_action == RTN_UNICAST && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->r_table)) != NULL) + tb->tb_select_default(tb, flp, res); + } +} - if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST]))) - return 0; +static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +{ + struct net_device *dev = ptr; - return 1; + if (event == NETDEV_UNREGISTER) + fib_rules_detach(dev); + else if (event == NETDEV_REGISTER) + fib_rules_attach(dev); + return 
NOTIFY_DONE; } -static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) -{ - struct fib4_rule *rule4 = (struct fib4_rule *) rule; - frh->family = AF_INET; - frh->dst_len = rule4->dst_len; - frh->src_len = rule4->src_len; - frh->tos = rule4->tos; +static struct notifier_block fib_rules_notifier = { + .notifier_call =fib_rules_event, +}; +static __inline__ int inet_fill_rule(struct sk_buff *skb, + struct fib_rule *r, + u32 pid, u32 seq, int event, + unsigned int flags) +{ + struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); + rtm = NLMSG_DATA(nlh); + rtm->rtm_family = AF_INET; + rtm->rtm_dst_len = r->r_dst_len; + rtm->rtm_src_len = r->r_src_len; + rtm->rtm_tos = r->r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (rule4->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); - - if (rule4->fwmask || rule4->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, rule4->fwmask); + if (r->r_fwmark) + RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); #endif - - if (rule4->dst_len) - NLA_PUT_U32(skb, FRA_DST, rule4->dst); - - if (rule4->src_len) - NLA_PUT_U32(skb, FRA_SRC, rule4->src); - + rtm->rtm_table = r->r_table; + rtm->rtm_protocol = 0; + rtm->rtm_scope = 0; + rtm->rtm_type = r->r_action; + rtm->rtm_flags = r->r_flags; + + if (r->r_dst_len) + RTA_PUT(skb, RTA_DST, 4, &r->r_dst); + if (r->r_src_len) + RTA_PUT(skb, RTA_SRC, 4, &r->r_src); + if (r->r_ifname[0]) + RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); + if (r->r_preference) + RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); + if (r->r_srcmap) + RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); #ifdef CONFIG_NET_CLS_ROUTE - if (rule4->tclassid) - NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); + if (r->r_tclassid) + RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); #endif - return 0; + nlh->nlmsg_len = skb->tail - b; + return skb->len; -nla_put_failure: - return -ENOBUFS; +nlmsg_failure: +rtattr_failure: + 
skb_trim(skb, b - skb->data); + return -1; } -int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +/* callers should hold rtnl semaphore */ + +static void rtmsg_rule(int event, struct fib_rule *r) { - return fib_rules_dump(skb, cb, AF_INET); + int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + + if (!skb) + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); + else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); + } else { + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); + } } -static u32 fib4_rule_default_pref(void) +int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { - struct list_head *pos; - struct fib_rule *rule; - - if (!list_empty(&fib4_rules)) { - pos = fib4_rules.next; - if (pos->next != &fib4_rules) { - rule = list_entry(pos->next, struct fib_rule, list); - if (rule->pref) - return rule->pref - 1; - } + int idx = 0; + int s_idx = cb->args[0]; + struct fib_rule *r; + struct hlist_node *node; + + rcu_read_lock(); + hlist_for_each_entry(r, node, &fib_rules, hlist) { + if (idx < s_idx) + goto next; + if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + RTM_NEWRULE, NLM_F_MULTI) < 0) + break; +next: + idx++; } + rcu_read_unlock(); + cb->args[0] = idx; - return 0; + return skb->len; } -static struct fib_rules_ops fib4_rules_ops = { - .family = AF_INET, - .rule_size = sizeof(struct fib4_rule), - .action = fib4_rule_action, - .match = fib4_rule_match, - .configure = fib4_rule_configure, - .compare = fib4_rule_compare, - .fill = fib4_rule_fill, - .default_pref = fib4_rule_default_pref, - .nlgroup = RTNLGRP_IPV4_RULE, - .policy = fib4_rule_policy, - .rules_list = &fib4_rules, - .owner = THIS_MODULE, -}; - -void __init fib4_rules_init(void) +void __init fib_rules_init(void) { - list_add_tail(&local_rule.common.list, &fib4_rules); - 
list_add_tail(&main_rule.common.list, &fib4_rules); - list_add_tail(&default_rule.common.list, &fib4_rules); - - fib_rules_register(&fib4_rules_ops); + INIT_HLIST_HEAD(&fib_rules); + hlist_add_head(&local_rule.hlist, &fib_rules); + hlist_add_after(&local_rule.hlist, &main_rule.hlist); + hlist_add_after(&main_rule.hlist, &default_rule.hlist); + register_netdevice_notifier(&fib_rules_notifier); } diff --git a/trunk/net/ipv4/fib_semantics.c b/trunk/net/ipv4/fib_semantics.c index 2ead09543f68..51738000f3dc 100644 --- a/trunk/net/ipv4/fib_semantics.c +++ b/trunk/net/ipv4/fib_semantics.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -43,14 +44,12 @@ #include #include #include -#include -#include #include "fib_lookup.h" #define FSprintk(a...) -static DEFINE_SPINLOCK(fib_info_lock); +static DEFINE_RWLOCK(fib_info_lock); static struct hlist_head *fib_info_hash; static struct hlist_head *fib_info_laddrhash; static unsigned int fib_hash_size; @@ -160,7 +159,7 @@ void free_fib_info(struct fib_info *fi) void fib_release_info(struct fib_info *fi) { - spin_lock_bh(&fib_info_lock); + write_lock_bh(&fib_info_lock); if (fi && --fi->fib_treeref == 0) { hlist_del(&fi->fib_hash); if (fi->fib_prefsrc) @@ -173,7 +172,7 @@ void fib_release_info(struct fib_info *fi) fi->fib_dead = 1; fib_info_put(fi); } - spin_unlock_bh(&fib_info_lock); + write_unlock_bh(&fib_info_lock); } static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi) @@ -255,7 +254,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) struct fib_nh *nh; unsigned int hash; - spin_lock(&fib_info_lock); + read_lock(&fib_info_lock); hash = fib_devindex_hashfn(dev->ifindex); head = &fib_info_devhash[hash]; @@ -263,41 +262,41 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) if (nh->nh_dev == dev && nh->nh_gw == gw && !(nh->nh_flags&RTNH_F_DEAD)) { - spin_unlock(&fib_info_lock); + read_unlock(&fib_info_lock); return 0; } } - spin_unlock(&fib_info_lock); + 
read_unlock(&fib_info_lock); return -1; } void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int dst_len, u32 tb_id, struct nl_info *info) + int z, int tb_id, + struct nlmsghdr *n, struct netlink_skb_parms *req) { struct sk_buff *skb; - int payload = sizeof(struct rtmsg) + 256; - u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; - int err = -ENOBUFS; - - skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); - if (skb == NULL) - goto errout; - - err = fib_dump_info(skb, info->pid, seq, event, tb_id, - fa->fa_type, fa->fa_scope, key, dst_len, - fa->fa_tos, fa->fa_info, 0); - if (err < 0) { + u32 pid = req ? req->pid : n->nlmsg_pid; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return; + + if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, + fa->fa_type, fa->fa_scope, &key, z, + fa->fa_tos, + fa->fa_info, 0) < 0) { kfree_skb(skb); - goto errout; + return; } - - err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, - info->nlh, GFP_KERNEL); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE; + if (n->nlmsg_flags&NLM_F_ECHO) + atomic_inc(&skb->users); + netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL); + if (n->nlmsg_flags&NLM_F_ECHO) + netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); } /* Return the first fib alias matching TOS with @@ -343,100 +342,102 @@ int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) +static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) +{ + while (RTA_OK(attr,attrlen)) { + if (attr->rta_type == type) + return *(u32*)RTA_DATA(attr); + attr = RTA_NEXT(attr, attrlen); + } + return 0; +} + +static int +fib_count_nexthops(struct rtattr *rta) { int nhs = 0; + struct rtnexthop *nhp = RTA_DATA(rta); + int nhlen = RTA_PAYLOAD(rta); - while (rtnh_ok(rtnh, remaining)) { + while (nhlen 
>= (int)sizeof(struct rtnexthop)) { + if ((nhlen -= nhp->rtnh_len) < 0) + return 0; nhs++; - rtnh = rtnh_next(rtnh, &remaining); - } - - /* leftover implies invalid nexthop configuration, discard it */ - return remaining > 0 ? 0 : nhs; + nhp = RTNH_NEXT(nhp); + }; + return nhs; } -static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, - int remaining, struct fib_config *cfg) +static int +fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) { - change_nexthops(fi) { - int attrlen; + struct rtnexthop *nhp = RTA_DATA(rta); + int nhlen = RTA_PAYLOAD(rta); - if (!rtnh_ok(rtnh, remaining)) + change_nexthops(fi) { + int attrlen = nhlen - sizeof(struct rtnexthop); + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) return -EINVAL; - - nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; - nh->nh_oif = rtnh->rtnh_ifindex; - nh->nh_weight = rtnh->rtnh_hops + 1; - - attrlen = rtnh_attrlen(rtnh); - if (attrlen > 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); - - nla = nla_find(attrs, attrlen, RTA_GATEWAY); - nh->nh_gw = nla ? nla_get_u32(nla) : 0; + nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; + nh->nh_oif = nhp->rtnh_ifindex; + nh->nh_weight = nhp->rtnh_hops + 1; + if (attrlen) { + nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); #ifdef CONFIG_NET_CLS_ROUTE - nla = nla_find(attrs, attrlen, RTA_FLOW); - nh->nh_tclassid = nla ? 
nla_get_u32(nla) : 0; + nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); #endif } - - rtnh = rtnh_next(rtnh, &remaining); + nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); - return 0; } #endif -int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) +int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, + struct fib_info *fi) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - struct rtnexthop *rtnh; - int remaining; + struct rtnexthop *nhp; + int nhlen; #endif - if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) + if (rta->rta_priority && + *rta->rta_priority != fi->fib_priority) return 1; - if (cfg->fc_oif || cfg->fc_gw) { - if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && - (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) + if (rta->rta_oif || rta->rta_gw) { + if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && + (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) return 0; return 1; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (cfg->fc_mp == NULL) + if (rta->rta_mp == NULL) return 0; - - rtnh = cfg->fc_mp; - remaining = cfg->fc_mp_len; + nhp = RTA_DATA(rta->rta_mp); + nhlen = RTA_PAYLOAD(rta->rta_mp); for_nexthops(fi) { - int attrlen; + int attrlen = nhlen - sizeof(struct rtnexthop); + u32 gw; - if (!rtnh_ok(rtnh, remaining)) + if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) return -EINVAL; - - if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) + if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) return 1; - - attrlen = rtnh_attrlen(rtnh); - if (attrlen < 0) { - struct nlattr *nla, *attrs = rtnh_attrs(rtnh); - - nla = nla_find(attrs, attrlen, RTA_GATEWAY); - if (nla && nla_get_u32(nla) != nh->nh_gw) + if (attrlen) { + gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + if (gw && gw != nh->nh_gw) return 1; #ifdef CONFIG_NET_CLS_ROUTE - nla = nla_find(attrs, attrlen, RTA_FLOW); - if (nla && nla_get_u32(nla) != nh->nh_tclassid) + gw = fib_get_attr32(RTNH_DATA(nhp), 
attrlen, RTA_FLOW); + if (gw && gw != nh->nh_tclassid) return 1; #endif } - - rtnh = rtnh_next(rtnh, &remaining); + nhp = RTNH_NEXT(nhp); } endfor_nexthops(fi); #endif return 0; @@ -487,8 +488,7 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) |-> {local prefix} (terminal node) */ -static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh) +static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) { int err; @@ -502,7 +502,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; - if (cfg->fc_scope >= RT_SCOPE_LINK) + if (r->rtm_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; @@ -516,15 +516,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, return 0; } { - struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = nh->nh_gw, - .scope = cfg->fc_scope + 1, - }, - }, - .oif = nh->nh_oif, - }; + struct flowi fl = { .nl_u = { .ip4_u = + { .daddr = nh->nh_gw, + .scope = r->rtm_scope + 1 } }, + .oif = nh->nh_oif }; /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) @@ -603,7 +598,7 @@ static void fib_hash_move(struct hlist_head *new_info_hash, unsigned int old_size = fib_hash_size; unsigned int i, bytes; - spin_lock_bh(&fib_info_lock); + write_lock_bh(&fib_info_lock); old_info_hash = fib_info_hash; old_laddrhash = fib_info_laddrhash; fib_hash_size = new_size; @@ -644,35 +639,46 @@ static void fib_hash_move(struct hlist_head *new_info_hash, } fib_info_laddrhash = new_laddrhash; - spin_unlock_bh(&fib_info_lock); + write_unlock_bh(&fib_info_lock); bytes = old_size * sizeof(struct hlist_head *); fib_hash_free(old_info_hash, bytes); fib_hash_free(old_laddrhash, bytes); } -struct fib_info *fib_create_info(struct fib_config *cfg) +struct fib_info * +fib_create_info(const struct rtmsg *r, struct kern_rta *rta, + 
const struct nlmsghdr *nlh, int *errp) { int err; struct fib_info *fi = NULL; struct fib_info *ofi; +#ifdef CONFIG_IP_ROUTE_MULTIPATH int nhs = 1; +#else + const int nhs = 1; +#endif +#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED + u32 mp_alg = IP_MP_ALG_NONE; +#endif /* Fast check to catch the most weird cases */ - if (fib_props[cfg->fc_type].scope > cfg->fc_scope) + if (fib_props[r->rtm_type].scope > r->rtm_scope) goto err_inval; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (cfg->fc_mp) { - nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); + if (rta->rta_mp) { + nhs = fib_count_nexthops(rta->rta_mp); if (nhs == 0) goto err_inval; } #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (cfg->fc_mp_alg) { - if (cfg->fc_mp_alg < IP_MP_ALG_NONE || - cfg->fc_mp_alg > IP_MP_ALG_MAX) + if (rta->rta_mp_alg) { + mp_alg = *rta->rta_mp_alg; + + if (mp_alg < IP_MP_ALG_NONE || + mp_alg > IP_MP_ALG_MAX) goto err_inval; } #endif @@ -708,42 +714,43 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; fib_info_cnt++; - fi->fib_protocol = cfg->fc_protocol; - fi->fib_flags = cfg->fc_flags; - fi->fib_priority = cfg->fc_priority; - fi->fib_prefsrc = cfg->fc_prefsrc; + fi->fib_protocol = r->rtm_protocol; fi->fib_nhs = nhs; change_nexthops(fi) { nh->nh_parent = fi; } endfor_nexthops(fi) - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; - - if (type) { - if (type > RTAX_MAX) + fi->fib_flags = r->rtm_flags; + if (rta->rta_priority) + fi->fib_priority = *rta->rta_priority; + if (rta->rta_mx) { + int attrlen = RTA_PAYLOAD(rta->rta_mx); + struct rtattr *attr = RTA_DATA(rta->rta_mx); + + while (RTA_OK(attr, attrlen)) { + unsigned flavor = attr->rta_type; + if (flavor) { + if (flavor > RTAX_MAX) goto err_inval; - fi->fib_metrics[type - 1] = nla_get_u32(nla); + fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); } + attr = RTA_NEXT(attr, attrlen); } } + if (rta->rta_prefsrc) + 
memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); - if (cfg->fc_mp) { + if (rta->rta_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); - if (err != 0) + if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) goto failure; - if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) + if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) goto err_inval; - if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) + if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) goto err_inval; #ifdef CONFIG_NET_CLS_ROUTE - if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) + if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) goto err_inval; #endif #else @@ -751,32 +758,34 @@ struct fib_info *fib_create_info(struct fib_config *cfg) #endif } else { struct fib_nh *nh = fi->fib_nh; - - nh->nh_oif = cfg->fc_oif; - nh->nh_gw = cfg->fc_gw; - nh->nh_flags = cfg->fc_flags; + if (rta->rta_oif) + nh->nh_oif = *rta->rta_oif; + if (rta->rta_gw) + memcpy(&nh->nh_gw, rta->rta_gw, 4); #ifdef CONFIG_NET_CLS_ROUTE - nh->nh_tclassid = cfg->fc_flow; + if (rta->rta_flow) + memcpy(&nh->nh_tclassid, rta->rta_flow, 4); #endif + nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = cfg->fc_mp_alg; + fi->fib_mp_alg = mp_alg; #endif - if (fib_props[cfg->fc_type].error) { - if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) + if (fib_props[r->rtm_type].error) { + if (rta->rta_gw || rta->rta_oif || rta->rta_mp) goto err_inval; goto link_it; } - if (cfg->fc_scope > RT_SCOPE_HOST) + if (r->rtm_scope > RT_SCOPE_HOST) goto err_inval; - if (cfg->fc_scope == RT_SCOPE_HOST) { + if (r->rtm_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. 
*/ @@ -789,14 +798,14 @@ struct fib_info *fib_create_info(struct fib_config *cfg) goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(cfg, fi, nh)) != 0) + if ((err = fib_check_nh(r, fi, nh)) != 0) goto failure; } endfor_nexthops(fi) } if (fi->fib_prefsrc) { - if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || - fi->fib_prefsrc != cfg->fc_dst) + if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || + memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -811,7 +820,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg) fi->fib_treeref++; atomic_inc(&fi->fib_clntref); - spin_lock_bh(&fib_info_lock); + write_lock_bh(&fib_info_lock); hlist_add_head(&fi->fib_hash, &fib_info_hash[fib_info_hashfn(fi)]); if (fi->fib_prefsrc) { @@ -830,19 +839,19 @@ struct fib_info *fib_create_info(struct fib_config *cfg) head = &fib_info_devhash[hash]; hlist_add_head(&nh->nh_hash, head); } endfor_nexthops(fi) - spin_unlock_bh(&fib_info_lock); + write_unlock_bh(&fib_info_lock); return fi; err_inval: err = -EINVAL; failure: + *errp = err; if (fi) { fi->fib_dead = 1; free_fib_info(fi); } - - return ERR_PTR(err); + return NULL; } /* Note! fib_semantic_match intentionally uses RCU list functions. 
*/ @@ -928,89 +937,224 @@ u32 __fib_res_prefsrc(struct fib_result *res) return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos, - struct fib_info *fi, unsigned int flags) +int +fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, + u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + struct fib_info *fi, unsigned int flags) { - struct nlmsghdr *nlh; struct rtmsg *rtm; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); - if (nlh == NULL) - return -ENOBUFS; - - rtm = nlmsg_data(nlh); + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); + rtm = NLMSG_DATA(nlh); rtm->rtm_family = AF_INET; rtm->rtm_dst_len = dst_len; rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; - NLA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; - rtm->rtm_protocol = fi->fib_protocol; - if (rtm->rtm_dst_len) - NLA_PUT_U32(skb, RTA_DST, dst); - + RTA_PUT(skb, RTA_DST, 4, dst); + rtm->rtm_protocol = fi->fib_protocol; if (fi->fib_priority) - NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority); - + RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority); if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0) - goto nla_put_failure; - + goto rtattr_failure; if (fi->fib_prefsrc) - NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc); - + RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc); if (fi->fib_nhs == 1) { if (fi->fib_nh->nh_gw) - NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw); - + RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw); if (fi->fib_nh->nh_oif) - NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif); + RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif); #ifdef CONFIG_NET_CLS_ROUTE if (fi->fib_nh[0].nh_tclassid) - NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid); + RTA_PUT(skb, 
RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid); #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (fi->fib_nhs > 1) { - struct rtnexthop *rtnh; - struct nlattr *mp; - - mp = nla_nest_start(skb, RTA_MULTIPATH); - if (mp == NULL) - goto nla_put_failure; + struct rtnexthop *nhp; + struct rtattr *mp_head; + if (skb_tailroom(skb) <= RTA_SPACE(0)) + goto rtattr_failure; + mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0)); for_nexthops(fi) { - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); - if (rtnh == NULL) - goto nla_put_failure; - - rtnh->rtnh_flags = nh->nh_flags & 0xFF; - rtnh->rtnh_hops = nh->nh_weight - 1; - rtnh->rtnh_ifindex = nh->nh_oif; - + if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4)) + goto rtattr_failure; + nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp))); + nhp->rtnh_flags = nh->nh_flags & 0xFF; + nhp->rtnh_hops = nh->nh_weight-1; + nhp->rtnh_ifindex = nh->nh_oif; if (nh->nh_gw) - NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw); + RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw); #ifdef CONFIG_NET_CLS_ROUTE if (nh->nh_tclassid) - NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid); + RTA_PUT(skb, RTA_FLOW, 4, &nh->nh_tclassid); #endif - /* length of rtnetlink header + attributes */ - rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh; + nhp->rtnh_len = skb->tail - (unsigned char*)nhp; } endfor_nexthops(fi); - - nla_nest_end(skb, mp); + mp_head->rta_type = RTA_MULTIPATH; + mp_head->rta_len = skb->tail - (u8*)mp_head; } #endif - return nlmsg_end(skb, nlh); + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +#ifndef CONFIG_IP_NOSIOCRT + +int +fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, + struct kern_rta *rta, struct rtentry *r) +{ + int plen; + u32 *ptr; + + memset(rtm, 0, sizeof(*rtm)); + memset(rta, 0, sizeof(*rta)); + + if (r->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* Check mask for validity: + a) it must be contiguous. 
+ b) destination must have all host bits clear. + c) if application forgot to set correct family (AF_INET), + reject request unless it is absolutely clear i.e. + both family and mask are zero. + */ + plen = 32; + ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; + if (!(r->rt_flags&RTF_HOST)) { + u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; + if (r->rt_genmask.sa_family != AF_INET) { + if (mask || r->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + if (bad_mask(mask, *ptr)) + return -EINVAL; + plen = inet_mask_len(mask); + } + + nl->nlmsg_flags = NLM_F_REQUEST; + nl->nlmsg_pid = 0; + nl->nlmsg_seq = 0; + nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); + if (cmd == SIOCDELRT) { + nl->nlmsg_type = RTM_DELROUTE; + nl->nlmsg_flags = 0; + } else { + nl->nlmsg_type = RTM_NEWROUTE; + nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; + rtm->rtm_protocol = RTPROT_BOOT; + } -nla_put_failure: - return nlmsg_cancel(skb, nlh); + rtm->rtm_dst_len = plen; + rta->rta_dst = ptr; + + if (r->rt_metric) { + *(u32*)&r->rt_pad3 = r->rt_metric - 1; + rta->rta_priority = (u32*)&r->rt_pad3; + } + if (r->rt_flags&RTF_REJECT) { + rtm->rtm_scope = RT_SCOPE_HOST; + rtm->rtm_type = RTN_UNREACHABLE; + return 0; + } + rtm->rtm_scope = RT_SCOPE_NOWHERE; + rtm->rtm_type = RTN_UNICAST; + + if (r->rt_dev) { + char *colon; + struct net_device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + devname[IFNAMSIZ-1] = 0; + colon = strchr(devname, ':'); + if (colon) + *colon = 0; + dev = __dev_get_by_name(devname); + if (!dev) + return -ENODEV; + rta->rta_oif = &dev->ifindex; + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + rta->rta_prefsrc = &ifa->ifa_local; + } + } + + ptr = &((struct 
sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; + if (r->rt_gateway.sa_family == AF_INET && *ptr) { + rta->rta_gw = ptr; + if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) + rtm->rtm_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) + return -EINVAL; + + if (rtm->rtm_scope == RT_SCOPE_NOWHERE) + rtm->rtm_scope = RT_SCOPE_LINK; + + if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { + struct rtattr *rec; + struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + rta->rta_mx = mx; + mx->rta_type = RTA_METRICS; + mx->rta_len = RTA_LENGTH(0); + if (r->rt_flags&RTF_MTU) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_ADVMSS; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; + } + if (r->rt_flags&RTF_WINDOW) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_WINDOW; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_window; + } + if (r->rt_flags&RTF_IRTT) { + rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); + rec->rta_type = RTAX_RTT; + rec->rta_len = RTA_LENGTH(4); + mx->rta_len += RTA_LENGTH(4); + *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; + } + } + return 0; } +#endif + /* Update FIB if: - local address disappeared -> we must delete all the entries diff --git a/trunk/net/ipv4/fib_trie.c b/trunk/net/ipv4/fib_trie.c index 9c3ff6ba6e21..01801c0f885d 100644 --- a/trunk/net/ipv4/fib_trie.c +++ b/trunk/net/ipv4/fib_trie.c @@ -1124,14 +1124,17 @@ fib_insert_node(struct trie *t, int *err, u32 key, int plen) return fa_head; } -static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) +static int +fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) { struct trie *t = (struct trie *) tb->tb_data; 
struct fib_alias *fa, *new_fa; struct list_head *fa_head = NULL; struct fib_info *fi; - int plen = cfg->fc_dst_len; - u8 tos = cfg->fc_tos; + int plen = r->rtm_dst_len; + int type = r->rtm_type; + u8 tos = r->rtm_tos; u32 key, mask; int err; struct leaf *l; @@ -1139,9 +1142,13 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) if (plen > 32) return -EINVAL; - key = ntohl(cfg->fc_dst); + key = 0; + if (rta->rta_dst) + memcpy(&key, rta->rta_dst, 4); + + key = ntohl(key); - pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1150,11 +1157,10 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) key = key & mask; - fi = fib_create_info(cfg); - if (IS_ERR(fi)) { - err = PTR_ERR(fi); + fi = fib_create_info(r, rta, nlhdr, &err); + + if (!fi) goto err; - } l = fib_find_node(t, key); fa = NULL; @@ -1179,10 +1185,10 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) struct fib_alias *fa_orig; err = -EEXIST; - if (cfg->fc_nlflags & NLM_F_EXCL) + if (nlhdr->nlmsg_flags & NLM_F_EXCL) goto out; - if (cfg->fc_nlflags & NLM_F_REPLACE) { + if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; @@ -1194,8 +1200,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) fi_drop = fa->fa_info; new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; + new_fa->fa_type = type; + new_fa->fa_scope = r->rtm_scope; state = fa->fa_state; new_fa->fa_state &= ~FA_S_ACCESSED; @@ -1218,17 +1224,17 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == cfg->fc_type && - fa->fa_scope == cfg->fc_scope && + if (fa->fa_type == type && + fa->fa_scope == r->rtm_scope && fa->fa_info == fi) { goto out; } } - if 
(!(cfg->fc_nlflags & NLM_F_APPEND)) + if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(cfg->fc_nlflags & NLM_F_CREATE)) + if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1238,8 +1244,8 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = cfg->fc_type; - new_fa->fa_scope = cfg->fc_scope; + new_fa->fa_type = type; + new_fa->fa_scope = r->rtm_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1256,8 +1262,7 @@ static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, - &cfg->fc_nlinfo); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); succeeded: return 0; @@ -1543,21 +1548,28 @@ static int trie_leaf_remove(struct trie *t, t_key key) return 1; } -static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) +static int +fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, + struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; - int plen = cfg->fc_dst_len; - u8 tos = cfg->fc_tos; + int plen = r->rtm_dst_len; + u8 tos = r->rtm_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; + if (plen > 32) return -EINVAL; - key = ntohl(cfg->fc_dst); + key = 0; + if (rta->rta_dst) + memcpy(&key, rta->rta_dst, 4); + + key = ntohl(key); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) @@ -1586,12 +1598,13 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) if (fa->fa_tos != tos) break; - if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && - (cfg->fc_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == cfg->fc_scope) && - (!cfg->fc_protocol || - fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, 
fi) == 0) { + if ((!r->rtm_type || + fa->fa_type == r->rtm_type) && + (r->rtm_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == r->rtm_scope) && + (!r->rtm_protocol || + fi->fib_protocol == r->rtm_protocol) && + fib_nh_match(r, nlhdr, rta, fi) == 0) { fa_to_delete = fa; break; } @@ -1601,8 +1614,7 @@ static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) return -ESRCH; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, - &cfg->fc_nlinfo); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); l = fib_find_node(t, key); li = find_leaf_info(l, plen); @@ -1836,7 +1848,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi u32 xkey = htonl(key); - s_i = cb->args[4]; + s_i = cb->args[3]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1854,16 +1866,16 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi tb->tb_id, fa->fa_type, fa->fa_scope, - xkey, + &xkey, plen, fa->fa_tos, fa->fa_info, 0) < 0) { - cb->args[4] = i; + cb->args[3] = i; return -1; } i++; } - cb->args[4] = i; + cb->args[3] = i; return skb->len; } @@ -1874,14 +1886,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str struct list_head *fa_head; struct leaf *l = NULL; - s_h = cb->args[3]; + s_h = cb->args[2]; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[4], 0, - sizeof(cb->args) - 4*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); @@ -1892,11 +1904,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[3] = h; + cb->args[2] = h; return -1; } } - cb->args[3] = h; + cb->args[2] = h; return skb->len; } @@ -1905,23 +1917,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin int m, 
s_m; struct trie *t = (struct trie *) tb->tb_data; - s_m = cb->args[2]; + s_m = cb->args[1]; rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[2], 0, + sizeof(cb->args) - 2*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { - cb->args[2] = m; + cb->args[1] = m; goto out; } } rcu_read_unlock(); - cb->args[2] = m; + cb->args[1] = m; return skb->len; out: rcu_read_unlock(); @@ -1931,9 +1943,9 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin /* Fix more generic FIB names for init later */ #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(u32 id) +struct fib_table * fib_hash_init(int id) #else -struct fib_table * __init fib_hash_init(u32 id) +struct fib_table * __init fib_hash_init(int id) #endif { struct fib_table *tb; diff --git a/trunk/net/ipv4/icmp.c b/trunk/net/ipv4/icmp.c index c2ad07e48ab4..4c86ac3d882d 100644 --- a/trunk/net/ipv4/icmp.c +++ b/trunk/net/ipv4/icmp.c @@ -187,11 +187,11 @@ struct icmp_err icmp_err_convert[] = { }; /* Control parameters for ECHO replies. */ -int sysctl_icmp_echo_ignore_all __read_mostly; -int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1; +int sysctl_icmp_echo_ignore_all; +int sysctl_icmp_echo_ignore_broadcasts = 1; /* Control parameter - ignore bogus broadcast responses? */ -int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; +int sysctl_icmp_ignore_bogus_error_responses = 1; /* * Configurable global rate limit. 
@@ -205,9 +205,9 @@ int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1; * time exceeded (11), parameter problem (12) */ -int sysctl_icmp_ratelimit __read_mostly = 1 * HZ; -int sysctl_icmp_ratemask __read_mostly = 0x1818; -int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly; +int sysctl_icmp_ratelimit = 1 * HZ; +int sysctl_icmp_ratemask = 0x1818; +int sysctl_icmp_errors_use_inbound_ifaddr; /* * ICMP control array. This specifies what to do with each ICMP. @@ -406,7 +406,6 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; - security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -561,7 +560,6 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; - security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -930,7 +928,7 @@ int icmp_rcv(struct sk_buff *skb) ICMP_INC_STATS_BH(ICMP_MIB_INMSGS); switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ diff --git a/trunk/net/ipv4/igmp.c b/trunk/net/ipv4/igmp.c index 58be8227b0cb..8e8117c19e4d 100644 --- a/trunk/net/ipv4/igmp.c +++ b/trunk/net/ipv4/igmp.c @@ -931,7 +931,7 @@ int igmp_rcv(struct sk_buff *skb) goto drop; switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (!(u16)csum_fold(skb->csum)) break; /* fall through */ @@ -1397,8 +1397,8 @@ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr) /* * Join a socket to a group */ -int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; -int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; +int sysctl_igmp_max_memberships = IP_MAX_MEMBERSHIPS; +int sysctl_igmp_max_msf = IP_MAX_MSF; static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, diff --git a/trunk/net/ipv4/inet_connection_sock.c b/trunk/net/ipv4/inet_connection_sock.c 
index 07204391d083..e50a1bfd7ccc 100644 --- a/trunk/net/ipv4/inet_connection_sock.c +++ b/trunk/net/ipv4/inet_connection_sock.c @@ -327,7 +327,6 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; - security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -510,8 +509,6 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); - - security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/trunk/net/ipv4/inet_hashtables.c b/trunk/net/ipv4/inet_hashtables.c index fb296c9a7f3f..95fac5532994 100644 --- a/trunk/net/ipv4/inet_hashtables.c +++ b/trunk/net/ipv4/inet_hashtables.c @@ -124,10 +124,8 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif) +struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, + const unsigned short hnum, const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -161,33 +159,6 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, return result; } -/* Optimize the common listener case. 
*/ -struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, const unsigned short hnum, - const int dif) -{ - struct sock *sk = NULL; - const struct hlist_head *head; - - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) - goto sherry_cache; - sk = inet_lookup_listener_slow(head, daddr, hnum, dif); - } - if (sk) { -sherry_cache: - sock_hold(sk); - } - read_unlock(&hashinfo->lhash_lock); - return sk; -} EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* called with local bh disabled */ diff --git a/trunk/net/ipv4/inetpeer.c b/trunk/net/ipv4/inetpeer.c index a675602ef295..03ff62ebcfeb 100644 --- a/trunk/net/ipv4/inetpeer.c +++ b/trunk/net/ipv4/inetpeer.c @@ -126,9 +126,12 @@ void __init inet_initpeers(void) peer_cachep = kmem_cache_create("inet_peer_cache", sizeof(struct inet_peer), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!peer_cachep) + panic("cannot create inet_peer_cache"); + /* All the timers, started at system startup tend to synchronize. Perturb it a bit. */ diff --git a/trunk/net/ipv4/ip_fragment.c b/trunk/net/ipv4/ip_fragment.c index 165d72859ddf..b84b53a47526 100644 --- a/trunk/net/ipv4/ip_fragment.c +++ b/trunk/net/ipv4/ip_fragment.c @@ -54,15 +54,15 @@ * even the most extreme cases without allowing an attacker to measurably * harm machine performance. */ -int sysctl_ipfrag_high_thresh __read_mostly = 256*1024; -int sysctl_ipfrag_low_thresh __read_mostly = 192*1024; +int sysctl_ipfrag_high_thresh = 256*1024; +int sysctl_ipfrag_low_thresh = 192*1024; -int sysctl_ipfrag_max_dist __read_mostly = 64; +int sysctl_ipfrag_max_dist = 64; /* Important NOTE! 
Fragment queue must be destroyed before MSL expires. * RFC791 is wrong proposing to prolongate timer each fragment arrival by TTL. */ -int sysctl_ipfrag_time __read_mostly = IP_FRAG_TIME; +int sysctl_ipfrag_time = IP_FRAG_TIME; struct ipfrag_skb_cb { @@ -130,7 +130,7 @@ static unsigned int ipqhashfn(u16 id, u32 saddr, u32 daddr, u8 prot) } static struct timer_list ipfrag_secret_timer; -int sysctl_ipfrag_secret_interval __read_mostly = 10 * 60 * HZ; +int sysctl_ipfrag_secret_interval = 10 * 60 * HZ; static void ipfrag_secret_rebuild(unsigned long dummy) { @@ -665,7 +665,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) + else if (head->ip_summed == CHECKSUM_HW) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip_frag_mem); diff --git a/trunk/net/ipv4/ip_gre.c b/trunk/net/ipv4/ip_gre.c index f5fba051df3d..0f9b3a31997b 100644 --- a/trunk/net/ipv4/ip_gre.c +++ b/trunk/net/ipv4/ip_gre.c @@ -393,8 +393,7 @@ static void ipgre_err(struct sk_buff *skb, u32 info) int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; + int rel_info = 0; u16 flags; int grehlen = (iph->ihl<<2) + 4; struct sk_buff *skb2; @@ -423,16 +422,14 @@ static void ipgre_err(struct sk_buff *skb, u32 info) default: return; case ICMP_PARAMETERPROB: - n = ntohl(skb->h.icmph->un.gateway) >> 24; - if (n < (iph->ihl<<2)) + if (skb->h.icmph->un.gateway < (iph->ihl<<2)) return; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? 
*/ rel_type = ICMP_PARAMETERPROB; - n -= grehlen; - rel_info = htonl(n << 24); + rel_info = skb->h.icmph->un.gateway - grehlen; break; case ICMP_DEST_UNREACH: @@ -443,14 +440,13 @@ static void ipgre_err(struct sk_buff *skb, u32 info) return; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - n = ntohs(skb->h.icmph->un.frag.mtu); - if (n < grehlen+68) + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < grehlen+68) return; - n -= grehlen; + rel_info -= grehlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) + if (rel_info > ntohs(eiph->tot_len)) return; - rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. @@ -512,11 +508,12 @@ static void ipgre_err(struct sk_buff *skb, u32 info) /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { + if (rel_info > dst_mtu(skb2->dst)) { kfree_skb(skb2); return; } - skb2->dst->ops->update_pmtu(skb2->dst, n); + skb2->dst->ops->update_pmtu(skb2->dst, rel_info); + rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { @@ -579,7 +576,7 @@ static int ipgre_rcv(struct sk_buff *skb) if (flags&GRE_CSUM) { switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: csum = (u16)csum_fold(skb->csum); if (!csum) break; @@ -587,7 +584,7 @@ static int ipgre_rcv(struct sk_buff *skb) case CHECKSUM_NONE: skb->csum = 0; csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; + skb->ip_summed = CHECKSUM_HW; } offset += 4; } diff --git a/trunk/net/ipv4/ip_options.c b/trunk/net/ipv4/ip_options.c index e7437c091326..406056edc02b 100644 --- a/trunk/net/ipv4/ip_options.c +++ b/trunk/net/ipv4/ip_options.c @@ -24,7 +24,6 @@ #include #include #include -#include /* * Write options to IP header, record destination address to @@ -195,13 +194,6 @@ int ip_options_echo(struct ip_options 
* dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } - if (sopt->cipso) { - optlen = sptr[sopt->cipso+1]; - dopt->cipso = dopt->optlen+sizeof(struct iphdr); - memcpy(dptr, sptr+sopt->cipso, optlen); - dptr += optlen; - dopt->optlen += optlen; - } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -442,17 +434,6 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; - case IPOPT_CIPSO: - if (opt->cipso) { - pp_ptr = optptr; - goto error; - } - opt->cipso = optptr - iph; - if (cipso_v4_validate(&optptr)) { - pp_ptr = optptr; - goto error; - } - break; case IPOPT_SEC: case IPOPT_SID: default: @@ -525,6 +506,7 @@ static int ip_options_get_finish(struct ip_options **optp, opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; opt->is_data = 1; + opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { kfree(opt); return -EINVAL; diff --git a/trunk/net/ipv4/ip_output.c b/trunk/net/ipv4/ip_output.c index 97aee76fb746..a2ede167e045 100644 --- a/trunk/net/ipv4/ip_output.c +++ b/trunk/net/ipv4/ip_output.c @@ -83,7 +83,7 @@ #include #include -int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; +int sysctl_ip_default_ttl = IPDEFTTL; /* Generate a checksum for an outgoing IP datagram. */ __inline__ void ip_send_check(struct iphdr *iph) @@ -328,7 +328,6 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. 
*/ - security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -426,7 +425,7 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) int ptr; struct net_device *dev; struct sk_buff *skb2; - unsigned int mtu, hlen, left, len, ll_rs, pad; + unsigned int mtu, hlen, left, len, ll_rs; int offset; __be16 not_last_frag; struct rtable *rt = (struct rtable*)skb->dst; @@ -556,13 +555,14 @@ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*)) left = skb->len - hlen; /* Space per frame */ ptr = raw + hlen; /* Where to start from */ +#ifdef CONFIG_BRIDGE_NETFILTER /* for bridged IP traffic encapsulated inside f.e. a vlan header, - * we need to make room for the encapsulating header - */ - pad = nf_bridge_pad(skb); - ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, pad); - mtu -= pad; - + * we need to make room for the encapsulating header */ + ll_rs = LL_RESERVED_SPACE_EXTRA(rt->u.dst.dev, nf_bridge_pad(skb)); + mtu -= nf_bridge_pad(skb); +#else + ll_rs = LL_RESERVED_SPACE(rt->u.dst.dev); +#endif /* * Fragment the datagram. 
*/ @@ -679,7 +679,7 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk { struct iovec *iov = from; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { if (memcpy_fromiovecend(to, iov, offset, len) < 0) return -EFAULT; } else { @@ -735,7 +735,7 @@ static inline int ip_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -843,7 +843,7 @@ int ip_append_data(struct sock *sk, length + fragheaderlen <= mtu && rt->u.dst.dev->features & NETIF_F_ALL_CSUM && !exthdrlen) - csummode = CHECKSUM_PARTIAL; + csummode = CHECKSUM_HW; inet->cork.length += length; if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && @@ -1366,7 +1366,6 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; - security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/trunk/net/ipv4/ipcomp.c b/trunk/net/ipv4/ipcomp.c index 17342430a843..5bb9c9f03fb6 100644 --- a/trunk/net/ipv4/ipcomp.c +++ b/trunk/net/ipv4/ipcomp.c @@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) ip_send_check(iph); return 0; } @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = XFRM_MODE_TUNNEL; + t->props.mode = 1; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += 
sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/trunk/net/ipv4/ipconfig.c b/trunk/net/ipv4/ipconfig.c index 1fbb38415b19..cb8a92f18ef6 100644 --- a/trunk/net/ipv4/ipconfig.c +++ b/trunk/net/ipv4/ipconfig.c @@ -31,6 +31,7 @@ * -- Josef Siemes , Aug 2002 */ +#include #include #include #include diff --git a/trunk/net/ipv4/ipip.c b/trunk/net/ipv4/ipip.c index 0c4556529228..76ab50b0d6ef 100644 --- a/trunk/net/ipv4/ipip.c +++ b/trunk/net/ipv4/ipip.c @@ -341,8 +341,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) int code = skb->h.icmph->code; int rel_type = 0; int rel_code = 0; - __be32 rel_info = 0; - __u32 n = 0; + int rel_info = 0; struct sk_buff *skb2; struct flowi fl; struct rtable *rt; @@ -355,15 +354,14 @@ static int ipip_err(struct sk_buff *skb, u32 info) default: return 0; case ICMP_PARAMETERPROB: - n = ntohl(skb->h.icmph->un.gateway) >> 24; - if (n < hlen) + if (skb->h.icmph->un.gateway < hlen) return 0; /* So... This guy found something strange INSIDE encapsulated packet. Well, he is fool, but what can we do ? */ rel_type = ICMP_PARAMETERPROB; - rel_info = htonl((n - hlen) << 24); + rel_info = skb->h.icmph->un.gateway - hlen; break; case ICMP_DEST_UNREACH: @@ -374,14 +372,13 @@ static int ipip_err(struct sk_buff *skb, u32 info) return 0; case ICMP_FRAG_NEEDED: /* And it is the only really necessary thing :-) */ - n = ntohs(skb->h.icmph->un.frag.mtu); - if (n < hlen+68) + rel_info = ntohs(skb->h.icmph->un.frag.mtu); + if (rel_info < hlen+68) return 0; - n -= hlen; + rel_info -= hlen; /* BSD 4.2 MORE DOES NOT EXIST IN NATURE. */ - if (n > ntohs(eiph->tot_len)) + if (rel_info > ntohs(eiph->tot_len)) return 0; - rel_info = htonl(n); break; default: /* All others are translated to HOST_UNREACH. 
@@ -443,11 +440,12 @@ static int ipip_err(struct sk_buff *skb, u32 info) /* change mtu on this route */ if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - if (n > dst_mtu(skb2->dst)) { + if (rel_info > dst_mtu(skb2->dst)) { kfree_skb(skb2); return 0; } - skb2->dst->ops->update_pmtu(skb2->dst, n); + skb2->dst->ops->update_pmtu(skb2->dst, rel_info); + rel_info = htonl(rel_info); } else if (type == ICMP_TIME_EXCEEDED) { struct ip_tunnel *t = netdev_priv(skb2->dev); if (t->parms.iph.ttl) { diff --git a/trunk/net/ipv4/ipmr.c b/trunk/net/ipv4/ipmr.c index ba49588da242..85893eef6b16 100644 --- a/trunk/net/ipv4/ipmr.c +++ b/trunk/net/ipv4/ipmr.c @@ -312,8 +312,7 @@ static void ipmr_destroy_unres(struct mfc_cache *c) e = NLMSG_DATA(nlh); e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - - rtnl_unicast(skb, NETLINK_CB(skb).pid); + netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); } else kfree_skb(skb); } @@ -513,6 +512,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) while((skb=__skb_dequeue(&uc->mfc_un.unres.unresolved))) { if (skb->nh.iph->version == 0) { + int err; struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr)); if (ipmr_fill_mroute(skb, c, NLMSG_DATA(nlh)) > 0) { @@ -525,8 +525,7 @@ static void ipmr_cache_resolve(struct mfc_cache *uc, struct mfc_cache *c) e->error = -EMSGSIZE; memset(&e->msg, 0, sizeof(e->msg)); } - - rtnl_unicast(skb, NETLINK_CB(skb).pid); + err = netlink_unicast(rtnl, skb, NETLINK_CB(skb).dst_pid, MSG_DONTWAIT); } else ip_mr_forward(skb, c, 0); } @@ -1900,8 +1899,11 @@ void __init ip_mr_init(void) { mrt_cachep = kmem_cache_create("ip_mrt_cache", sizeof(struct mfc_cache), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!mrt_cachep) + panic("cannot allocate ip_mrt_cache"); + init_timer(&ipmr_expire_timer); ipmr_expire_timer.function=ipmr_expire_process; register_netdevice_notifier(&ip_mr_notifier); diff --git 
a/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c b/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c index 820e8318d10d..bc28b1160a3a 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/trunk/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -151,7 +151,7 @@ tcp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -204,7 +204,7 @@ tcp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(tcph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -229,7 +229,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) switch (skb->ip_summed) { case CHECKSUM_NONE: skb->csum = skb_checksum(skb, tcphoff, skb->len - tcphoff, 0); - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - tcphoff, skb->nh.iph->protocol, skb->csum)) { @@ -239,7 +239,7 @@ tcp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* No need to checksum. 
*/ + /* CHECKSUM_UNNECESSARY */ break; } diff --git a/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c b/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c index 90c8166c0ec1..89d9175d8f28 100644 --- a/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/trunk/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -161,7 +161,7 @@ udp_snat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->daddr, cp->vaddr, cp->dport, cp->vport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -216,7 +216,7 @@ udp_dnat_handler(struct sk_buff **pskb, /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(udph, cp->vaddr, cp->daddr, cp->vport, cp->dport); - if ((*pskb)->ip_summed == CHECKSUM_COMPLETE) + if ((*pskb)->ip_summed == CHECKSUM_HW) (*pskb)->ip_summed = CHECKSUM_NONE; } else { /* full checksum calculation */ @@ -250,7 +250,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) case CHECKSUM_NONE: skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (csum_tcpudp_magic(skb->nh.iph->saddr, skb->nh.iph->daddr, skb->len - udphoff, @@ -262,7 +262,7 @@ udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) } break; default: - /* No need to checksum. 
*/ + /* CHECKSUM_UNNECESSARY */ break; } } diff --git a/trunk/net/ipv4/netfilter.c b/trunk/net/ipv4/netfilter.c index f88347de21a9..6a9e34b794bc 100644 --- a/trunk/net/ipv4/netfilter.c +++ b/trunk/net/ipv4/netfilter.c @@ -168,7 +168,7 @@ unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (hook != NF_IP_PRE_ROUTING && hook != NF_IP_LOCAL_IN) break; if ((protocol == 0 && !(u16)csum_fold(skb->csum)) || diff --git a/trunk/net/ipv4/netfilter/Kconfig b/trunk/net/ipv4/netfilter/Kconfig index a55b8ff70ded..ef0b5aac5838 100644 --- a/trunk/net/ipv4/netfilter/Kconfig +++ b/trunk/net/ipv4/netfilter/Kconfig @@ -278,6 +278,17 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_DSCP + tristate "DSCP match support" + depends on IP_NF_IPTABLES + help + This option adds a `DSCP' match, which allows you to match against + the IPv4 header DSCP field (DSCP codepoint). + + The DSCP codepoint can have any value between 0x0 and 0x3f. + + To compile it as a module, choose M here. If unsure, say N. + config IP_NF_MATCH_AH tristate "AH match support" depends on IP_NF_IPTABLES @@ -557,6 +568,17 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_TARGET_DSCP + tristate "DSCP target support" + depends on IP_NF_MANGLE + help + This option adds a `DSCP' target, which allows you to manipulate + the IPv4 header DSCP field (DSCP codepoint). + + The DSCP codepoint can have any value between 0x0 and 0x3f. + + To compile it as a module, choose M here. If unsure, say N.
+ config IP_NF_TARGET_TTL tristate 'TTL target support' depends on IP_NF_MANGLE diff --git a/trunk/net/ipv4/netfilter/Makefile b/trunk/net/ipv4/netfilter/Makefile index 09aaed1a8063..3ded4a3af59c 100644 --- a/trunk/net/ipv4/netfilter/Makefile +++ b/trunk/net/ipv4/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o +obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o @@ -67,6 +68,7 @@ obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o +obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o diff --git a/trunk/net/ipv4/netfilter/arp_tables.c b/trunk/net/ipv4/netfilter/arp_tables.c index 85f0d73ebfb4..8d1d7a6e72a5 100644 --- a/trunk/net/ipv4/netfilter/arp_tables.c +++ b/trunk/net/ipv4/netfilter/arp_tables.c @@ -56,6 +56,8 @@ do { \ #define ARP_NF_ASSERT(x) #endif +#include + static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, char *hdr_addr, int len) { @@ -206,7 +208,8 @@ static unsigned int arpt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { if (net_ratelimit()) printk("arp_tables: error: '%s'\n", (char *)targinfo); @@ -223,7 +226,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table) + struct arpt_table *table, + void *userdata) { 
static const char nulldevname[IFNAMSIZ]; unsigned int verdict = NF_DROP; @@ -298,7 +302,8 @@ unsigned int arpt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data); + t->data, + userdata); /* Target might have changed stuff. */ arp = (*pskb)->nh.arph; @@ -485,10 +490,12 @@ static inline int check_entry(struct arpt_entry *e, const char *name, unsigned i if (t->u.kernel.target == &arpt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto err; + goto out; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, + t->u.target_size + - sizeof(*t), e->comefrom)) { duprintf("arp_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -555,7 +562,8 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) t = arpt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); + t->u.kernel.target->destroy(t->u.kernel.target, t->data, + t->u.target_size - sizeof(*t)); module_put(t->u.kernel.target->me); return 0; } diff --git a/trunk/net/ipv4/netfilter/arpt_mangle.c b/trunk/net/ipv4/netfilter/arpt_mangle.c index d12b1df252a1..a58325c1ceb9 100644 --- a/trunk/net/ipv4/netfilter/arpt_mangle.c +++ b/trunk/net/ipv4/netfilter/arpt_mangle.c @@ -11,7 +11,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { const struct arpt_mangle *mangle = targinfo; struct arphdr *arp; @@ -67,7 +67,7 @@ target(struct sk_buff **pskb, static int checkentry(const char *tablename, const void *e, const struct xt_target *target, - void *targinfo, unsigned int hook_mask) + void *targinfo, unsigned int targinfosize, unsigned int hook_mask) { const struct arpt_mangle *mangle = targinfo; diff --git a/trunk/net/ipv4/netfilter/arptable_filter.c 
b/trunk/net/ipv4/netfilter/arptable_filter.c index 7edea2a1696c..d7c472faa53b 100644 --- a/trunk/net/ipv4/netfilter/arptable_filter.c +++ b/trunk/net/ipv4/netfilter/arptable_filter.c @@ -155,7 +155,7 @@ static unsigned int arpt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return arpt_do_table(pskb, hook, in, out, &packet_filter); + return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); } static struct nf_hook_ops arpt_ops[] = { diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_core.c b/trunk/net/ipv4/netfilter/ip_conntrack_core.c index c432b3163609..aa459177c3f8 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_core.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_core.c @@ -47,6 +47,7 @@ #include #include #include +#include #define IP_CONNTRACK_VERSION "2.4" @@ -63,17 +64,17 @@ atomic_t ip_conntrack_count = ATOMIC_INIT(0); void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly; +struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size __read_mostly = 0; -int ip_conntrack_max __read_mostly; -struct list_head *ip_conntrack_hash __read_mostly; +unsigned int ip_conntrack_htable_size = 0; +int ip_conntrack_max; +struct list_head *ip_conntrack_hash; static kmem_cache_t *ip_conntrack_cachep __read_mostly; static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid __read_mostly; +unsigned int ip_ct_log_invalid; static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc __read_mostly; +static int ip_conntrack_vmalloc; static unsigned int ip_conntrack_next_id; static unsigned int ip_conntrack_expect_next_id; @@ -293,10 +294,15 @@ void ip_ct_remove_expectations(struct ip_conntrack *ct) static void clean_from_lists(struct ip_conntrack *ct) { + 
unsigned int ho, hr; + DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&ip_conntrack_lock); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ ip_ct_remove_expectations(ct); @@ -307,7 +313,6 @@ destroy_conntrack(struct nf_conntrack *nfct) { struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; - struct ip_conntrack_helper *helper; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -316,10 +321,6 @@ destroy_conntrack(struct nf_conntrack *nfct) ip_conntrack_event(IPCT_DESTROY, ct); set_bit(IPS_DYING_BIT, &ct->status); - helper = ct->helper; - if (helper && helper->destroy) - helper->destroy(ct); - /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! 
-HW */ @@ -366,6 +367,16 @@ static void death_by_timeout(unsigned long ul_conntrack) ip_conntrack_put(ct); } +static inline int +conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_tuple *tuple, + const struct ip_conntrack *ignored_conntrack) +{ + ASSERT_READ_LOCK(&ip_conntrack_lock); + return tuplehash_to_ctrack(i) != ignored_conntrack + && ip_ct_tuple_equal(tuple, &i->tuple); +} + struct ip_conntrack_tuple_hash * __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, const struct ip_conntrack *ignored_conntrack) @@ -375,8 +386,7 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple, ASSERT_READ_LOCK(&ip_conntrack_lock); list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (tuplehash_to_ctrack(h) != ignored_conntrack && - ip_ct_tuple_equal(tuple, &h->tuple)) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { CONNTRACK_STAT_INC(found); return h; } @@ -407,10 +417,10 @@ static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, unsigned int repl_hash) { ct->id = ++ip_conntrack_next_id; - list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, - &ip_conntrack_hash[hash]); - list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, - &ip_conntrack_hash[repl_hash]); + list_prepend(&ip_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&ip_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); } void ip_conntrack_hash_insert(struct ip_conntrack *ct) @@ -430,7 +440,6 @@ int __ip_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; - struct ip_conntrack_tuple_hash *h; struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -461,43 +470,43 @@ __ip_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. 
*/ - list_for_each_entry(h, &ip_conntrack_hash[hash], list) - if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) - goto out; - list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list) - if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) - goto out; - - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + if (!LIST_FIND(&ip_conntrack_hash[hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&ip_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct ip_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); + __ip_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. 
*/ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + CONNTRACK_STAT_INC(insert); + write_unlock_bh(&ip_conntrack_lock); + if (ct->helper) + ip_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + ip_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); + ip_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; + return NF_ACCEPT; + } -out: CONNTRACK_STAT_INC(insert_failed); write_unlock_bh(&ip_conntrack_lock); + return NF_DROP; } @@ -518,21 +527,23 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. 
*/ +static inline int unreplied(const struct ip_conntrack_tuple_hash *i) +{ + return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); +} + static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL, *tmp; + struct ip_conntrack *ct = NULL; int dropped = 0; read_lock_bh(&ip_conntrack_lock); - list_for_each_entry_reverse(h, chain, list) { - tmp = tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { - ct = tmp; - atomic_inc(&ct->ct_general.use); - break; - } + h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); + if (h) { + ct = tuplehash_to_ctrack(h); + atomic_inc(&ct->ct_general.use); } read_unlock_bh(&ip_conntrack_lock); @@ -548,16 +559,18 @@ static int early_drop(struct list_head *chain) return dropped; } +static inline int helper_cmp(const struct ip_conntrack_helper *i, + const struct ip_conntrack_tuple *rtuple) +{ + return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + static struct ip_conntrack_helper * __ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) { - struct ip_conntrack_helper *h; - - list_for_each_entry(h, &helpers, list) { - if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) - return h; - } - return NULL; + return LIST_FIND(&helpers, helper_cmp, + struct ip_conntrack_helper *, + tuple); } struct ip_conntrack_helper * @@ -627,15 +640,11 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, ip_conntrack_hash_rnd_initted = 1; } - /* We don't want any race condition at early drop stage */ - atomic_inc(&ip_conntrack_count); - if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) > ip_conntrack_max) { + && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. 
*/ if (!early_drop(&ip_conntrack_hash[hash])) { - atomic_dec(&ip_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "ip_conntrack: table full, dropping" @@ -647,7 +656,6 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); if (!conntrack) { DEBUGP("Can't allocate conntrack.\n"); - atomic_dec(&ip_conntrack_count); return ERR_PTR(-ENOMEM); } @@ -661,6 +669,8 @@ struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; + atomic_inc(&ip_conntrack_count); + return conntrack; } @@ -1052,7 +1062,7 @@ int ip_conntrack_helper_register(struct ip_conntrack_helper *me) { BUG_ON(me->timeout == 0); write_lock_bh(&ip_conntrack_lock); - list_add(&me->list, &helpers); + list_prepend(&helpers, me); write_unlock_bh(&ip_conntrack_lock); return 0; @@ -1071,24 +1081,24 @@ __ip_conntrack_helper_find_byname(const char *name) return NULL; } -static inline void unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) +static inline int unhelp(struct ip_conntrack_tuple_hash *i, + const struct ip_conntrack_helper *me) { if (tuplehash_to_ctrack(i)->helper == me) { ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); tuplehash_to_ctrack(i)->helper = NULL; } + return 0; } void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) { unsigned int i; - struct ip_conntrack_tuple_hash *h; struct ip_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&ip_conntrack_lock); - list_del(&me->list); + LIST_DELETE(&helpers, me); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { @@ -1098,12 +1108,10 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) } } /* Get rid of expecteds, set helpers to NULL. 
*/ - list_for_each_entry(h, &unconfirmed, list) - unhelp(h, me); - for (i = 0; i < ip_conntrack_htable_size; i++) { - list_for_each_entry(h, &ip_conntrack_hash[i], list) - unhelp(h, me); - } + LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); + for (i = 0; i < ip_conntrack_htable_size; i++) + LIST_FIND_W(&ip_conntrack_hash[i], unhelp, + struct ip_conntrack_tuple_hash *, me); write_unlock_bh(&ip_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1229,43 +1237,46 @@ static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) nf_conntrack_get(nskb->nfct); } +static inline int +do_iter(const struct ip_conntrack_tuple_hash *i, + int (*iter)(struct ip_conntrack *i, void *data), + void *data) +{ + return iter(tuplehash_to_ctrack(i), data); +} + /* Bring out ya dead! */ -static struct ip_conntrack * +static struct ip_conntrack_tuple_hash * get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), void *data, unsigned int *bucket) { - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct; + struct ip_conntrack_tuple_hash *h = NULL; write_lock_bh(&ip_conntrack_lock); for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) { - ct = tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; - } - } - list_for_each_entry(h, &unconfirmed, list) { - ct = tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; + h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, + struct ip_conntrack_tuple_hash *, iter, data); + if (h) + break; } + if (!h) + h = LIST_FIND_W(&unconfirmed, do_iter, + struct ip_conntrack_tuple_hash *, iter, data); + if (h) + atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&ip_conntrack_lock); - return NULL; -found: - atomic_inc(&ct->ct_general.use); - write_unlock_bh(&ip_conntrack_lock); - return ct; + return h; } void ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) { 
- struct ip_conntrack *ct; + struct ip_conntrack_tuple_hash *h; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { + struct ip_conntrack *ct = tuplehash_to_ctrack(h); /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/trunk/net/ipv4/netfilter/ip_conntrack_helper_pptp.c index fb0aee691721..b020a33e65e9 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_helper_pptp.c @@ -20,11 +20,11 @@ * - We can only support one single call within each session * * TODO: - * - testing of incoming PPTP calls + * - testing of incoming PPTP calls * - * Changes: + * Changes: * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from + * - Call ip_conntrack_unexpect_related() from * pptp_destroy_siblings() to destroy expectations in case * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen * (Philip Craig ) @@ -80,7 +80,7 @@ int struct PptpControlHeader *ctlh, union pptp_ctrl_union *pptpReq); -void +int (*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply); @@ -141,7 +141,7 @@ static void pptp_expectfn(struct ip_conntrack *ct, invert_tuplepr(&inv_t, &exp->tuple); DEBUGP("trying to unexpect other dir: "); DUMP_TUPLE(&inv_t); - + exp_other = ip_conntrack_expect_find(&inv_t); if (exp_other) { /* delete other expectation. */ @@ -194,16 +194,15 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) { struct ip_conntrack_tuple t; - ip_ct_gre_keymap_destroy(ct); - /* Since ct->sibling_list has literally rusted away in 2.6.11, + /* Since ct->sibling_list has literally rusted away in 2.6.11, * we now need another way to find out about our sibling * contrack and expects... 
-HW */ /* try original (pns->pac) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = ct->help.ct_pptp_info.pns_call_id; - t.dst.u.gre.key = ct->help.ct_pptp_info.pac_call_id; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout original pns->pac ct/exp\n"); @@ -211,8 +210,8 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* try reply (pac->pns) tuple */ memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = ct->help.ct_pptp_info.pac_call_id; - t.dst.u.gre.key = ct->help.ct_pptp_info.pns_call_id; + t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); + t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); if (!destroy_sibling_or_exp(&t)) DEBUGP("failed to timeout reply pac->pns ct/exp\n"); @@ -220,63 +219,94 @@ static void pptp_destroy_siblings(struct ip_conntrack *ct) /* expect GRE connections (PNS->PAC and PAC->PNS direction) */ static inline int -exp_gre(struct ip_conntrack *ct, +exp_gre(struct ip_conntrack *master, + u_int32_t seq, __be16 callid, __be16 peer_callid) { + struct ip_conntrack_tuple inv_tuple; + struct ip_conntrack_tuple exp_tuples[] = { + /* tuple in original direction, PNS->PAC */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, + .u = { .gre = { .key = peer_callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, + .u = { .gre = { .key = callid } }, + .protonum = IPPROTO_GRE + }, + }, + /* tuple in reply direction, PAC->PNS */ + { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, + .u = { .gre = { .key = callid } } + }, + .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, + .u = { .gre = { .key = peer_callid } }, + .protonum = IPPROTO_GRE + }, + } + }; struct ip_conntrack_expect *exp_orig, 
*exp_reply; int ret = 1; - exp_orig = ip_conntrack_expect_alloc(ct); + exp_orig = ip_conntrack_expect_alloc(master); if (exp_orig == NULL) goto out; - exp_reply = ip_conntrack_expect_alloc(ct); + exp_reply = ip_conntrack_expect_alloc(master); if (exp_reply == NULL) goto out_put_orig; - /* original direction, PNS->PAC */ - exp_orig->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - exp_orig->tuple.src.u.gre.key = peer_callid; - exp_orig->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - exp_orig->tuple.dst.u.gre.key = callid; - exp_orig->tuple.dst.protonum = IPPROTO_GRE; + memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); exp_orig->mask.src.ip = 0xffffffff; exp_orig->mask.src.u.all = 0; + exp_orig->mask.dst.u.all = 0; exp_orig->mask.dst.u.gre.key = htons(0xffff); exp_orig->mask.dst.ip = 0xffffffff; exp_orig->mask.dst.protonum = 0xff; - - exp_orig->master = ct; + + exp_orig->master = master; exp_orig->expectfn = pptp_expectfn; exp_orig->flags = 0; /* both expectations are identical apart from tuple */ memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - - /* reply direction, PAC->PNS */ - exp_reply->tuple.src.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - exp_reply->tuple.src.u.gre.key = callid; - exp_reply->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - exp_reply->tuple.dst.u.gre.key = peer_callid; - exp_reply->tuple.dst.protonum = IPPROTO_GRE; + memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); if (ip_nat_pptp_hook_exp_gre) - ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - if (ip_conntrack_expect_related(exp_orig) != 0) - goto out_put_both; - if (ip_conntrack_expect_related(exp_reply) != 0) - goto out_unexpect_orig; - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(ct, &exp_orig->tuple, 0) != 0) - goto out_unexpect_both; - if (ip_ct_gre_keymap_add(ct, &exp_reply->tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(ct); - goto out_unexpect_both; + ret = 
ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); + else { + + DEBUGP("calling expect_related PNS->PAC"); + DUMP_TUPLE(&exp_orig->tuple); + + if (ip_conntrack_expect_related(exp_orig) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_put_both; + } + + DEBUGP("calling expect_related PAC->PNS"); + DUMP_TUPLE(&exp_reply->tuple); + + if (ip_conntrack_expect_related(exp_reply) != 0) { + DEBUGP("cannot expect_related()\n"); + goto out_unexpect_orig; + } + + /* Add GRE keymap entries */ + if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { + DEBUGP("cannot keymap_add() exp\n"); + goto out_unexpect_both; + } + + invert_tuplepr(&inv_tuple, &exp_reply->tuple); + if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { + ip_ct_gre_keymap_destroy(master); + DEBUGP("cannot keymap_add() exp_inv\n"); + goto out_unexpect_both; + } + ret = 0; } - ret = 0; out_put_both: ip_conntrack_expect_put(exp_reply); @@ -292,36 +322,73 @@ exp_gre(struct ip_conntrack *ct, goto out_put_both; } -static inline int +static inline int pptp_inbound_pkt(struct sk_buff **pskb, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq, - unsigned int reqlen, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 cid = 0, pcid = 0; + __be16 *cid, *pcid; + u_int32_t seq; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) { + DEBUGP("error during skb_header_pointer\n"); + return NF_ACCEPT; + } + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) { + DEBUGP("error during skb_header_pointer\n"); + return 
NF_ACCEPT; + } msg = ntohs(ctlh->messageType); DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); switch (msg) { case PPTP_START_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.srep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) - goto invalid; + if (info->sstate < PPTP_SESSION_REQUESTED) { + DEBUGP("%s without START_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } if (pptpReq->srep.resultCode == PPTP_START_OK) info->sstate = PPTP_SESSION_CONFIRMED; - else + else info->sstate = PPTP_SESSION_ERROR; break; case PPTP_STOP_SESSION_REPLY: + if (reqlen < sizeof(_pptpReq.strep)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) - goto invalid; + if (info->sstate > PPTP_SESSION_STOPREQ) { + DEBUGP("%s without STOP_SESS_REQUEST\n", + pptp_msg_name[msg]); + break; + } if (pptpReq->strep.resultCode == PPTP_STOP_OK) info->sstate = PPTP_SESSION_NONE; else @@ -329,64 +396,116 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.ocack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) - goto invalid; + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) - goto invalid; - - cid = pptpReq->ocack.callID; - pcid = pptpReq->ocack.peersCallID; - if (info->pns_call_id != pcid) - goto invalid; - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); - - if (pptpReq->ocack.resultCode == PPTP_OUTCALL_CONNECT) { - info->cstate = PPTP_CALL_OUT_CONF; - info->pac_call_id = cid; - exp_gre(ct, cid, pcid); - } else + info->cstate != 
PPTP_CALL_OUT_CONF) { + DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); + break; + } + if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { info->cstate = PPTP_CALL_NONE; + break; + } + + cid = &pptpReq->ocack.callID; + pcid = &pptpReq->ocack.peersCallID; + + info->pac_call_id = ntohs(*cid); + + if (htons(info->pns_call_id) != *pcid) { + DEBUGP("%s for unknown callid %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + + DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], + ntohs(*cid), ntohs(*pcid)); + + info->cstate = PPTP_CALL_OUT_CONF; + + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); break; case PPTP_IN_CALL_REQUEST: - /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) - goto invalid; + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } - cid = pptpReq->icreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + /* server tells us about incoming call request */ + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + pcid = &pptpReq->icack.peersCallID; + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = cid; + info->pac_call_id = ntohs(*pcid); break; case PPTP_IN_CALL_CONNECT: + if (reqlen < sizeof(_pptpReq.iccon)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) - goto invalid; - if (info->cstate != PPTP_CALL_IN_REP && - info->cstate != PPTP_CALL_IN_CONF) - goto invalid; + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", pptp_msg_name[msg]); + break; + } + if (info->cstate != 
PPTP_CALL_IN_REP + && info->cstate != PPTP_CALL_IN_CONF) { + DEBUGP("%s but never sent IN_CALL_REPLY\n", + pptp_msg_name[msg]); + break; + } - pcid = pptpReq->iccon.peersCallID; - cid = info->pac_call_id; + pcid = &pptpReq->iccon.peersCallID; + cid = &info->pac_call_id; - if (info->pns_call_id != pcid) - goto invalid; + if (info->pns_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown CallID %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(pcid)); + DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); info->cstate = PPTP_CALL_IN_CONF; /* we expect a GRE connection from PAC to PNS */ - exp_gre(ct, cid, pcid); + seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader) + + ((void *)pcid - (void *)pptpReq); + + if (exp_gre(ct, seq, *cid, *pcid) != 0) + printk("ip_conntrack_pptp: error during exp_gre\n"); + break; case PPTP_CALL_DISCONNECT_NOTIFY: + if (reqlen < sizeof(_pptpReq.disc)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* server confirms disconnect */ - cid = pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); + cid = &pptpReq->disc.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); info->cstate = PPTP_CALL_NONE; /* untrack this call id, unexpect GRE packets */ @@ -394,39 +513,54 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; case PPTP_WAN_ERROR_NOTIFY: + break; + case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - goto invalid; + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) + ? pptp_msg_name[msg]:pptp_msg_name[0], msg); + break; } + if (ip_nat_pptp_hook_inbound) return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, pptpReq); - return NF_ACCEPT; -invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? 
pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; + } static inline int pptp_outbound_pkt(struct sk_buff **pskb, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq, - unsigned int reqlen, + struct tcphdr *tcph, + unsigned int nexthdr_off, + unsigned int datalen, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { + struct PptpControlHeader _ctlh, *ctlh; + unsigned int reqlen; + union pptp_ctrl_union _pptpReq, *pptpReq; struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; u_int16_t msg; - __be16 cid = 0, pcid = 0; + __be16 *cid, *pcid; + + ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); + if (!ctlh) + return NF_ACCEPT; + nexthdr_off += sizeof(_ctlh); + datalen -= sizeof(_ctlh); + + reqlen = datalen; + if (reqlen > sizeof(*pptpReq)) + reqlen = sizeof(*pptpReq); + pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); + if (!pptpReq) + return NF_ACCEPT; msg = ntohs(ctlh->messageType); DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); @@ -434,8 +568,10 @@ pptp_outbound_pkt(struct sk_buff **pskb, switch (msg) { case PPTP_START_SESSION_REQUEST: /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) - goto invalid; + if (info->sstate != PPTP_SESSION_NONE) { + DEBUGP("%s but we already have one", + pptp_msg_name[msg]); + } info->sstate = PPTP_SESSION_REQUESTED; break; case PPTP_STOP_SESSION_REQUEST: @@ -444,115 +580,123 @@ pptp_outbound_pkt(struct sk_buff **pskb, break; case PPTP_OUT_CALL_REQUEST: + if (reqlen < sizeof(_pptpReq.ocreq)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + /* FIXME: break; */ + } + /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) - goto invalid; + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("%s but no session\n", + pptp_msg_name[msg]); + break; + } info->cstate = 
PPTP_CALL_OUT_REQ; /* track PNS call id */ - cid = pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(cid)); - info->pns_call_id = cid; + cid = &pptpReq->ocreq.callID; + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); + info->pns_call_id = ntohs(*cid); break; case PPTP_IN_CALL_REPLY: + if (reqlen < sizeof(_pptpReq.icack)) { + DEBUGP("%s: short packet\n", pptp_msg_name[msg]); + break; + } + /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ && - info->cstate != PPTP_CALL_IN_REP) - goto invalid; - - cid = pptpReq->icack.callID; - pcid = pptpReq->icack.peersCallID; - if (info->pac_call_id != pcid) - goto invalid; - DEBUGP("%s, CID=%X PCID=%X\n", pptp_msg_name[msg], - ntohs(cid), ntohs(pcid)); - - if (pptpReq->icack.resultCode == PPTP_INCALL_ACCEPT) { - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = cid; - } else + if (info->cstate != PPTP_CALL_IN_REQ + && info->cstate != PPTP_CALL_IN_REP) { + DEBUGP("%s without incall_req\n", + pptp_msg_name[msg]); + break; + } + if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { info->cstate = PPTP_CALL_NONE; + break; + } + pcid = &pptpReq->icack.peersCallID; + if (info->pac_call_id != ntohs(*pcid)) { + DEBUGP("%s for unknown call %u\n", + pptp_msg_name[msg], ntohs(*pcid)); + break; + } + DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); + /* part two of the three-way handshake */ + info->cstate = PPTP_CALL_IN_REP; + info->pns_call_id = ntohs(pptpReq->icack.callID); break; case PPTP_CALL_CLEAR_REQUEST: /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) - goto invalid; + if (info->sstate != PPTP_SESSION_CONFIRMED) { + DEBUGP("CLEAR_CALL but no session\n"); + break; + } /* FUTURE: iterate over all calls and check if * call ID is valid. 
We don't do this without newnat, * because we only know about last call */ info->cstate = PPTP_CALL_CLEAR_REQ; break; case PPTP_SET_LINK_INFO: + break; case PPTP_ECHO_REQUEST: case PPTP_ECHO_REPLY: /* I don't have to explain these ;) */ break; default: - goto invalid; + DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0], msg); + /* unknown: no need to create GRE masq table entry */ + break; } - + if (ip_nat_pptp_hook_outbound) return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, pptpReq); - return NF_ACCEPT; -invalid: - DEBUGP("invalid %s: type=%d cid=%u pcid=%u " - "cstate=%d sstate=%d pns_cid=%u pac_cid=%u\n", - msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : pptp_msg_name[0], - msg, ntohs(cid), ntohs(pcid), info->cstate, info->sstate, - ntohs(info->pns_call_id), ntohs(info->pac_call_id)); return NF_ACCEPT; } -static const unsigned int pptp_msg_size[] = { - [PPTP_START_SESSION_REQUEST] = sizeof(struct PptpStartSessionRequest), - [PPTP_START_SESSION_REPLY] = sizeof(struct PptpStartSessionReply), - [PPTP_STOP_SESSION_REQUEST] = sizeof(struct PptpStopSessionRequest), - [PPTP_STOP_SESSION_REPLY] = sizeof(struct PptpStopSessionReply), - [PPTP_OUT_CALL_REQUEST] = sizeof(struct PptpOutCallRequest), - [PPTP_OUT_CALL_REPLY] = sizeof(struct PptpOutCallReply), - [PPTP_IN_CALL_REQUEST] = sizeof(struct PptpInCallRequest), - [PPTP_IN_CALL_REPLY] = sizeof(struct PptpInCallReply), - [PPTP_IN_CALL_CONNECT] = sizeof(struct PptpInCallConnected), - [PPTP_CALL_CLEAR_REQUEST] = sizeof(struct PptpClearCallRequest), - [PPTP_CALL_DISCONNECT_NOTIFY] = sizeof(struct PptpCallDisconnectNotify), - [PPTP_WAN_ERROR_NOTIFY] = sizeof(struct PptpWanErrorNotify), - [PPTP_SET_LINK_INFO] = sizeof(struct PptpSetLinkInfo), -}; /* track caller id inside control connection, call expect_related */ -static int +static int conntrack_pptp_help(struct sk_buff **pskb, struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) { + struct pptp_pkt_hdr _pptph, *pptph; + struct 
tcphdr _tcph, *tcph; + u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; + u_int32_t datalen; int dir = CTINFO2DIR(ctinfo); struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - struct tcphdr _tcph, *tcph; - struct pptp_pkt_hdr _pptph, *pptph; - struct PptpControlHeader _ctlh, *ctlh; - union pptp_ctrl_union _pptpReq, *pptpReq; - unsigned int tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; - unsigned int datalen, reqlen, nexthdr_off; + unsigned int nexthdr_off; + int oldsstate, oldcstate; int ret; - u_int16_t msg; /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED + if (ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { DEBUGP("ctinfo = %u, skipping\n", ctinfo); return NF_ACCEPT; } - + nexthdr_off = (*pskb)->nh.iph->ihl*4; tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); BUG_ON(!tcph); nexthdr_off += tcph->doff * 4; datalen = tcplen - tcph->doff * 4; + if (tcph->fin || tcph->rst) { + DEBUGP("RST/FIN received, timeouting GRE\n"); + /* can't do this after real newnat */ + info->cstate = PPTP_CALL_NONE; + + /* untrack this call id, unexpect GRE packets */ + pptp_destroy_siblings(ct); + } + pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); if (!pptph) { DEBUGP("no full PPTP header, can't track\n"); @@ -568,23 +712,6 @@ conntrack_pptp_help(struct sk_buff **pskb, return NF_ACCEPT; } - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - msg = ntohs(ctlh->messageType); - if (msg > 0 && msg <= PPTP_MSG_MAX && reqlen < pptp_msg_size[msg]) - return NF_ACCEPT; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; - oldsstate = info->sstate; oldcstate = info->cstate; @@ -594,11 +721,11 @@ 
conntrack_pptp_help(struct sk_buff **pskb, * established from PNS->PAC. However, RFC makes no guarantee */ if (dir == IP_CT_DIR_ORIGINAL) /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, ctinfo); else /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, ctlh, pptpReq, reqlen, ct, + ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, ctinfo); DEBUGP("sstate: %d->%d, cstate: %d->%d\n", oldsstate, info->sstate, oldcstate, info->cstate); @@ -608,31 +735,30 @@ conntrack_pptp_help(struct sk_buff **pskb, } /* control protocol helper */ -static struct ip_conntrack_helper pptp = { +static struct ip_conntrack_helper pptp = { .list = { NULL, NULL }, - .name = "pptp", + .name = "pptp", .me = THIS_MODULE, .max_expected = 2, .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, + .tuple = { .src = { .ip = 0, + .u = { .tcp = { .port = + __constant_htons(PPTP_CONTROL_PORT) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, .protonum = IPPROTO_TCP - } + } }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, + .mask = { .src = { .ip = 0, + .u = { .tcp = { .port = __constant_htons(0xffff) } } + }, + .dst = { .ip = 0, .u = { .all = 0 }, - .protonum = 0xff - } + .protonum = 0xff + } }, - .help = conntrack_pptp_help, - .destroy = pptp_destroy_siblings, + .help = conntrack_pptp_help }; extern void ip_ct_proto_gre_fini(void); @@ -642,7 +768,7 @@ extern int __init ip_ct_proto_gre_init(void); static int __init ip_conntrack_helper_pptp_init(void) { int retcode; - + retcode = ip_ct_proto_gre_init(); if (retcode < 0) return retcode; diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/trunk/net/ipv4/netfilter/ip_conntrack_netbios_ns.c index 3d0b438783db..a566a81325b2 100644 --- 
a/trunk/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_netbios_ns.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_netlink.c b/trunk/net/ipv4/netfilter/ip_conntrack_netlink.c index 52eddea27e93..0d4cc92391fa 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_netlink.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_netlink.c @@ -329,7 +329,11 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { + } else if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -381,10 +385,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; - if (events & IPCT_MARK - && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; - nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; @@ -436,11 +436,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif } if (cb->args[1]) { cb->args[1] = 0; @@ -456,6 +451,46 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +#ifdef CONFIG_IP_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct ip_conntrack *ct = NULL; + struct ip_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&ip_conntrack_lock); 
+ for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { + h = (struct ip_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = tuplehash_to_ctrack(h); + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&ip_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + static const size_t cta_min_ip[CTA_IP_MAX] = { [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), [CTA_IP_V4_DST-1] = sizeof(u_int32_t), @@ -740,14 +775,22 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (msg->nfgen_family != AF_INET) return -EAFNOSUPPORT; -#ifndef CONFIG_IP_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { +#ifdef CONFIG_IP_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else return -ENOTSUPP; #endif - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; + } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) @@ -1210,9 +1253,6 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; - if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_generic.c index 36f2b5e5d80a..f891308b5e4c 100644 --- 
a/trunk/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_generic.c @@ -12,7 +12,7 @@ #include #include -unsigned int ip_ct_generic_timeout __read_mostly = 600*HZ; +unsigned int ip_ct_generic_timeout = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_gre.c index 5fe026f467d3..4ee016c427b4 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_gre.c @@ -1,15 +1,15 @@ /* - * ip_conntrack_proto_gre.c - Version 3.0 + * ip_conntrack_proto_gre.c - Version 3.0 * * Connection tracking protocol helper module for GRE. * * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -37,6 +37,7 @@ static DEFINE_RWLOCK(ip_ct_gre_lock); #define ASSERT_READ_LOCK(x) #define ASSERT_WRITE_LOCK(x) +#include #include #include #include @@ -61,7 +62,7 @@ MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); #define DEBUGP(x, args...) 
#define DUMP_TUPLE_GRE(x) #endif - + /* GRE KEYMAP HANDLING FUNCTIONS */ static LIST_HEAD(gre_keymap_list); @@ -81,14 +82,12 @@ static __be16 gre_keymap_lookup(struct ip_conntrack_tuple *t) __be16 key = 0; read_lock_bh(&ip_ct_gre_lock); - list_for_each_entry(km, &gre_keymap_list, list) { - if (gre_key_cmpfn(km, t)) { - key = km->tuple.src.u.gre.key; - break; - } - } + km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (km) + key = km->tuple.src.u.gre.key; read_unlock_bh(&ip_ct_gre_lock); - + DEBUGP("lookup src key 0x%x up key for ", key); DUMP_TUPLE_GRE(t); @@ -100,25 +99,28 @@ int ip_ct_gre_keymap_add(struct ip_conntrack *ct, struct ip_conntrack_tuple *t, int reply) { - struct ip_ct_gre_keymap **exist_km, *km; + struct ip_ct_gre_keymap **exist_km, *km, *old; if (!ct->helper || strcmp(ct->helper->name, "pptp")) { DEBUGP("refusing to add GRE keymap to non-pptp session\n"); return -1; } - if (!reply) + if (!reply) exist_km = &ct->help.ct_pptp_info.keymap_orig; else exist_km = &ct->help.ct_pptp_info.keymap_reply; if (*exist_km) { /* check whether it's a retransmission */ - list_for_each_entry(km, &gre_keymap_list, list) { - if (gre_key_cmpfn(km, t) && km == *exist_km) - return 0; + old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, + struct ip_ct_gre_keymap *, t); + if (old == *exist_km) { + DEBUGP("retransmission\n"); + return 0; } - DEBUGP("trying to override keymap_%s for ct %p\n", + + DEBUGP("trying to override keymap_%s for ct %p\n", reply? 
"reply":"orig", ct); return -EEXIST; } @@ -134,7 +136,7 @@ ip_ct_gre_keymap_add(struct ip_conntrack *ct, DUMP_TUPLE_GRE(&km->tuple); write_lock_bh(&ip_ct_gre_lock); - list_add_tail(&km->list, &gre_keymap_list); + list_append(&gre_keymap_list, km); write_unlock_bh(&ip_ct_gre_lock); return 0; @@ -152,7 +154,7 @@ void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) write_lock_bh(&ip_ct_gre_lock); if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", + DEBUGP("removing %p from list\n", ct->help.ct_pptp_info.keymap_orig); list_del(&ct->help.ct_pptp_info.keymap_orig->list); kfree(ct->help.ct_pptp_info.keymap_orig); @@ -220,7 +222,7 @@ static int gre_pkt_to_tuple(const struct sk_buff *skb, static int gre_print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple) { - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", + return seq_printf(s, "srckey=0x%x dstkey=0x%x ", ntohs(tuple->src.u.gre.key), ntohs(tuple->dst.u.gre.key)); } @@ -250,14 +252,14 @@ static int gre_packet(struct ip_conntrack *ct, } else ip_ct_refresh_acct(ct, conntrackinfo, skb, ct->proto.gre.timeout); - + return NF_ACCEPT; } /* Called when a new connection for this protocol found. 
*/ static int gre_new(struct ip_conntrack *ct, const struct sk_buff *skb) -{ +{ DEBUGP(": "); DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); @@ -283,9 +285,9 @@ static void gre_destroy(struct ip_conntrack *ct) } /* protocol helper struct */ -static struct ip_conntrack_protocol gre = { +static struct ip_conntrack_protocol gre = { .proto = IPPROTO_GRE, - .name = "gre", + .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, .print_tuple = gre_print_tuple, @@ -323,7 +325,7 @@ void ip_ct_proto_gre_fini(void) } write_unlock_bh(&ip_ct_gre_lock); - ip_conntrack_protocol_unregister(&gre); + ip_conntrack_protocol_unregister(&gre); } EXPORT_SYMBOL(ip_ct_gre_keymap_add); diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_icmp.c index 09c40ebe3345..23f1c504586d 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_icmp.c @@ -21,7 +21,7 @@ #include #include -unsigned int ip_ct_icmp_timeout __read_mostly = 30*HZ; +unsigned int ip_ct_icmp_timeout = 30*HZ; #if 0 #define DEBUGP printk diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_sctp.c index b908a4842e18..2d3612cd5f18 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_sctp.c @@ -58,13 +58,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int ip_ct_sctp_timeout_closed __read_mostly = 10 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; -static unsigned int ip_ct_sctp_timeout_established __read_mostly = 5 DAYS; -static unsigned int ip_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 
1000; -static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; +static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; +static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; +static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; +static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; static const unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c index 03ae9a04cb37..fb920e76ec10 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_tcp.c @@ -48,19 +48,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int ip_ct_tcp_be_liberal __read_mostly = 0; +int ip_ct_tcp_be_liberal = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int ip_ct_tcp_loose __read_mostly = 3; +int ip_ct_tcp_loose = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int ip_ct_tcp_max_retrans __read_mostly = 3; +int ip_ct_tcp_max_retrans = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. 
--RR */ @@ -83,19 +83,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int ip_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; -unsigned int ip_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; -unsigned int ip_ct_tcp_timeout_established __read_mostly = 5 DAYS; -unsigned int ip_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; -unsigned int ip_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; -unsigned int ip_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; -unsigned int ip_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; -unsigned int ip_ct_tcp_timeout_close __read_mostly = 10 SECS; +unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned int ip_ct_tcp_timeout_established = 5 DAYS; +unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; +unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; +unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; +unsigned int ip_ct_tcp_timeout_close = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int ip_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; static const unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ @@ -731,15 +731,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end - && state->last_win == win) + && state->last_end == end) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; - state->last_win = win; state->retrans = 0; } } @@ -867,7 +865,8 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because it is assumed to be correct. 
+ * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/trunk/net/ipv4/netfilter/ip_conntrack_proto_udp.c index d0e8a16970ec..9b2c16b4d2ff 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_proto_udp.c @@ -18,8 +18,8 @@ #include #include -unsigned int ip_ct_udp_timeout __read_mostly = 30*HZ; -unsigned int ip_ct_udp_timeout_stream __read_mostly = 180*HZ; +unsigned int ip_ct_udp_timeout = 30*HZ; +unsigned int ip_ct_udp_timeout_stream = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -117,7 +117,8 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the checksum is assumed to be correct. + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. * FIXME: Source route IP option packets --RR */ if (ip_conntrack_checksum && hooknum == NF_IP_PRE_ROUTING && nf_ip_checksum(skb, hooknum, iph->ihl * 4, IPPROTO_UDP)) { diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_sip.c b/trunk/net/ipv4/netfilter/ip_conntrack_sip.c index 2893e9c74850..4f222d6be009 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_sip.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_sip.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. 
*/ +#include #include #include #include diff --git a/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c b/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c index 02135756562e..7a9fa04a467a 100644 --- a/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/trunk/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -35,6 +35,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -533,7 +534,7 @@ static struct nf_hook_ops ip_conntrack_ops[] = { /* Sysctl support */ -int ip_conntrack_checksum __read_mostly = 1; +int ip_conntrack_checksum = 1; #ifdef CONFIG_SYSCTL @@ -562,7 +563,7 @@ extern unsigned int ip_ct_udp_timeout_stream; /* From ip_conntrack_proto_icmp.c */ extern unsigned int ip_ct_icmp_timeout; -/* From ip_conntrack_proto_generic.c */ +/* From ip_conntrack_proto_icmp.c */ extern unsigned int ip_ct_generic_timeout; /* Log invalid packets of a given protocol */ diff --git a/trunk/net/ipv4/netfilter/ip_nat_core.c b/trunk/net/ipv4/netfilter/ip_nat_core.c index 71f3e09cbc84..1741d555ad0d 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_core.c +++ b/trunk/net/ipv4/netfilter/ip_nat_core.c @@ -22,6 +22,9 @@ #include #include +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + #include #include #include @@ -30,6 +33,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -97,6 +101,18 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) write_unlock_bh(&ip_nat_lock); } +/* We do checksum mangling, so if they were wrong before they're still + * wrong. Also works for incomplete packets (eg. ICMP dest + * unreachables.) */ +u_int16_t +ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} +EXPORT_SYMBOL(ip_nat_cheat_check); + /* Is this tuple already taken? 
(not by us) */ int ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, @@ -362,12 +378,12 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = nf_csum_update(~iph->saddr, target->src.ip, - iph->check); + iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, + iph->check); iph->saddr = target->src.ip; } else { - iph->check = nf_csum_update(~iph->daddr, target->dst.ip, - iph->check); + iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, + iph->check); iph->daddr = target->dst.ip; } return 1; @@ -407,10 +423,10 @@ unsigned int ip_nat_packet(struct ip_conntrack *ct, EXPORT_SYMBOL_GPL(ip_nat_packet); /* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) +int ip_nat_icmp_reply_translation(struct sk_buff **pskb, + struct ip_conntrack *ct, + enum ip_nat_manip_type manip, + enum ip_conntrack_dir dir) { struct { struct icmphdr icmp; @@ -418,9 +434,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, } *inside; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; - enum ip_nat_manip_type manip = HOOK2MANIP(hooknum); if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) return 0; @@ -429,8 +443,12 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, /* We're actually going to mangle it beyond trivial checksum adjustment, so make sure the current checksum is correct. 
*/ - if (nf_ip_checksum(*pskb, hooknum, hdrlen, 0)) - return 0; + if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { + hdrlen = (*pskb)->nh.iph->ihl * 4; + if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, 0))) + return 0; + } /* Must be RELATED */ IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || @@ -469,14 +487,12 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, !manip)) return 0; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); - } + /* Reloading "inside" here since manip_pkt inner. */ + inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; + inside->icmp.checksum = 0; + inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, + (*pskb)->len - hdrlen, + 0)); /* Change outer to look the reply to an incoming packet * (proto 0 means don't invert per-proto part). 
*/ diff --git a/trunk/net/ipv4/netfilter/ip_nat_helper.c b/trunk/net/ipv4/netfilter/ip_nat_helper.c index 7f6a75984f6c..cbcaa45370ae 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_helper.c +++ b/trunk/net/ipv4/netfilter/ip_nat_helper.c @@ -27,12 +27,16 @@ #include #include +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + #include #include #include #include #include #include +#include #if 0 #define DEBUGP printk @@ -161,7 +165,7 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct tcphdr *tcph; - int oldlen, datalen; + int datalen; if (!skb_make_writable(pskb, (*pskb)->len)) return 0; @@ -176,22 +180,13 @@ ip_nat_mangle_tcp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; tcph = (void *)iph + iph->ihl*4; - oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, match_offset, match_len, rep_buffer, rep_len); datalen = (*pskb)->len - iph->ihl*4; - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, - iph->saddr, iph->daddr, - csum_partial((char *)tcph, - datalen, 0)); - } else - tcph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ 0xFFFF, - htons(datalen), - tcph->check, 1); + tcph->check = 0; + tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, + csum_partial((char *)tcph, datalen, 0)); if (rep_len != match_len) { set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); @@ -226,7 +221,6 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, { struct iphdr *iph; struct udphdr *udph; - int datalen, oldlen; /* UDP helpers might accidentally mangle the wrong packet */ iph = (*pskb)->nh.iph; @@ -244,32 +238,22 @@ ip_nat_mangle_udp_packet(struct sk_buff **pskb, iph = (*pskb)->nh.iph; udph = (void *)iph + iph->ihl*4; - - oldlen = (*pskb)->len - iph->ihl*4; mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), match_offset, match_len, rep_buffer, rep_len); /* update the length of the UDP packet */ - datalen = (*pskb)->len - iph->ihl*4; - udph->len = 
htons(datalen); + udph->len = htons((*pskb)->len - iph->ihl*4); /* fix udp checksum if udp checksum was previously calculated */ - if (!udph->check && (*pskb)->ip_summed != CHECKSUM_PARTIAL) - return 1; - - if ((*pskb)->ip_summed != CHECKSUM_PARTIAL) { + if (udph->check) { + int datalen = (*pskb)->len - iph->ihl * 4; udph->check = 0; udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, datalen, IPPROTO_UDP, csum_partial((char *)udph, datalen, 0)); - if (!udph->check) - udph->check = -1; - } else - udph->check = nf_proto_csum_update(*pskb, - htons(oldlen) ^ 0xFFFF, - htons(datalen), - udph->check, 1); + } + return 1; } EXPORT_SYMBOL(ip_nat_mangle_udp_packet); @@ -309,14 +293,11 @@ sack_adjust(struct sk_buff *skb, ntohl(sack->start_seq), new_start_seq, ntohl(sack->end_seq), new_end_seq); - tcph->check = nf_proto_csum_update(skb, - ~sack->start_seq, - new_start_seq, - tcph->check, 0); - tcph->check = nf_proto_csum_update(skb, - ~sack->end_seq, - new_end_seq, - tcph->check, 0); + tcph->check = + ip_nat_cheat_check(~sack->start_seq, new_start_seq, + ip_nat_cheat_check(~sack->end_seq, + new_end_seq, + tcph->check)); sack->start_seq = new_start_seq; sack->end_seq = new_end_seq; sackoff += sizeof(*sack); @@ -400,10 +381,10 @@ ip_nat_seq_adjust(struct sk_buff **pskb, newack = ntohl(tcph->ack_seq) - other_way->offset_before; newack = htonl(newack); - tcph->check = nf_proto_csum_update(*pskb, ~tcph->seq, newseq, - tcph->check, 0); - tcph->check = nf_proto_csum_update(*pskb, ~tcph->ack_seq, newack, - tcph->check, 0); + tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, + ip_nat_cheat_check(~tcph->ack_seq, + newack, + tcph->check)); DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), diff --git a/trunk/net/ipv4/netfilter/ip_nat_helper_pptp.c b/trunk/net/ipv4/netfilter/ip_nat_helper_pptp.c index 2ff578807123..1d149964dc38 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ 
b/trunk/net/ipv4/netfilter/ip_nat_helper_pptp.c @@ -32,7 +32,7 @@ * 2005-06-10 - Version 3.0 * - kernel >= 2.6.11 version, * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * + * */ #include @@ -85,17 +85,19 @@ static void pptp_nat_expected(struct ip_conntrack *ct, DEBUGP("we are PNS->PAC\n"); /* therefore, build tuple for PAC->PNS */ t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = master->help.ct_pptp_info.pac_call_id; + t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = master->help.ct_pptp_info.pns_call_id; + t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); t.dst.protonum = IPPROTO_GRE; } else { DEBUGP("we are PAC->PNS\n"); /* build tuple for PNS->PAC */ t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = master->nat.help.nat_pptp_info.pns_call_id; + t.src.u.gre.key = + htons(master->nat.help.nat_pptp_info.pns_call_id); t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = master->nat.help.nat_pptp_info.pac_call_id; + t.dst.u.gre.key = + htons(master->nat.help.nat_pptp_info.pac_call_id); t.dst.protonum = IPPROTO_GRE; } @@ -147,52 +149,51 @@ pptp_outbound_pkt(struct sk_buff **pskb, { struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg; - __be16 new_callid; + u_int16_t msg, new_callid; unsigned int cid_off; - new_callid = ct_pptp_info->pns_call_id; - + new_callid = htons(ct_pptp_info->pns_call_id); + switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. 
This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = new_callid; - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icack.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. 
PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = ntohs(new_callid); + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icreq.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, + (msg <= PPTP_MSG_MAX)? + pptp_msg_name[msg]:pptp_msg_name[0]); + /* fall through */ + + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; } /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass @@ -211,28 +212,80 @@ pptp_outbound_pkt(struct sk_buff **pskb, return NF_ACCEPT; } -static void +static int pptp_exp_gre(struct ip_conntrack_expect *expect_orig, struct ip_conntrack_expect *expect_reply) { + struct ip_ct_pptp_master *ct_pptp_info = + &expect_orig->master->help.ct_pptp_info; + struct ip_nat_pptp *nat_pptp_info = + &expect_orig->master->nat.help.nat_pptp_info; + struct ip_conntrack *ct = expect_orig->master; - struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; + + struct ip_conntrack_tuple inv_t; + struct ip_conntrack_tuple *orig_t, *reply_t; /* save original PAC call ID in nat_info */ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + /* alter expectation */ + orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; + /* alter expectation for PNS->PAC direction */ - expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; - expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; - 
expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; + invert_tuplepr(&inv_t, &expect_orig->tuple); + expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); + expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); expect_orig->dir = IP_CT_DIR_ORIGINAL; + inv_t.src.ip = reply_t->src.ip; + inv_t.dst.ip = reply_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); + + if (!ip_conntrack_expect_related(expect_orig)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_orig)\n"); + return 1; + } /* alter expectation for PAC->PNS direction */ - expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; - expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; - expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; + invert_tuplepr(&inv_t, &expect_reply->tuple); + expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); + expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); + expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); expect_reply->dir = IP_CT_DIR_REPLY; + inv_t.src.ip = orig_t->src.ip; + inv_t.dst.ip = orig_t->dst.ip; + inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); + inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); + + if (!ip_conntrack_expect_related(expect_reply)) { + DEBUGP("successfully registered expect\n"); + } else { + DEBUGP("can't expect_related(expect_reply)\n"); + ip_conntrack_unexpect_related(expect_orig); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { + DEBUGP("can't register original keymap\n"); + ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + return 1; + } + + if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { + DEBUGP("can't register reply keymap\n"); + 
ip_conntrack_unexpect_related(expect_orig); + ip_conntrack_unexpect_related(expect_reply); + ip_ct_gre_keymap_destroy(ct); + return 1; + } + + return 0; } /* inbound packets == from PAC to PNS */ @@ -244,15 +297,15 @@ pptp_inbound_pkt(struct sk_buff **pskb, union pptp_ctrl_union *pptpReq) { struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg; - __be16 new_pcid; - unsigned int pcid_off; + u_int16_t msg, new_cid = 0, new_pcid; + unsigned int pcid_off, cid_off = 0; - new_pcid = nat_pptp_info->pns_call_id; + new_pcid = htons(nat_pptp_info->pns_call_id); switch (msg = ntohs(ctlh->messageType)) { case PPTP_OUT_CALL_REPLY: pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); + cid_off = offsetof(union pptp_ctrl_union, ocack.callID); break; case PPTP_IN_CALL_CONNECT: pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); @@ -271,7 +324,7 @@ pptp_inbound_pkt(struct sk_buff **pskb, break; default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? + DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? 
pptp_msg_name[msg]:pptp_msg_name[0]); /* fall through */ @@ -298,6 +351,17 @@ pptp_inbound_pkt(struct sk_buff **pskb, sizeof(new_pcid), (char *)&new_pcid, sizeof(new_pcid)) == 0) return NF_DROP; + + if (new_cid) { + DEBUGP("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_cid)); + if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_cid), (char *)&new_cid, + sizeof(new_cid)) == 0) + return NF_DROP; + } return NF_ACCEPT; } diff --git a/trunk/net/ipv4/netfilter/ip_nat_proto_gre.c b/trunk/net/ipv4/netfilter/ip_nat_proto_gre.c index bf91f9312b3c..38acfdf540eb 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_proto_gre.c +++ b/trunk/net/ipv4/netfilter/ip_nat_proto_gre.c @@ -6,10 +6,10 @@ * GRE is a generic encapsulation protocol, which is generally not very * suited for NAT, as it has no protocol-specific part as port numbers. * - * It has an optional key field, which may help us distinguishing two + * It has an optional key field, which may help us distinguishing two * connections between the same two hosts. * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 * * PPTP is built on top of a modified version of GRE, and has a mandatory * field called "CallID", which serves us for the same purpose as the key @@ -60,14 +60,14 @@ gre_in_range(const struct ip_conntrack_tuple *tuple, } /* generate unique tuple ... 
*/ -static int +static int gre_unique_tuple(struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range, enum ip_nat_manip_type maniptype, const struct ip_conntrack *conntrack) { static u_int16_t key; - __be16 *keyptr; + u_int16_t *keyptr; unsigned int min, i, range_size; if (maniptype == IP_NAT_MANIP_SRC) @@ -84,7 +84,7 @@ gre_unique_tuple(struct ip_conntrack_tuple *tuple, range_size = ntohs(range->max.gre.key) - min + 1; } - DEBUGP("min = %u, range_size = %u\n", min, range_size); + DEBUGP("min = %u, range_size = %u\n", min, range_size); for (i = 0; i < range_size; i++, key++) { *keyptr = htons(min + key % range_size); @@ -117,7 +117,7 @@ gre_manip_pkt(struct sk_buff **pskb, greh = (void *)(*pskb)->data + hdroff; pgreh = (struct gre_hdr_pptp *) greh; - /* we only have destination manip of a packet, since 'source key' + /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ if (maniptype == IP_NAT_MANIP_DST) { /* key manipulation is always dest */ @@ -129,16 +129,15 @@ gre_manip_pkt(struct sk_buff **pskb, } if (greh->csum) { /* FIXME: Never tested this code... 
*/ - *(gre_csum(greh)) = - nf_proto_csum_update(*pskb, - ~*(gre_key(greh)), + *(gre_csum(greh)) = + ip_nat_cheat_check(~*(gre_key(greh)), tuple->dst.u.gre.key, - *(gre_csum(greh)), 0); + *(gre_csum(greh))); } *(gre_key(greh)) = tuple->dst.u.gre.key; break; case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", + DEBUGP("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); pgreh->call_id = tuple->dst.u.gre.key; break; @@ -152,8 +151,8 @@ gre_manip_pkt(struct sk_buff **pskb, } /* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", +static struct ip_nat_protocol gre = { + .name = "GRE", .protonum = IPPROTO_GRE, .manip_pkt = gre_manip_pkt, .in_range = gre_in_range, @@ -164,7 +163,7 @@ static struct ip_nat_protocol gre = { .nfattr_to_range = ip_nat_port_nfattr_to_range, #endif }; - + int __init ip_nat_proto_gre_init(void) { return ip_nat_protocol_register(&gre); diff --git a/trunk/net/ipv4/netfilter/ip_nat_proto_icmp.c b/trunk/net/ipv4/netfilter/ip_nat_proto_icmp.c index ec50cc295317..31a3f4ccb99c 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ b/trunk/net/ipv4/netfilter/ip_nat_proto_icmp.c @@ -66,10 +66,10 @@ icmp_manip_pkt(struct sk_buff **pskb, return 0; hdr = (struct icmphdr *)((*pskb)->data + hdroff); - hdr->checksum = nf_proto_csum_update(*pskb, - hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum, 0); + + hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, + tuple->src.u.icmp.id, + hdr->checksum); hdr->un.echo.id = tuple->src.u.icmp.id; return 1; } diff --git a/trunk/net/ipv4/netfilter/ip_nat_proto_tcp.c b/trunk/net/ipv4/netfilter/ip_nat_proto_tcp.c index 72a6307bd2db..a3d14079eba6 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ b/trunk/net/ipv4/netfilter/ip_nat_proto_tcp.c @@ -129,9 +129,10 @@ tcp_manip_pkt(struct sk_buff **pskb, if (hdrsize < sizeof(*hdr)) return 1; - hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, hdr->check, 1); - hdr->check = nf_proto_csum_update(*pskb, 
oldport ^ 0xFFFF, newport, - hdr->check, 0); + hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(oldport ^ 0xFFFF, + newport, + hdr->check)); return 1; } diff --git a/trunk/net/ipv4/netfilter/ip_nat_proto_udp.c b/trunk/net/ipv4/netfilter/ip_nat_proto_udp.c index 5da196ae758c..ec6053fdc867 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_proto_udp.c +++ b/trunk/net/ipv4/netfilter/ip_nat_proto_udp.c @@ -113,16 +113,11 @@ udp_manip_pkt(struct sk_buff **pskb, newport = tuple->dst.u.udp.port; portptr = &hdr->dest; } - - if (hdr->check || (*pskb)->ip_summed == CHECKSUM_PARTIAL) { - hdr->check = nf_proto_csum_update(*pskb, ~oldip, newip, - hdr->check, 1); - hdr->check = nf_proto_csum_update(*pskb, - *portptr ^ 0xFFFF, newport, - hdr->check, 0); - if (!hdr->check) - hdr->check = -1; - } + if (hdr->check) /* 0 is a special case meaning no checksum */ + hdr->check = ip_nat_cheat_check(~oldip, newip, + ip_nat_cheat_check(*portptr ^ 0xFFFF, + newport, + hdr->check)); *portptr = newport; return 1; } diff --git a/trunk/net/ipv4/netfilter/ip_nat_rule.c b/trunk/net/ipv4/netfilter/ip_nat_rule.c index 7b703839aa58..1aba926c1cb0 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_rule.c +++ b/trunk/net/ipv4/netfilter/ip_nat_rule.c @@ -19,10 +19,14 @@ #include #include +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + #include #include #include #include +#include #if 0 #define DEBUGP printk @@ -100,7 +104,8 @@ static unsigned int ipt_snat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -142,7 +147,8 @@ static unsigned int ipt_dnat_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct ipt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; @@ -168,6 
+174,7 @@ static int ipt_snat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -184,6 +191,7 @@ static int ipt_dnat_checkentry(const char *tablename, const void *entry, const struct ipt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct ip_nat_multi_range_compat *mr = targinfo; @@ -247,7 +255,7 @@ int ip_nat_rule_find(struct sk_buff **pskb, { int ret; - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table); + ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); if (ret == NF_ACCEPT) { if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) diff --git a/trunk/net/ipv4/netfilter/ip_nat_standalone.c b/trunk/net/ipv4/netfilter/ip_nat_standalone.c index 9c577db62047..17de077a7901 100644 --- a/trunk/net/ipv4/netfilter/ip_nat_standalone.c +++ b/trunk/net/ipv4/netfilter/ip_nat_standalone.c @@ -30,6 +30,9 @@ #include #include +#define ASSERT_READ_LOCK(x) +#define ASSERT_WRITE_LOCK(x) + #include #include #include @@ -37,6 +40,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -106,6 +110,11 @@ ip_nat_fn(unsigned int hooknum, IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET))); + /* If we had a hardware checksum before, it's now invalid */ + if ((*pskb)->ip_summed == CHECKSUM_HW) + if (skb_checksum_help(*pskb, (out == NULL))) + return NF_DROP; + ct = ip_conntrack_get(*pskb, &ctinfo); /* Can't track? It's not due to stress, or conntrack would have dropped it. 
Hence it's the user's responsibilty to @@ -136,8 +145,8 @@ ip_nat_fn(unsigned int hooknum, case IP_CT_RELATED: case IP_CT_RELATED+IP_CT_IS_REPLY: if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(ct, ctinfo, - hooknum, pskb)) + if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, + CTINFO2DIR(ctinfo))) return NF_DROP; else return NF_ACCEPT; diff --git a/trunk/net/ipv4/netfilter/ip_queue.c b/trunk/net/ipv4/netfilter/ip_queue.c index 7edad790478a..198ac36db861 100644 --- a/trunk/net/ipv4/netfilter/ip_queue.c +++ b/trunk/net/ipv4/netfilter/ip_queue.c @@ -52,15 +52,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; -static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode = IPQ_COPY_NONE; +static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid __read_mostly; -static unsigned int copy_range __read_mostly; +static int peer_pid; +static unsigned int copy_range; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl __read_mostly; +static struct sock *ipqnl; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); @@ -208,9 +208,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || - entry->skb->ip_summed == CHECKSUM_COMPLETE) && - (*errp = skb_checksum_help(entry->skb))) { + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/trunk/net/ipv4/netfilter/ip_tables.c b/trunk/net/ipv4/netfilter/ip_tables.c index 800067d69a9a..048514f15f2f 100644 --- a/trunk/net/ipv4/netfilter/ip_tables.c +++ 
b/trunk/net/ipv4/netfilter/ip_tables.c @@ -180,7 +180,8 @@ ipt_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { if (net_ratelimit()) printk("ip_tables: error: `%s'\n", (char *)targinfo); @@ -216,7 +217,8 @@ ipt_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table) + struct ipt_table *table, + void *userdata) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); u_int16_t offset; @@ -306,7 +308,8 @@ ipt_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data); + t->data, + userdata); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ipt_entry *)table_base)->comefrom @@ -464,7 +467,8 @@ cleanup_match(struct ipt_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); + m->u.kernel.match->destroy(m->u.kernel.match, m->data, + m->u.match_size - sizeof(*m)); module_put(m->u.kernel.match->me); return 0; } @@ -517,6 +521,7 @@ check_match(struct ipt_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, m->data, + m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -573,10 +578,12 @@ check_entry(struct ipt_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto err; + goto cleanup_matches; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, + t->u.target_size + - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -648,7 +655,8 @@ cleanup_entry(struct ipt_entry *e, unsigned int *i) IPT_MATCH_ITERATE(e, cleanup_match, NULL); t = 
ipt_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); + t->u.kernel.target->destroy(t->u.kernel.target, t->data, + t->u.target_size - sizeof(*t)); module_put(t->u.kernel.target->me); return 0; } @@ -942,28 +950,73 @@ static short compat_calc_jump(u_int16_t offset) return delta; } -static void compat_standard_from_user(void *dst, void *src) +struct compat_ipt_standard_target { - int v = *(compat_int_t *)src; + struct compat_xt_entry_target target; + compat_int_t verdict; +}; - if (v > 0) - v += compat_calc_jump(v); - memcpy(dst, &v, sizeof(v)); -} +struct compat_ipt_standard +{ + struct compat_ipt_entry entry; + struct compat_ipt_standard_target target; +}; -static int compat_standard_to_user(void __user *dst, void *src) +#define IPT_ST_LEN XT_ALIGN(sizeof(struct ipt_standard_target)) +#define IPT_ST_COMPAT_LEN COMPAT_XT_ALIGN(sizeof(struct compat_ipt_standard_target)) +#define IPT_ST_OFFSET (IPT_ST_LEN - IPT_ST_COMPAT_LEN) + +static int compat_ipt_standard_fn(void *target, + void **dstptr, int *size, int convert) { - compat_int_t cv = *(int *)src; + struct compat_ipt_standard_target compat_st, *pcompat_st; + struct ipt_standard_target st, *pst; + int ret; - if (cv > 0) - cv -= compat_calc_jump(cv); - return copy_to_user(dst, &cv, sizeof(cv)) ? 
-EFAULT : 0; + ret = 0; + switch (convert) { + case COMPAT_TO_USER: + pst = target; + memcpy(&compat_st.target, &pst->target, + sizeof(compat_st.target)); + compat_st.verdict = pst->verdict; + if (compat_st.verdict > 0) + compat_st.verdict -= + compat_calc_jump(compat_st.verdict); + compat_st.target.u.user.target_size = IPT_ST_COMPAT_LEN; + if (copy_to_user(*dstptr, &compat_st, IPT_ST_COMPAT_LEN)) + ret = -EFAULT; + *size -= IPT_ST_OFFSET; + *dstptr += IPT_ST_COMPAT_LEN; + break; + case COMPAT_FROM_USER: + pcompat_st = target; + memcpy(&st.target, &pcompat_st->target, IPT_ST_COMPAT_LEN); + st.verdict = pcompat_st->verdict; + if (st.verdict > 0) + st.verdict += compat_calc_jump(st.verdict); + st.target.u.user.target_size = IPT_ST_LEN; + memcpy(*dstptr, &st, IPT_ST_LEN); + *size += IPT_ST_OFFSET; + *dstptr += IPT_ST_LEN; + break; + case COMPAT_CALC_SIZE: + *size += IPT_ST_OFFSET; + break; + default: + ret = -ENOPROTOOPT; + break; + } + return ret; } static inline int compat_calc_match(struct ipt_entry_match *m, int * size) { - *size += xt_compat_match_offset(m->u.kernel.match); + if (m->u.kernel.match->compat) + m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); + else + xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); return 0; } @@ -978,7 +1031,10 @@ static int compat_calc_entry(struct ipt_entry *e, struct xt_table_info *info, entry_offset = (void *)e - base; IPT_MATCH_ITERATE(e, compat_calc_match, &off); t = ipt_get_target(e); - off += xt_compat_target_offset(t->u.kernel.target); + if (t->u.kernel.target->compat) + t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); + else + xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); newinfo->size -= off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1364,13 +1420,17 @@ struct compat_ipt_replace { }; static inline int compat_copy_match_to_user(struct ipt_entry_match *m, - void * __user *dstptr, compat_uint_t *size) + void __user **dstptr, compat_uint_t *size) { - return 
xt_compat_match_to_user(m, dstptr, size); + if (m->u.kernel.match->compat) + return m->u.kernel.match->compat(m, dstptr, size, + COMPAT_TO_USER); + else + return xt_compat_match(m, dstptr, size, COMPAT_TO_USER); } static int compat_copy_entry_to_user(struct ipt_entry *e, - void * __user *dstptr, compat_uint_t *size) + void __user **dstptr, compat_uint_t *size) { struct ipt_entry_target __user *t; struct compat_ipt_entry __user *ce; @@ -1390,7 +1450,11 @@ static int compat_copy_entry_to_user(struct ipt_entry *e, if (ret) goto out; t = ipt_get_target(e); - ret = xt_compat_target_to_user(t, dstptr, size); + if (t->u.kernel.target->compat) + ret = t->u.kernel.target->compat(t, dstptr, size, + COMPAT_TO_USER); + else + ret = xt_compat_target(t, dstptr, size, COMPAT_TO_USER); if (ret) goto out; ret = -EFAULT; @@ -1422,7 +1486,11 @@ compat_check_calc_match(struct ipt_entry_match *m, return match ? PTR_ERR(match) : -ENOENT; } m->u.kernel.match = match; - *size += xt_compat_match_offset(match); + + if (m->u.kernel.match->compat) + m->u.kernel.match->compat(m, NULL, size, COMPAT_CALC_SIZE); + else + xt_compat_match(m, NULL, size, COMPAT_CALC_SIZE); (*i)++; return 0; @@ -1469,7 +1537,7 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, ret = IPT_MATCH_ITERATE(e, compat_check_calc_match, name, &e->ip, e->comefrom, &off, &j); if (ret != 0) - goto cleanup_matches; + goto out; t = ipt_get_target(e); target = try_then_request_module(xt_find_target(AF_INET, @@ -1479,11 +1547,14 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, if (IS_ERR(target) || !target) { duprintf("check_entry: `%s' not found\n", t->u.user.name); ret = target ? 
PTR_ERR(target) : -ENOENT; - goto cleanup_matches; + goto out; } t->u.kernel.target = target; - off += xt_compat_target_offset(target); + if (t->u.kernel.target->compat) + t->u.kernel.target->compat(t, NULL, &off, COMPAT_CALC_SIZE); + else + xt_compat_target(t, NULL, &off, COMPAT_CALC_SIZE); *size += off; ret = compat_add_offset(entry_offset, off); if (ret) @@ -1503,17 +1574,14 @@ check_compat_entry_size_and_hooks(struct ipt_entry *e, (*i)++; return 0; - out: - module_put(t->u.kernel.target->me); -cleanup_matches: IPT_MATCH_ITERATE(e, cleanup_match, &j); return ret; } static inline int compat_copy_match_from_user(struct ipt_entry_match *m, void **dstptr, compat_uint_t *size, const char *name, - const struct ipt_ip *ip, unsigned int hookmask, int *i) + const struct ipt_ip *ip, unsigned int hookmask) { struct ipt_entry_match *dm; struct ipt_match *match; @@ -1521,28 +1589,26 @@ static inline int compat_copy_match_from_user(struct ipt_entry_match *m, dm = (struct ipt_entry_match *)*dstptr; match = m->u.kernel.match; - xt_compat_match_from_user(m, dstptr, size); + if (match->compat) + match->compat(m, dstptr, size, COMPAT_FROM_USER); + else + xt_compat_match(m, dstptr, size, COMPAT_FROM_USER); ret = xt_check_match(match, AF_INET, dm->u.match_size - sizeof(*dm), name, hookmask, ip->proto, ip->invflags & IPT_INV_PROTO); if (ret) - goto err; + return ret; if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ip, match, dm->data, + dm->u.match_size - sizeof(*dm), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); - ret = -EINVAL; - goto err; + return -EINVAL; } - (*i)++; return 0; - -err: - module_put(m->u.kernel.match->me); - return ret; } static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, @@ -1553,23 +1619,25 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, struct ipt_target *target; struct ipt_entry *de; unsigned int origsize; - int ret, h, j; + int ret, h; ret = 
0; origsize = *size; de = (struct ipt_entry *)*dstptr; memcpy(de, e, sizeof(struct ipt_entry)); - j = 0; *dstptr += sizeof(struct compat_ipt_entry); ret = IPT_MATCH_ITERATE(e, compat_copy_match_from_user, dstptr, size, - name, &de->ip, de->comefrom, &j); + name, &de->ip, de->comefrom); if (ret) - goto cleanup_matches; + goto out; de->target_offset = e->target_offset - (origsize - *size); t = ipt_get_target(e); target = t->u.kernel.target; - xt_compat_target_from_user(t, dstptr, size); + if (target->compat) + target->compat(t, dstptr, size, COMPAT_FROM_USER); + else + xt_compat_target(t, dstptr, size, COMPAT_FROM_USER); de->next_offset = e->next_offset - (origsize - *size); for (h = 0; h < NF_IP_NUMHOOKS; h++) { @@ -1585,26 +1653,22 @@ static int compat_copy_entry_from_user(struct ipt_entry *e, void **dstptr, name, e->comefrom, e->ip.proto, e->ip.invflags & IPT_INV_PROTO); if (ret) - goto err; + goto out; ret = -EINVAL; if (t->u.kernel.target == &ipt_standard_target) { if (!standard_check(t, *size)) - goto err; + goto out; } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, de, target, - t->data, de->comefrom)) { + t->data, t->u.target_size - sizeof(*t), + de->comefrom)) { duprintf("ip_tables: compat: check failed for `%s'.\n", t->u.kernel.target->name); - goto err; + goto out; } ret = 0; - return ret; - -err: - module_put(t->u.kernel.target->me); -cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); +out: return ret; } @@ -1925,8 +1989,6 @@ compat_get_entries(struct compat_ipt_get_entries __user *uptr, int *len) return ret; } -static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); - static int compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) { @@ -1940,7 +2002,8 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = compat_get_entries(user, len); break; default: - ret = do_ipt_get_ctl(sk, cmd, user, len); + duprintf("compat_do_ipt_get_ctl: unknown request 
%i\n", cmd); + ret = -EINVAL; } return ret; } @@ -2122,6 +2185,7 @@ icmp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ipt_icmp *icmpinfo = matchinfo; @@ -2136,9 +2200,7 @@ static struct ipt_target ipt_standard_target = { .targetsize = sizeof(int), .family = AF_INET, #ifdef CONFIG_COMPAT - .compatsize = sizeof(compat_int_t), - .compat_from_user = compat_standard_from_user, - .compat_to_user = compat_standard_to_user, + .compat = &compat_ipt_standard_fn, #endif }; diff --git a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c index 41589665fc5d..d994c5f5744c 100644 --- a/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/trunk/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -302,7 +302,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_clusterip_tgt_info *cipinfo = targinfo; enum ip_conntrack_info ctinfo; @@ -372,6 +373,7 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; @@ -448,7 +450,8 @@ checkentry(const char *tablename, } /* drop reference count of cluster config when rule is deleted */ -static void destroy(const struct xt_target *target, void *targinfo) +static void destroy(const struct xt_target *target, void *targinfo, + unsigned int targinfosize) { struct ipt_clusterip_tgt_info *cipinfo = targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_DSCP.c b/trunk/net/ipv4/netfilter/ipt_DSCP.c new file mode 100644 index 000000000000..c8e971288dfe --- /dev/null +++ b/trunk/net/ipv4/netfilter/ipt_DSCP.c @@ -0,0 +1,96 @@ +/* iptables module for setting the IPv4 DSCP field, Version 1.8 + * + * (C) 2002 by Harald Welte + * based 
on ipt_FTOS.c (C) 2000 by Matthew G. Marsh + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * See RFC2474 for a description of the DSCP field within the IP Header. + * + * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp +*/ + +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables DSCP modification module"); +MODULE_LICENSE("GPL"); + +static unsigned int +target(struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + unsigned int hooknum, + const struct xt_target *target, + const void *targinfo, + void *userinfo) +{ + const struct ipt_DSCP_info *dinfo = targinfo; + u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); + + + if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { + u_int16_t diffs[2]; + + if (!skb_make_writable(pskb, sizeof(struct iphdr))) + return NF_DROP; + + diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; + (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) + | sh_dscp; + diffs[1] = htons((*pskb)->nh.iph->tos); + (*pskb)->nh.iph->check + = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + (*pskb)->nh.iph->check + ^ 0xFFFF)); + } + return IPT_CONTINUE; +} + +static int +checkentry(const char *tablename, + const void *e_void, + const struct xt_target *target, + void *targinfo, + unsigned int targinfosize, + unsigned int hook_mask) +{ + const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; + + if ((dscp > IPT_DSCP_MAX)) { + printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); + return 0; + } + return 1; +} + +static struct ipt_target ipt_dscp_reg = { + .name = "DSCP", + .target = target, + .targetsize = sizeof(struct ipt_DSCP_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init ipt_dscp_init(void) 
+{ + return ipt_register_target(&ipt_dscp_reg); +} + +static void __exit ipt_dscp_fini(void) +{ + ipt_unregister_target(&ipt_dscp_reg); +} + +module_init(ipt_dscp_init); +module_exit(ipt_dscp_fini); diff --git a/trunk/net/ipv4/netfilter/ipt_ECN.c b/trunk/net/ipv4/netfilter/ipt_ECN.c index 23f9c7ebe7eb..4adf5c9d34f5 100644 --- a/trunk/net/ipv4/netfilter/ipt_ECN.c +++ b/trunk/net/ipv4/netfilter/ipt_ECN.c @@ -27,28 +27,32 @@ MODULE_DESCRIPTION("iptables ECN modification module"); static inline int set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) { - struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; + if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) + != (einfo->ip_ect & IPT_ECN_IP_MASK)) { + u_int16_t diffs[2]; - if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { if (!skb_make_writable(pskb, sizeof(struct iphdr))) return 0; - iph = (*pskb)->nh.iph; - oldtos = iph->tos; - iph->tos &= ~IPT_ECN_IP_MASK; - iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, - iph->check); + + diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; + (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; + (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + diffs[1] = htons((*pskb)->nh.iph->tos); + (*pskb)->nh.iph->check + = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + (*pskb)->nh.iph->check + ^0xFFFF)); } return 1; } /* Return 0 if there was an error. */ static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) +set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) { struct tcphdr _tcph, *tcph; - u_int16_t oldval; + u_int16_t diffs[2]; /* Not enought header? 
*/ tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, @@ -66,16 +70,22 @@ set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) return 0; tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; - oldval = ((u_int16_t *)tcph)[6]; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, inward)) + return 0; + + diffs[0] = ((u_int16_t *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; + diffs[1] = ((u_int16_t *)tcph)[6]; + diffs[0] = diffs[0] ^ 0xFFFF; - tcph->check = nf_proto_csum_update((*pskb), - oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], - tcph->check, 0); + if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) + tcph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + tcph->check^0xFFFF)); return 1; } @@ -85,7 +95,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_ECN_info *einfo = targinfo; @@ -95,7 +106,7 @@ target(struct sk_buff **pskb, if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo)) + if (!set_ect_tcp(pskb, einfo, (out == NULL))) return NF_DROP; return IPT_CONTINUE; @@ -106,6 +117,7 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_LOG.c b/trunk/net/ipv4/netfilter/ipt_LOG.c index 7dc820df8bc5..b98f7b08b084 100644 --- a/trunk/net/ipv4/netfilter/ipt_LOG.c +++ b/trunk/net/ipv4/netfilter/ipt_LOG.c @@ -416,7 +416,8 @@ ipt_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void 
*targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -439,6 +440,7 @@ static int ipt_log_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_log_info *loginfo = targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c b/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c index bc65168a3437..ebd94f2abf0d 100644 --- a/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/trunk/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -42,6 +42,7 @@ masquerade_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -63,7 +64,8 @@ masquerade_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_NETMAP.c b/trunk/net/ipv4/netfilter/ipt_NETMAP.c index beb2914225ff..736c4b5a86a7 100644 --- a/trunk/net/ipv4/netfilter/ipt_NETMAP.c +++ b/trunk/net/ipv4/netfilter/ipt_NETMAP.c @@ -33,6 +33,7 @@ check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -54,7 +55,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_REDIRECT.c b/trunk/net/ipv4/netfilter/ipt_REDIRECT.c index f03d43671c6d..f290463232de 100644 --- a/trunk/net/ipv4/netfilter/ipt_REDIRECT.c +++ 
b/trunk/net/ipv4/netfilter/ipt_REDIRECT.c @@ -36,6 +36,7 @@ redirect_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ip_nat_multi_range_compat *mr = targinfo; @@ -57,7 +58,8 @@ redirect_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_REJECT.c b/trunk/net/ipv4/netfilter/ipt_REJECT.c index b81821edd893..269bc2067cb8 100644 --- a/trunk/net/ipv4/netfilter/ipt_REJECT.c +++ b/trunk/net/ipv4/netfilter/ipt_REJECT.c @@ -90,7 +90,6 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; - security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); @@ -185,7 +184,6 @@ static void send_reset(struct sk_buff *oldskb, int hook) tcph->urg_ptr = 0; /* Adjust TCP checksum */ - nskb->ip_summed = CHECKSUM_NONE; tcph->check = 0; tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), nskb->nh.iph->saddr, @@ -228,7 +226,8 @@ static unsigned int reject(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_reject_info *reject = targinfo; @@ -276,6 +275,7 @@ static int check(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_reject_info *rejinfo = targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_SAME.c b/trunk/net/ipv4/netfilter/ipt_SAME.c index efbcb1198832..7169b09b5a67 100644 --- a/trunk/net/ipv4/netfilter/ipt_SAME.c +++ b/trunk/net/ipv4/netfilter/ipt_SAME.c @@ -52,6 +52,7 @@ 
same_check(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { unsigned int count, countess, rangeip, index = 0; @@ -115,7 +116,8 @@ same_check(const char *tablename, } static void -same_destroy(const struct xt_target *target, void *targinfo) +same_destroy(const struct xt_target *target, void *targinfo, + unsigned int targinfosize) { struct ipt_same_info *mr = targinfo; @@ -131,7 +133,8 @@ same_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { struct ip_conntrack *ct; enum ip_conntrack_info ctinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_TCPMSS.c b/trunk/net/ipv4/netfilter/ipt_TCPMSS.c index 4246c4321e5b..ef2fe5b3f0d8 100644 --- a/trunk/net/ipv4/netfilter/ipt_TCPMSS.c +++ b/trunk/net/ipv4/netfilter/ipt_TCPMSS.c @@ -21,14 +21,26 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables TCP MSS modification module"); +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) 
+#endif + +static u_int16_t +cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) +{ + u_int32_t diffs[] = { oldvalinv, newval }; + return csum_fold(csum_partial((char *)diffs, sizeof(diffs), + oldcheck^0xFFFF)); +} + static inline unsigned int optlen(const u_int8_t *opt, unsigned int offset) { /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) - return 1; - else - return opt[offset+1]; + if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; + else return opt[offset+1]; } static unsigned int @@ -37,7 +49,8 @@ ipt_tcpmss_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; struct tcphdr *tcph; @@ -49,8 +62,13 @@ ipt_tcpmss_target(struct sk_buff **pskb, if (!skb_make_writable(pskb, (*pskb)->len)) return NF_DROP; + if ((*pskb)->ip_summed == CHECKSUM_HW && + skb_checksum_help(*pskb, out == NULL)) + return NF_DROP; + iph = (*pskb)->nh.iph; tcplen = (*pskb)->len - iph->ihl*4; + tcph = (void *)iph + iph->ihl*4; /* Since it passed flags test in tcp match, we know it is is @@ -66,41 +84,54 @@ ipt_tcpmss_target(struct sk_buff **pskb, return NF_DROP; } - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if (dst_mtu((*pskb)->dst) <= sizeof(struct iphdr) + - sizeof(struct tcphdr)) { + if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { + if(!(*pskb)->dst) { if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target: " - "unknown or invalid path-MTU (%d)\n", - dst_mtu((*pskb)->dst)); + printk(KERN_ERR + "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); return NF_DROP; /* or IPT_CONTINUE ?? 
*/ } - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - - sizeof(struct tcphdr); + if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { + if (net_ratelimit()) + printk(KERN_ERR + "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); + return NF_DROP; /* or IPT_CONTINUE ?? */ + } + + newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); } else newmss = tcpmssinfo->mss; opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) { - if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS && - opt[i+1] == TCPOLEN_MSS) { + for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ + if ((opt[i] == TCPOPT_MSS) && + ((tcph->doff*4 - i) >= TCPOLEN_MSS) && + (opt[i+1] == TCPOLEN_MSS)) { u_int16_t oldmss; oldmss = (opt[i+2] << 8) | opt[i+3]; - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - oldmss <= newmss) - return IPT_CONTINUE; + if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && + (oldmss <= newmss)) + return IPT_CONTINUE; opt[i+2] = (newmss & 0xff00) >> 8; opt[i+3] = (newmss & 0x00ff); - tcph->check = nf_proto_csum_update(*pskb, - htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check, 0); - return IPT_CONTINUE; + tcph->check = cheat_check(htons(oldmss)^0xFFFF, + htons(newmss), + tcph->check); + + DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" + "->%u.%u.%u.%u:%hu changed TCP MSS option" + " (from %u to %u)\n", + NIPQUAD((*pskb)->nh.iph->saddr), + ntohs(tcph->source), + NIPQUAD((*pskb)->nh.iph->daddr), + ntohs(tcph->dest), + oldmss, newmss); + goto retmodified; } } @@ -112,8 +143,13 @@ ipt_tcpmss_target(struct sk_buff **pskb, newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) + if (!newskb) { + if (net_ratelimit()) + printk(KERN_ERR "ipt_tcpmss_target:" + " unable to allocate larger skb\n"); return NF_DROP; + } + kfree_skb(*pskb); *pskb = newskb; iph = (*pskb)->nh.iph; @@ -125,29 
+161,36 @@ ipt_tcpmss_target(struct sk_buff **pskb, opt = (u_int8_t *)tcph + sizeof(struct tcphdr); memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - tcph->check = nf_proto_csum_update(*pskb, - htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), - tcph->check, 1); + tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, + htons(tcplen + TCPOLEN_MSS), tcph->check); + tcplen += TCPOLEN_MSS; + opt[0] = TCPOPT_MSS; opt[1] = TCPOLEN_MSS; opt[2] = (newmss & 0xff00) >> 8; opt[3] = (newmss & 0x00ff); - tcph->check = nf_proto_csum_update(*pskb, ~0, *((u_int32_t *)opt), - tcph->check, 0); + tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); oldval = ((u_int16_t *)tcph)[6]; tcph->doff += TCPOLEN_MSS/4; - tcph->check = nf_proto_csum_update(*pskb, - oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], - tcph->check, 0); + tcph->check = cheat_check(oldval ^ 0xFFFF, + ((u_int16_t *)tcph)[6], tcph->check); newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = nf_csum_update(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); + iph->check = cheat_check(iph->tot_len ^ 0xFFFF, + newtotlen, iph->check); iph->tot_len = newtotlen; + + DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" + "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", + NIPQUAD((*pskb)->nh.iph->saddr), + ntohs(tcph->source), + NIPQUAD((*pskb)->nh.iph->daddr), + ntohs(tcph->dest), + newmss); + + retmodified: return IPT_CONTINUE; } @@ -157,9 +200,9 @@ static inline int find_syn_match(const struct ipt_entry_match *m) { const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - if (strcmp(m->u.kernel.match->name, "tcp") == 0 && - tcpinfo->flg_cmp & TH_SYN && - !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) + if (strcmp(m->u.kernel.match->name, "tcp") == 0 + && (tcpinfo->flg_cmp & TH_SYN) + && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) return 1; return 0; @@ -171,17 +214,17 @@ ipt_tcpmss_checkentry(const char *tablename, const void *e_void, const struct xt_target *target, void *targinfo, + 
unsigned int targinfosize, unsigned int hook_mask) { const struct ipt_tcpmss_info *tcpmssinfo = targinfo; const struct ipt_entry *e = e_void; - if (tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU && - (hook_mask & ~((1 << NF_IP_FORWARD) | - (1 << NF_IP_LOCAL_OUT) | - (1 << NF_IP_POST_ROUTING))) != 0) { - printk("TCPMSS: path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && + ((hook_mask & ~((1 << NF_IP_FORWARD) + | (1 << NF_IP_LOCAL_OUT) + | (1 << NF_IP_POST_ROUTING))) != 0)) { + printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return 0; } diff --git a/trunk/net/ipv4/netfilter/ipt_TOS.c b/trunk/net/ipv4/netfilter/ipt_TOS.c index 471a4c438b0a..1c7a5ca399b3 100644 --- a/trunk/net/ipv4/netfilter/ipt_TOS.c +++ b/trunk/net/ipv4/netfilter/ipt_TOS.c @@ -26,20 +26,27 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ipt_tos_target_info *tosinfo = targinfo; - struct iphdr *iph = (*pskb)->nh.iph; - u_int16_t oldtos; - if ((iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { + if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { + u_int16_t diffs[2]; + if (!skb_make_writable(pskb, sizeof(struct iphdr))) return NF_DROP; - iph = (*pskb)->nh.iph; - oldtos = iph->tos; - iph->tos = (iph->tos & IPTOS_PREC_MASK) | tosinfo->tos; - iph->check = nf_csum_update(oldtos ^ 0xFFFF, iph->tos, - iph->check); + + diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; + (*pskb)->nh.iph->tos + = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) + | tosinfo->tos; + diffs[1] = htons((*pskb)->nh.iph->tos); + (*pskb)->nh.iph->check + = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + (*pskb)->nh.iph->check + ^0xFFFF)); } return IPT_CONTINUE; } @@ -49,6 +56,7 @@ checkentry(const char *tablename, const void *e_void, const struct xt_target 
*target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; diff --git a/trunk/net/ipv4/netfilter/ipt_TTL.c b/trunk/net/ipv4/netfilter/ipt_TTL.c index 96e79cc6d0f2..f48892ae0be5 100644 --- a/trunk/net/ipv4/netfilter/ipt_TTL.c +++ b/trunk/net/ipv4/netfilter/ipt_TTL.c @@ -23,10 +23,11 @@ static unsigned int ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { struct iphdr *iph; const struct ipt_TTL_info *info = targinfo; + u_int16_t diffs[2]; int new_ttl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -54,10 +55,12 @@ ipt_ttl_target(struct sk_buff **pskb, } if (new_ttl != iph->ttl) { - iph->check = nf_csum_update(ntohs((iph->ttl << 8)) ^ 0xFFFF, - ntohs(new_ttl << 8), - iph->check); + diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; iph->ttl = new_ttl; + diffs[1] = htons(((unsigned)iph->ttl) << 8); + iph->check = csum_fold(csum_partial((char *)diffs, + sizeof(diffs), + iph->check^0xFFFF)); } return IPT_CONTINUE; @@ -67,6 +70,7 @@ static int ipt_ttl_checkentry(const char *tablename, const void *e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct ipt_TTL_info *info = targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_ULOG.c b/trunk/net/ipv4/netfilter/ipt_ULOG.c index 2b104ea54f48..d46fd677fa11 100644 --- a/trunk/net/ipv4/netfilter/ipt_ULOG.c +++ b/trunk/net/ipv4/netfilter/ipt_ULOG.c @@ -308,7 +308,7 @@ static unsigned int ipt_ulog_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; @@ -346,6 +346,7 @@ static int ipt_ulog_checkentry(const char *tablename, const void 
*e, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hookmask) { struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_ah.c b/trunk/net/ipv4/netfilter/ipt_ah.c index 1798f86bc534..2927135873d7 100644 --- a/trunk/net/ipv4/netfilter/ipt_ah.c +++ b/trunk/net/ipv4/netfilter/ipt_ah.c @@ -74,6 +74,7 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct ipt_ah *ahinfo = matchinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_dscp.c b/trunk/net/ipv4/netfilter/ipt_dscp.c new file mode 100644 index 000000000000..47177591aeb6 --- /dev/null +++ b/trunk/net/ipv4/netfilter/ipt_dscp.c @@ -0,0 +1,54 @@ +/* IP tables module for matching the value of the IPv4 DSCP field + * + * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp + * + * (C) 2002 by Harald Welte + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include + +#include +#include + +MODULE_AUTHOR("Harald Welte "); +MODULE_DESCRIPTION("iptables DSCP matching module"); +MODULE_LICENSE("GPL"); + +static int match(const struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + const struct xt_match *match, const void *matchinfo, + int offset, unsigned int protoff, int *hotdrop) +{ + const struct ipt_dscp_info *info = matchinfo; + const struct iphdr *iph = skb->nh.iph; + + u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); + + return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; +} + +static struct ipt_match dscp_match = { + .name = "dscp", + .match = match, + .matchsize = sizeof(struct ipt_dscp_info), + .me = THIS_MODULE, +}; + +static int __init ipt_dscp_init(void) +{ + return ipt_register_match(&dscp_match); +} + +static void __exit ipt_dscp_fini(void) +{ + ipt_unregister_match(&dscp_match); + +} + +module_init(ipt_dscp_init); +module_exit(ipt_dscp_fini); diff --git a/trunk/net/ipv4/netfilter/ipt_ecn.c b/trunk/net/ipv4/netfilter/ipt_ecn.c index dafbdec0efc0..b28250414933 100644 --- a/trunk/net/ipv4/netfilter/ipt_ecn.c +++ b/trunk/net/ipv4/netfilter/ipt_ecn.c @@ -88,7 +88,8 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) { const struct ipt_ecn_info *info = matchinfo; const struct ipt_ip *ip = ip_void; diff --git a/trunk/net/ipv4/netfilter/ipt_hashlimit.c b/trunk/net/ipv4/netfilter/ipt_hashlimit.c index 4f73a61aa3dd..3bd2368e1fc9 100644 --- a/trunk/net/ipv4/netfilter/ipt_hashlimit.c +++ b/trunk/net/ipv4/netfilter/ipt_hashlimit.c @@ -478,6 +478,7 @@ hashlimit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { struct ipt_hashlimit_info *r = 
matchinfo; @@ -528,46 +529,18 @@ hashlimit_checkentry(const char *tablename, } static void -hashlimit_destroy(const struct xt_match *match, void *matchinfo) +hashlimit_destroy(const struct xt_match *match, void *matchinfo, + unsigned int matchsize) { struct ipt_hashlimit_info *r = matchinfo; htable_put(r->hinfo); } -#ifdef CONFIG_COMPAT -struct compat_ipt_hashlimit_info { - char name[IFNAMSIZ]; - struct hashlimit_cfg cfg; - compat_uptr_t hinfo; - compat_uptr_t master; -}; - -static void compat_from_user(void *dst, void *src) -{ - int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); - - memcpy(dst, src, off); - memset(dst + off, 0, sizeof(struct compat_ipt_hashlimit_info) - off); -} - -static int compat_to_user(void __user *dst, void *src) -{ - int off = offsetof(struct compat_ipt_hashlimit_info, hinfo); - - return copy_to_user(dst, src, off) ? -EFAULT : 0; -} -#endif - static struct ipt_match ipt_hashlimit = { .name = "hashlimit", .match = hashlimit_match, .matchsize = sizeof(struct ipt_hashlimit_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_ipt_hashlimit_info), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, -#endif .checkentry = hashlimit_checkentry, .destroy = hashlimit_destroy, .me = THIS_MODULE diff --git a/trunk/net/ipv4/netfilter/ipt_owner.c b/trunk/net/ipv4/netfilter/ipt_owner.c index 78c336f12a9e..5ac6ac023b5e 100644 --- a/trunk/net/ipv4/netfilter/ipt_owner.c +++ b/trunk/net/ipv4/netfilter/ipt_owner.c @@ -56,6 +56,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ipt_owner_info *info = matchinfo; diff --git a/trunk/net/ipv4/netfilter/ipt_recent.c b/trunk/net/ipv4/netfilter/ipt_recent.c index 32ae8d7ac506..61a2139f9cfd 100644 --- a/trunk/net/ipv4/netfilter/ipt_recent.c +++ b/trunk/net/ipv4/netfilter/ipt_recent.c @@ -35,20 +35,14 @@ static unsigned int ip_list_tot = 100; static unsigned 
int ip_pkt_list_tot = 20; static unsigned int ip_list_hash_size = 0; static unsigned int ip_list_perms = 0644; -static unsigned int ip_list_uid = 0; -static unsigned int ip_list_gid = 0; module_param(ip_list_tot, uint, 0400); module_param(ip_pkt_list_tot, uint, 0400); module_param(ip_list_hash_size, uint, 0400); module_param(ip_list_perms, uint, 0400); -module_param(ip_list_uid, uint, 0400); -module_param(ip_list_gid, uint, 0400); MODULE_PARM_DESC(ip_list_tot, "number of IPs to remember per list"); MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP to remember (max. 255)"); MODULE_PARM_DESC(ip_list_hash_size, "size of hash table used to look up IPs"); MODULE_PARM_DESC(ip_list_perms, "permissions on /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_uid,"owner of /proc/net/ipt_recent/* files"); -MODULE_PARM_DESC(ip_list_gid,"owning group of /proc/net/ipt_recent/* files"); struct recent_entry { @@ -238,7 +232,7 @@ ipt_recent_match(const struct sk_buff *skb, static int ipt_recent_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) + unsigned int matchsize, unsigned int hook_mask) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; @@ -280,8 +274,6 @@ ipt_recent_checkentry(const char *tablename, const void *ip, goto out; } t->proc->proc_fops = &recent_fops; - t->proc->uid = ip_list_uid; - t->proc->gid = ip_list_gid; t->proc->data = t; #endif spin_lock_bh(&recent_lock); @@ -294,7 +286,8 @@ ipt_recent_checkentry(const char *tablename, const void *ip, } static void -ipt_recent_destroy(const struct xt_match *match, void *matchinfo) +ipt_recent_destroy(const struct xt_match *match, void *matchinfo, + unsigned int matchsize) { const struct ipt_recent_info *info = matchinfo; struct recent_table *t; diff --git a/trunk/net/ipv4/netfilter/iptable_filter.c b/trunk/net/ipv4/netfilter/iptable_filter.c index e2e7dd8d7903..7f417484bfbf 100644 --- 
a/trunk/net/ipv4/netfilter/iptable_filter.c +++ b/trunk/net/ipv4/netfilter/iptable_filter.c @@ -90,7 +90,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); } static unsigned int @@ -108,7 +108,7 @@ ipt_local_out_hook(unsigned int hook, return NF_ACCEPT; } - return ipt_do_table(pskb, hook, in, out, &packet_filter); + return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); } static struct nf_hook_ops ipt_ops[] = { diff --git a/trunk/net/ipv4/netfilter/iptable_mangle.c b/trunk/net/ipv4/netfilter/iptable_mangle.c index 79336cb42527..4e7998beda63 100644 --- a/trunk/net/ipv4/netfilter/iptable_mangle.c +++ b/trunk/net/ipv4/netfilter/iptable_mangle.c @@ -119,7 +119,7 @@ ipt_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_mangler); + return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); } static unsigned int @@ -148,7 +148,7 @@ ipt_local_hook(unsigned int hook, daddr = (*pskb)->nh.iph->daddr; tos = (*pskb)->nh.iph->tos; - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler); + ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); /* Reroute for ANY change. */ if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE && ((*pskb)->nh.iph->saddr != saddr diff --git a/trunk/net/ipv4/netfilter/iptable_raw.c b/trunk/net/ipv4/netfilter/iptable_raw.c index bcbeb4aeacd9..7912cce1e1b8 100644 --- a/trunk/net/ipv4/netfilter/iptable_raw.c +++ b/trunk/net/ipv4/netfilter/iptable_raw.c @@ -95,7 +95,7 @@ ipt_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_raw); + return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); } /* 'raw' is the very first table. 
*/ diff --git a/trunk/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/trunk/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 790f00d500c3..663a73ee3f2f 100644 --- a/trunk/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/trunk/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -25,7 +25,7 @@ #include #include -unsigned long nf_ct_icmp_timeout __read_mostly = 30*HZ; +unsigned long nf_ct_icmp_timeout = 30*HZ; #if 0 #define DEBUGP printk diff --git a/trunk/net/ipv4/proc.c b/trunk/net/ipv4/proc.c index 9c6cbe3d9fb8..d61e2a9d394d 100644 --- a/trunk/net/ipv4/proc.c +++ b/trunk/net/ipv4/proc.c @@ -173,8 +173,6 @@ static const struct snmp_mib snmp4_udp_list[] = { SNMP_MIB_ITEM("NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("InErrors", UDP_MIB_INERRORS), SNMP_MIB_ITEM("OutDatagrams", UDP_MIB_OUTDATAGRAMS), - SNMP_MIB_ITEM("RcvbufErrors", UDP_MIB_RCVBUFERRORS), - SNMP_MIB_ITEM("SndbufErrors", UDP_MIB_SNDBUFERRORS), SNMP_MIB_SENTINEL }; diff --git a/trunk/net/ipv4/raw.c b/trunk/net/ipv4/raw.c index 0e935b4c8741..62b2762a2420 100644 --- a/trunk/net/ipv4/raw.c +++ b/trunk/net/ipv4/raw.c @@ -38,7 +38,8 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. 
*/ - + +#include #include #include #include @@ -483,7 +484,6 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); - security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/trunk/net/ipv4/route.c b/trunk/net/ipv4/route.c index 20ffe8e88c0f..b873cbcdd0b8 100644 --- a/trunk/net/ipv4/route.c +++ b/trunk/net/ipv4/route.c @@ -2639,54 +2639,51 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, { struct rtable *rt = (struct rtable*)skb->dst; struct rtmsg *r; - struct nlmsghdr *nlh; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; struct rta_cacheinfo ci; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); - if (nlh == NULL) - return -ENOBUFS; - - r = nlmsg_data(nlh); +#ifdef CONFIG_IP_MROUTE + struct rtattr *eptr; +#endif + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); + r = NLMSG_DATA(nlh); r->rtm_family = AF_INET; r->rtm_dst_len = 32; r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; - NLA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; if (rt->rt_flags & RTCF_NOTIFY) r->rtm_flags |= RTM_F_NOTIFY; - - NLA_PUT_U32(skb, RTA_DST, rt->rt_dst); - + RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst); if (rt->fl.fl4_src) { r->rtm_src_len = 32; - NLA_PUT_U32(skb, RTA_SRC, rt->fl.fl4_src); + RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src); } if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->u.dst.dev->ifindex); + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); #ifdef CONFIG_NET_CLS_ROUTE if (rt->u.dst.tclassid) - NLA_PUT_U32(skb, RTA_FLOW, rt->u.dst.tclassid); + RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid); #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rt->rt_multipath_alg != IP_MP_ALG_NONE) - 
NLA_PUT_U32(skb, RTA_MP_ALGO, rt->rt_multipath_alg); + if (rt->rt_multipath_alg != IP_MP_ALG_NONE) { + __u32 alg = rt->rt_multipath_alg; + + RTA_PUT(skb, RTA_MP_ALGO, 4, &alg); + } #endif if (rt->fl.iif) - NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_spec_dst); + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst); else if (rt->rt_src != rt->fl.fl4_src) - NLA_PUT_U32(skb, RTA_PREFSRC, rt->rt_src); - + RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src); if (rt->rt_dst != rt->rt_gateway) - NLA_PUT_U32(skb, RTA_GATEWAY, rt->rt_gateway); - + RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway); if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto nla_put_failure; - + goto rtattr_failure; ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); ci.rta_used = rt->u.dst.__use; ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); @@ -2703,7 +2700,10 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, ci.rta_tsage = xtime.tv_sec - rt->peer->tcp_ts_stamp; } } - +#ifdef CONFIG_IP_MROUTE + eptr = (struct rtattr*)skb->tail; +#endif + RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); if (rt->fl.iif) { #ifdef CONFIG_IP_MROUTE u32 dst = rt->rt_dst; @@ -2715,46 +2715,41 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, if (!nowait) { if (err == 0) return 0; - goto nla_put_failure; + goto nlmsg_failure; } else { if (err == -EMSGSIZE) - goto nla_put_failure; - ci.rta_error = err; + goto nlmsg_failure; + ((struct rta_cacheinfo*)RTA_DATA(eptr))->rta_error = err; } } } else #endif - NLA_PUT_U32(skb, RTA_IIF, rt->fl.iif); + RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); } - NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); - - return nlmsg_end(skb, nlh); + nlh->nlmsg_len = skb->tail - b; + return skb->len; -nla_put_failure: - return nlmsg_cancel(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *rtm; - struct nlattr 
*tb[RTA_MAX+1]; + struct rtattr **rta = arg; + struct rtmsg *rtm = NLMSG_DATA(nlh); struct rtable *rt = NULL; - u32 dst, src, iif; - int err; + u32 dst = 0; + u32 src = 0; + int iif = 0; + int err = -ENOBUFS; struct sk_buff *skb; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); - if (err < 0) - goto errout; - - rtm = nlmsg_data(nlh); - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto errout; - } + if (!skb) + goto out; /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2765,61 +2760,62 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0; - dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0; - iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; + if (rta[RTA_SRC - 1]) + memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); + if (rta[RTA_DST - 1]) + memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); + if (rta[RTA_IIF - 1]) + memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); if (iif) { - struct net_device *dev; - - dev = __dev_get_by_index(iif); - if (dev == NULL) { - err = -ENODEV; - goto errout_free; - } - + struct net_device *dev = __dev_get_by_index(iif); + err = -ENODEV; + if (!dev) + goto out_free; skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - - rt = (struct rtable*) skb->dst; - if (err == 0 && rt->u.dst.error) + rt = (struct rtable*)skb->dst; + if (!err && rt->u.dst.error) err = -rt->u.dst.error; } else { - struct flowi fl = { - .nl_u = { - .ip4_u = { - .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos, - }, - }, - .oif = tb[RTA_OIF] ? 
nla_get_u32(tb[RTA_OIF]) : 0, - }; + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, + .saddr = src, + .tos = rtm->rtm_tos } } }; + int oif = 0; + if (rta[RTA_OIF - 1]) + memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); + fl.oif = oif; err = ip_route_output_key(&rt, &fl); } - if (err) - goto errout_free; + goto out_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (err <= 0) - goto errout_free; + if (!err) + goto out_free; + if (err < 0) { + err = -EMSGSIZE; + goto out_free; + } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -errout: - return err; + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: return err; -errout_free: +out_free: kfree_skb(skb); - goto errout; + goto out; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -3147,9 +3143,13 @@ int __init ip_rt_init(void) } #endif - ipv4_dst_ops.kmem_cachep = - kmem_cache_create("ip_dst_cache", sizeof(struct rtable), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + ipv4_dst_ops.kmem_cachep = kmem_cache_create("ip_dst_cache", + sizeof(struct rtable), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + + if (!ipv4_dst_ops.kmem_cachep) + panic("IP: failed to allocate ip_dst_cache\n"); rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", diff --git a/trunk/net/ipv4/syncookies.c b/trunk/net/ipv4/syncookies.c index 661e0a4bca72..e20be3331f67 100644 --- a/trunk/net/ipv4/syncookies.c +++ b/trunk/net/ipv4/syncookies.c @@ -214,10 +214,6 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; - if (security_inet_conn_request(sk, skb, req)) { - reqsk_free(req); - goto out; - } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -263,7 +259,6 @@ struct sock 
*cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; - security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/trunk/net/ipv4/sysctl_net_ipv4.c b/trunk/net/ipv4/sysctl_net_ipv4.c index 19b2071ff319..70cea9d08a38 100644 --- a/trunk/net/ipv4/sysctl_net_ipv4.c +++ b/trunk/net/ipv4/sysctl_net_ipv4.c @@ -17,7 +17,6 @@ #include #include #include -#include /* From af_inet.c */ extern int sysctl_ip_nonlocal_bind; @@ -698,40 +697,6 @@ ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = &proc_dointvec }, -#ifdef CONFIG_NETLABEL - { - .ctl_name = NET_CIPSOV4_CACHE_ENABLE, - .procname = "cipso_cache_enable", - .data = &cipso_v4_cache_enabled, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_CIPSOV4_CACHE_BUCKET_SIZE, - .procname = "cipso_cache_bucket_size", - .data = &cipso_v4_cache_bucketsize, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_CIPSOV4_RBM_OPTFMT, - .procname = "cipso_rbm_optfmt", - .data = &cipso_v4_rbm_optfmt, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_CIPSOV4_RBM_STRICTVALID, - .procname = "cipso_rbm_strictvalid", - .data = &cipso_v4_rbm_strictvalid, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, -#endif /* CONFIG_NETLABEL */ { .ctl_name = 0 } }; diff --git a/trunk/net/ipv4/tcp.c b/trunk/net/ipv4/tcp.c index 66e9a729f6df..934396bb1376 100644 --- a/trunk/net/ipv4/tcp.c +++ b/trunk/net/ipv4/tcp.c @@ -268,7 +268,7 @@ #include #include -int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT; +int sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; DEFINE_SNMP_STAT(struct tcp_mib, tcp_statistics) __read_mostly; @@ -568,7 +568,7 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse skb->truesize += 
copy; sk->sk_wmem_queued += copy; sk->sk_forward_alloc -= copy; - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; tp->write_seq += copy; TCP_SKB_CB(skb)->end_seq += copy; skb_shinfo(skb)->gso_segs = 0; @@ -723,7 +723,7 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, * Check whether we can use HW checksum. */ if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; skb_entail(sk, tp, skb); copy = size_goal; @@ -955,11 +955,8 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && - ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || - ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong)) && - !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } @@ -2208,7 +2205,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) th->fin = th->psh = 0; th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_HW) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2222,7 +2219,7 @@ struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features) delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len); th->check = ~csum_fold(th->check + delta); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_HW) th->check = csum_fold(csum_partial(skb->h.raw, thlen, skb->csum)); @@ -2257,7 +2254,9 @@ void __init tcp_init(void) tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!tcp_hashinfo.bind_bucket_cachep) + panic("tcp_init: Cannot alloc tcp_bind_bucket cache."); /* Size and allocate the main established 
and bind bucket * hash tables. diff --git a/trunk/net/ipv4/tcp_bic.c b/trunk/net/ipv4/tcp_bic.c index 5730333cd0ac..b0134ab08379 100644 --- a/trunk/net/ipv4/tcp_bic.c +++ b/trunk/net/ipv4/tcp_bic.c @@ -231,7 +231,7 @@ static struct tcp_congestion_ops bictcp = { static int __init bictcp_register(void) { - BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&bictcp); } diff --git a/trunk/net/ipv4/tcp_cubic.c b/trunk/net/ipv4/tcp_cubic.c index a60ef38d75c6..2be27980ca78 100644 --- a/trunk/net/ipv4/tcp_cubic.c +++ b/trunk/net/ipv4/tcp_cubic.c @@ -358,7 +358,7 @@ static struct tcp_congestion_ops cubictcp = { static int __init cubictcp_register(void) { - BUILD_BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); /* Precompute a bunch of the scaling factors that are used per-packet * based on SRTT of 100ms diff --git a/trunk/net/ipv4/tcp_highspeed.c b/trunk/net/ipv4/tcp_highspeed.c index c4fc811bf377..fa3e1aad660c 100644 --- a/trunk/net/ipv4/tcp_highspeed.c +++ b/trunk/net/ipv4/tcp_highspeed.c @@ -189,7 +189,7 @@ static struct tcp_congestion_ops tcp_highspeed = { static int __init hstcp_register(void) { - BUILD_BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hstcp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_highspeed); } diff --git a/trunk/net/ipv4/tcp_htcp.c b/trunk/net/ipv4/tcp_htcp.c index 682e7d5b6f2f..6edfe5e4510e 100644 --- a/trunk/net/ipv4/tcp_htcp.c +++ b/trunk/net/ipv4/tcp_htcp.c @@ -286,7 +286,7 @@ static struct tcp_congestion_ops htcp = { static int __init htcp_register(void) { - BUILD_BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct htcp) > ICSK_CA_PRIV_SIZE); BUILD_BUG_ON(BETA_MIN >= BETA_MAX); return tcp_register_congestion_control(&htcp); } diff --git a/trunk/net/ipv4/tcp_hybla.c b/trunk/net/ipv4/tcp_hybla.c index 59e691d26f64..7406e0c5fb8e 
100644 --- a/trunk/net/ipv4/tcp_hybla.c +++ b/trunk/net/ipv4/tcp_hybla.c @@ -170,7 +170,7 @@ static struct tcp_congestion_ops tcp_hybla = { static int __init hybla_register(void) { - BUILD_BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct hybla) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_hybla); } diff --git a/trunk/net/ipv4/tcp_input.c b/trunk/net/ipv4/tcp_input.c index b3def0df14fb..159fa3f1ba67 100644 --- a/trunk/net/ipv4/tcp_input.c +++ b/trunk/net/ipv4/tcp_input.c @@ -72,24 +72,24 @@ #include #include -int sysctl_tcp_timestamps __read_mostly = 1; -int sysctl_tcp_window_scaling __read_mostly = 1; -int sysctl_tcp_sack __read_mostly = 1; -int sysctl_tcp_fack __read_mostly = 1; -int sysctl_tcp_reordering __read_mostly = TCP_FASTRETRANS_THRESH; -int sysctl_tcp_ecn __read_mostly; -int sysctl_tcp_dsack __read_mostly = 1; -int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 2; - -int sysctl_tcp_stdurg __read_mostly; -int sysctl_tcp_rfc1337 __read_mostly; -int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly; -int sysctl_tcp_nometrics_save __read_mostly; - -int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_abc __read_mostly; +int sysctl_tcp_timestamps = 1; +int sysctl_tcp_window_scaling = 1; +int sysctl_tcp_sack = 1; +int sysctl_tcp_fack = 1; +int sysctl_tcp_reordering = TCP_FASTRETRANS_THRESH; +int sysctl_tcp_ecn; +int sysctl_tcp_dsack = 1; +int sysctl_tcp_app_win = 31; +int sysctl_tcp_adv_win_scale = 2; + +int sysctl_tcp_stdurg; +int sysctl_tcp_rfc1337; +int sysctl_tcp_max_orphans = NR_FILE; +int sysctl_tcp_frto; +int sysctl_tcp_nometrics_save; + +int sysctl_tcp_moderate_rcvbuf = 1; +int sysctl_tcp_abc; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. 
*/ @@ -127,7 +127,7 @@ static void tcp_measure_rcv_mss(struct sock *sk, /* skb->len may jitter because of SACKs, even if peer * sends good full-sized frames. */ - len = skb_shinfo(skb)->gso_size ?: skb->len; + len = skb->len; if (len >= icsk->icsk_ack.rcv_mss) { icsk->icsk_ack.rcv_mss = len; } else { @@ -156,8 +156,6 @@ static void tcp_measure_rcv_mss(struct sock *sk, return; } } - if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) - icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } diff --git a/trunk/net/ipv4/tcp_ipv4.c b/trunk/net/ipv4/tcp_ipv4.c index 39b179856082..4b04c3edd4a9 100644 --- a/trunk/net/ipv4/tcp_ipv4.c +++ b/trunk/net/ipv4/tcp_ipv4.c @@ -78,8 +78,8 @@ #include #include -int sysctl_tcp_tw_reuse __read_mostly; -int sysctl_tcp_low_latency __read_mostly; +int sysctl_tcp_tw_reuse; +int sysctl_tcp_low_latency; /* Check TCP sequence numbers in ICMP packets. */ #define ICMP_MIN_LENGTH 8 @@ -484,7 +484,7 @@ void tcp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { th->check = ~tcp_v4_check(th, len, inet->saddr, inet->daddr, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -509,7 +509,7 @@ int tcp_v4_gso_send_check(struct sk_buff *skb) th->check = 0; th->check = ~tcp_v4_check(th, skb->len, iph->saddr, iph->daddr, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; return 0; } @@ -798,9 +798,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); - if (security_inet_conn_request(sk, skb, req)) - goto drop_and_free; - ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; @@ -951,9 +948,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = 
inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, - th->source, skb->nh.iph->daddr, - th->dest, inet_iif(skb)); + nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + ntohs(th->dest), inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -973,7 +970,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) static int tcp_v4_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (skb->ip_summed == CHECKSUM_HW) { if (!tcp_v4_check(skb->h.th, skb->len, skb->nh.iph->saddr, skb->nh.iph->daddr, skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1090,7 +1087,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, th->dest, + skb->nh.iph->daddr, ntohs(th->dest), inet_iif(skb)); if (!sk) @@ -1104,7 +1101,7 @@ int tcp_v4_rcv(struct sk_buff *skb) goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 0)) goto discard_and_relse; skb->dev = NULL; @@ -1168,7 +1165,7 @@ int tcp_v4_rcv(struct sk_buff *skb) case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, - th->dest, + ntohs(th->dest), inet_iif(skb)); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, diff --git a/trunk/net/ipv4/tcp_lp.c b/trunk/net/ipv4/tcp_lp.c index 308fb7e071c5..48f28d617ce6 100644 --- a/trunk/net/ipv4/tcp_lp.c +++ b/trunk/net/ipv4/tcp_lp.c @@ -35,6 +35,7 @@ * Version: $Id: tcp_lp.c,v 1.24 2006/09/05 20:22:53 hswong3i Exp $ */ +#include #include #include @@ -327,7 +328,7 @@ static struct tcp_congestion_ops tcp_lp = { static int __init tcp_lp_register(void) { - BUILD_BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct lp) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_lp); } diff --git a/trunk/net/ipv4/tcp_minisocks.c b/trunk/net/ipv4/tcp_minisocks.c 
index 0163d9826907..624e2b2c7f53 100644 --- a/trunk/net/ipv4/tcp_minisocks.c +++ b/trunk/net/ipv4/tcp_minisocks.c @@ -34,8 +34,8 @@ #define SYNC_INIT 1 #endif -int sysctl_tcp_syncookies __read_mostly = SYNC_INIT; -int sysctl_tcp_abort_on_overflow __read_mostly; +int sysctl_tcp_syncookies = SYNC_INIT; +int sysctl_tcp_abort_on_overflow; struct inet_timewait_death_row tcp_death_row = { .sysctl_max_tw_buckets = NR_FILE * 2, diff --git a/trunk/net/ipv4/tcp_output.c b/trunk/net/ipv4/tcp_output.c index 061edfae0c29..b4f3ffe1b3b4 100644 --- a/trunk/net/ipv4/tcp_output.c +++ b/trunk/net/ipv4/tcp_output.c @@ -43,24 +43,24 @@ #include /* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse __read_mostly = 1; +int sysctl_tcp_retrans_collapse = 1; /* People can turn this on to work with those rare, broken TCPs that * interpret the window field as a signed quantity. */ -int sysctl_tcp_workaround_signed_windows __read_mostly = 0; +int sysctl_tcp_workaround_signed_windows = 0; /* This limits the percentage of the congestion window which we * will allow a single TSO frame to consume. Building TSO frames * which are too large can cause TCP streams to be bursty. */ -int sysctl_tcp_tso_win_divisor __read_mostly = 3; +int sysctl_tcp_tso_win_divisor = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = 512; +int sysctl_tcp_mtu_probing = 0; +int sysctl_tcp_base_mss = 512; /* By default, RFC2861 behavior. 
*/ -int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +int sysctl_tcp_slow_start_after_idle = 1; static void update_send_head(struct sock *sk, struct tcp_sock *tp, struct sk_buff *skb) @@ -577,7 +577,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss TCP_SKB_CB(buff)->sacked = TCP_SKB_CB(skb)->sacked; TCP_SKB_CB(skb)->sacked &= ~TCPCB_AT_TAIL; - if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_PARTIAL) { + if (!skb_shinfo(skb)->nr_frags && skb->ip_summed != CHECKSUM_HW) { /* Copy and checksum data tail into the new buffer. */ buff->csum = csum_partial_copy_nocheck(skb->data + len, skb_put(buff, nsize), nsize, 0); @@ -586,7 +586,7 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, unsigned int mss skb->csum = csum_block_sub(skb->csum, buff->csum, len); } else { - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; skb_split(skb, buff, len); } @@ -689,7 +689,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) __pskb_trim_head(skb, len - skb_headlen(skb)); TCP_SKB_CB(skb)->seq += len; - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; skb->truesize -= len; sk->sk_wmem_queued -= len; @@ -1062,7 +1062,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* This packet was never sent out yet, so no SACK bits. */ TCP_SKB_CB(buff)->sacked = 0; - buff->ip_summed = skb->ip_summed = CHECKSUM_PARTIAL; + buff->ip_summed = skb->ip_summed = CHECKSUM_HW; skb_split(skb, buff, len); /* Fix up tso_factor for both original and new SKB. 
*/ @@ -1206,7 +1206,8 @@ static int tcp_mtu_probe(struct sock *sk) TCP_SKB_CB(nskb)->flags = TCPCB_FLAG_ACK; TCP_SKB_CB(nskb)->sacked = 0; nskb->csum = 0; - nskb->ip_summed = skb->ip_summed; + if (skb->ip_summed == CHECKSUM_HW) + nskb->ip_summed = CHECKSUM_HW; len = 0; while (len < probe_size) { @@ -1230,7 +1231,7 @@ static int tcp_mtu_probe(struct sock *sk) ~(TCPCB_FLAG_FIN|TCPCB_FLAG_PSH); if (!skb_shinfo(skb)->nr_frags) { skb_pull(skb, copy); - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_HW) skb->csum = csum_partial(skb->data, skb->len, 0); } else { __pskb_trim_head(skb, copy); @@ -1571,9 +1572,10 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *skb, int m memcpy(skb_put(skb, next_skb_size), next_skb->data, next_skb_size); - skb->ip_summed = next_skb->ip_summed; + if (next_skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_HW; - if (skb->ip_summed != CHECKSUM_PARTIAL) + if (skb->ip_summed != CHECKSUM_HW) skb->csum = csum_block_add(skb->csum, next_skb->csum, skb_size); /* Update sequence range on original skb. 
*/ diff --git a/trunk/net/ipv4/tcp_timer.c b/trunk/net/ipv4/tcp_timer.c index fb09ade5897b..7c1bde3cd6cb 100644 --- a/trunk/net/ipv4/tcp_timer.c +++ b/trunk/net/ipv4/tcp_timer.c @@ -23,14 +23,14 @@ #include #include -int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; -int sysctl_tcp_synack_retries __read_mostly = TCP_SYNACK_RETRIES; -int sysctl_tcp_keepalive_time __read_mostly = TCP_KEEPALIVE_TIME; -int sysctl_tcp_keepalive_probes __read_mostly = TCP_KEEPALIVE_PROBES; -int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; -int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; -int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; -int sysctl_tcp_orphan_retries __read_mostly; +int sysctl_tcp_syn_retries = TCP_SYN_RETRIES; +int sysctl_tcp_synack_retries = TCP_SYNACK_RETRIES; +int sysctl_tcp_keepalive_time = TCP_KEEPALIVE_TIME; +int sysctl_tcp_keepalive_probes = TCP_KEEPALIVE_PROBES; +int sysctl_tcp_keepalive_intvl = TCP_KEEPALIVE_INTVL; +int sysctl_tcp_retries1 = TCP_RETR1; +int sysctl_tcp_retries2 = TCP_RETR2; +int sysctl_tcp_orphan_retries; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); diff --git a/trunk/net/ipv4/tcp_vegas.c b/trunk/net/ipv4/tcp_vegas.c index a3b7aa015a2f..490360b5b4bf 100644 --- a/trunk/net/ipv4/tcp_vegas.c +++ b/trunk/net/ipv4/tcp_vegas.c @@ -370,7 +370,7 @@ static struct tcp_congestion_ops tcp_vegas = { static int __init tcp_vegas_register(void) { - BUILD_BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct vegas) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_vegas); return 0; } diff --git a/trunk/net/ipv4/tcp_veno.c b/trunk/net/ipv4/tcp_veno.c index ce57bf302f6c..11b42a7135c1 100644 --- a/trunk/net/ipv4/tcp_veno.c +++ b/trunk/net/ipv4/tcp_veno.c @@ -9,6 +9,7 @@ * See http://www.ntu.edu.sg/home5/ZHOU0022/papers/CPFu03a.pdf */ +#include #include #include #include @@ -212,7 +213,7 @@ static struct tcp_congestion_ops tcp_veno = { static int __init 
tcp_veno_register(void) { - BUILD_BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct veno) > ICSK_CA_PRIV_SIZE); tcp_register_congestion_control(&tcp_veno); return 0; } diff --git a/trunk/net/ipv4/tcp_westwood.c b/trunk/net/ipv4/tcp_westwood.c index 4f42a86c77f3..5446312ffd2a 100644 --- a/trunk/net/ipv4/tcp_westwood.c +++ b/trunk/net/ipv4/tcp_westwood.c @@ -289,7 +289,7 @@ static struct tcp_congestion_ops tcp_westwood = { static int __init tcp_westwood_register(void) { - BUILD_BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); + BUG_ON(sizeof(struct westwood) > ICSK_CA_PRIV_SIZE); return tcp_register_congestion_control(&tcp_westwood); } diff --git a/trunk/net/ipv4/udp.c b/trunk/net/ipv4/udp.c index 77e265d7bb8f..f136cec96d95 100644 --- a/trunk/net/ipv4/udp.c +++ b/trunk/net/ipv4/udp.c @@ -118,33 +118,14 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -static int udp_port_rover; +/* Shared by v4/v6 udp. 
*/ +int udp_port_rover; -static inline int udp_lport_inuse(u16 num) -{ - struct sock *sk; - struct hlist_node *node; - - sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) - if (inet_sk(sk)->num == num) - return 1; - return 0; -} - -/** - * udp_get_port - common port lookup for IPv4 and IPv6 - * - * @sk: socket struct in question - * @snum: port number to look up - * @saddr_comp: AF-dependent comparison of bound local IP addresses - */ -int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) +static int udp_v4_get_port(struct sock *sk, unsigned short snum) { struct hlist_node *node; - struct hlist_head *head; struct sock *sk2; - int error = 1; + struct inet_sock *inet = inet_sk(sk); write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -156,10 +137,11 @@ int udp_get_port(struct sock *sk, unsigned short snum, best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + struct hlist_head *list; int size; - head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(head)) { + list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(list)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -167,11 +149,12 @@ int udp_get_port(struct sock *sk, unsigned short snum, goto gotit; } size = 0; - sk_for_each(sk2, node, head) - if (++size < best_size_so_far) { - best_size_so_far = size; - best = result; - } + sk_for_each(sk2, node, list) + if (++size >= best_size_so_far) + goto next; + best_size_so_far = size; + best = result; + next:; } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -187,44 +170,38 @@ int udp_get_port(struct sock *sk, unsigned short snum, gotit: udp_port_rover = snum = result; } else { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_for_each(sk2, node, head) - if (inet_sk(sk2)->num == snum 
&& - sk2 != sk && - (!sk2->sk_reuse || !sk->sk_reuse) && - (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if - || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (*saddr_cmp)(sk, sk2) ) + sk_for_each(sk2, node, + &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet2 = inet_sk(sk2); + + if (inet2->num == snum && + sk2 != sk && + !ipv6_only_sock(sk2) && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!inet2->rcv_saddr || + !inet->rcv_saddr || + inet2->rcv_saddr == inet->rcv_saddr) && + (!sk2->sk_reuse || !sk->sk_reuse)) goto fail; + } } - inet_sk(sk)->num = snum; + inet->num = snum; if (sk_unhashed(sk)) { - head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - sk_add_node(sk, head); + struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + + sk_add_node(sk, h); sock_prot_inc_use(sk->sk_prot); } - error = 0; -fail: write_unlock_bh(&udp_hash_lock); - return error; -} - -static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) -{ - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - - return ( !ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr )); -} + return 0; -static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) -{ - return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +fail: + write_unlock_bh(&udp_hash_lock); + return 1; } - static void udp_v4_hash(struct sock *sk) { BUG(); @@ -452,7 +429,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) /* * Only one fragment on the socket. 
*/ - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { skb->csum = offsetof(struct udphdr, check); uh->check = ~csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, IPPROTO_UDP, 0); @@ -471,7 +448,7 @@ static int udp_push_pending_frames(struct sock *sk, struct udp_sock *up) * fragments on the socket so that all csums of sk_buffs * should be together. */ - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { int offset = (unsigned char *)uh - skb->data; skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); @@ -626,7 +603,6 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; - security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; @@ -685,16 +661,6 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); - } return err; do_confirm: @@ -1014,7 +980,6 @@ static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb) static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { struct udp_sock *up = udp_sk(sk); - int rc; /* * Charge it to the socket, dropping if the queue is full. 
@@ -1061,10 +1026,7 @@ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); + if (sock_queue_rcv_skb(sk,skb)<0) { UDP_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return -1; @@ -1125,7 +1087,7 @@ static void udp_checksum_init(struct sk_buff *skb, struct udphdr *uh, { if (uh->check == 0) { skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { + } else if (skb->ip_summed == CHECKSUM_HW) { if (!udp_check(uh, ulen, saddr, daddr, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -1619,7 +1581,7 @@ EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_get_port); +EXPORT_SYMBOL(udp_port_rover); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); diff --git a/trunk/net/ipv4/xfrm4_input.c b/trunk/net/ipv4/xfrm4_input.c index 040e8475f295..817ed84511a6 100644 --- a/trunk/net/ipv4/xfrm4_input.c +++ b/trunk/net/ipv4/xfrm4_input.c @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { decaps = 1; break; } diff --git a/trunk/net/ipv4/xfrm4_mode_transport.c b/trunk/net/ipv4/xfrm4_mode_transport.c index 92676b7e4034..a9e6b3dd19c9 100644 --- a/trunk/net/ipv4/xfrm4_mode_transport.c +++ b/trunk/net/ipv4/xfrm4_mode_transport.c @@ -21,8 +21,9 @@ * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. 
*/ -static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm4_transport_output(struct sk_buff *skb) { + struct xfrm_state *x; struct iphdr *iph; int ihl; @@ -32,6 +33,7 @@ static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) ihl = iph->ihl * 4; skb->h.raw += ihl; + x = skb->dst->xfrm; skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); return 0; } diff --git a/trunk/net/ipv4/xfrm4_mode_tunnel.c b/trunk/net/ipv4/xfrm4_mode_tunnel.c index e23c21d31a53..13cafbe56ce3 100644 --- a/trunk/net/ipv4/xfrm4_mode_tunnel.c +++ b/trunk/net/ipv4/xfrm4_mode_tunnel.c @@ -33,9 +33,10 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm4_tunnel_output(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; int flags; diff --git a/trunk/net/ipv4/xfrm4_output.c b/trunk/net/ipv4/xfrm4_output.c index 04403fb01a58..d16f863cf687 100644 --- a/trunk/net/ipv4/xfrm4_output.c +++ b/trunk/net/ipv4/xfrm4_output.c @@ -48,13 +48,13 @@ static int xfrm4_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); if (err) goto error_nolock; } - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(x, skb); + err = x->mode->output(skb); if (err) goto error; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && 
(x->props.mode != XFRM_MODE_TUNNEL)); + } while (x && !x->props.mode); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/trunk/net/ipv4/xfrm4_policy.c b/trunk/net/ipv4/xfrm4_policy.c index eabcd27b1767..8f50eae47d03 100644 --- a/trunk/net/ipv4/xfrm4_policy.c +++ b/trunk/net/ipv4/xfrm4_policy.c @@ -21,25 +21,6 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } -static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) -{ - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = daddr->a4, - }, - }, - }; - - if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { - saddr->a4 = rt->rt_src; - dst_release(&rt->u.dst); - return 0; - } - return -EHOSTUNREACH; -} - static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -52,7 +33,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { + xfrm_bundle_ok(xdst, fl, AF_INET)) { dst_clone(dst); break; } @@ -112,11 +93,10 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; - xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { + if (xfrm[i]->props.mode) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; @@ -155,7 +135,6 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; - dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -317,7 +296,6 @@ static struct xfrm_policy_afinfo 
xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, - .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/trunk/net/ipv4/xfrm4_state.c b/trunk/net/ipv4/xfrm4_state.c index fe2034494d08..81e1751c966e 100644 --- a/trunk/net/ipv4/xfrm4_state.c +++ b/trunk/net/ipv4/xfrm4_state.c @@ -42,15 +42,99 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; + if (tmpl->mode && x->props.saddr.a4 == 0) { + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = x->id.daddr.a4, + } + } + }; + if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, + &fl_tunnel, AF_INET)) { + x->props.saddr.a4 = rt->rt_src; + dst_release(&rt->u.dst); + } + } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; } +static struct xfrm_state * +__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) +{ + unsigned h = __xfrm4_spi_hash(daddr, spi, proto); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { + if (x->props.family == AF_INET && + spi == x->id.spi && + daddr->a4 == x->id.daddr.a4 && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; + } + } + return NULL; +} + +static struct xfrm_state * +__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, + int create) +{ + struct xfrm_state *x, *x0; + unsigned h = __xfrm4_dst_hash(daddr); + + x0 = NULL; + + list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { + if (x->props.family == AF_INET && + daddr->a4 == x->id.daddr.a4 && + mode == x->props.mode && + proto == x->id.proto && + saddr->a4 == x->props.saddr.a4 && + reqid == x->props.reqid && + x->km.state == XFRM_STATE_ACQ && + !x->id.spi) { + x0 = x; + break; + } + } + if (!x0 && create && (x0 = 
xfrm_state_alloc()) != NULL) { + x0->sel.daddr.a4 = daddr->a4; + x0->sel.saddr.a4 = saddr->a4; + x0->sel.prefixlen_d = 32; + x0->sel.prefixlen_s = 32; + x0->props.saddr.a4 = saddr->a4; + x0->km.state = XFRM_STATE_ACQ; + x0->id.daddr.a4 = daddr->a4; + x0->id.proto = proto; + x0->props.family = AF_INET; + x0->props.mode = mode; + x0->props.reqid = reqid; + x0->props.family = AF_INET; + x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x0); + x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x0->timer); + xfrm_state_hold(x0); + list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); + wake_up(&km_waitq); + } + if (x0) + xfrm_state_hold(x0); + return x0; +} + static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, + .state_lookup = __xfrm4_state_lookup, + .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/trunk/net/ipv4/xfrm4_tunnel.c b/trunk/net/ipv4/xfrm4_tunnel.c index f110af5b1319..f8ceaa127c83 100644 --- a/trunk/net/ipv4/xfrm4_tunnel.c +++ b/trunk/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (x->props.mode != XFRM_MODE_TUNNEL) + if (!x->props.mode) return -EINVAL; if (x->encap) diff --git a/trunk/net/ipv6/Kconfig b/trunk/net/ipv6/Kconfig index a2d211da2aba..0ba06c0c5d39 100644 --- a/trunk/net/ipv6/Kconfig +++ b/trunk/net/ipv6/Kconfig @@ -98,15 +98,6 @@ config INET6_IPCOMP If unsure, say Y. -config IPV6_MIP6 - bool "IPv6: Mobility (EXPERIMENTAL)" - depends on IPV6 && EXPERIMENTAL - select XFRM - ---help--- - Support for IPv6 Mobility described in RFC 3775. - - If unsure, say N. - config INET6_XFRM_TUNNEL tristate select INET6_TUNNEL @@ -136,13 +127,6 @@ config INET6_XFRM_MODE_TUNNEL If unsure, say Y. 
-config INET6_XFRM_MODE_ROUTEOPTIMIZATION - tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)" - depends on IPV6 && EXPERIMENTAL - select XFRM - ---help--- - Support for MIPv6 route optimization mode. - config IPV6_TUNNEL tristate "IPv6: IPv6-in-IPv6 tunnel" select INET6_TUNNEL @@ -152,31 +136,3 @@ config IPV6_TUNNEL If unsure, say N. -config IPV6_SUBTREES - bool "IPv6: source address based routing" - depends on IPV6 && EXPERIMENTAL - ---help--- - Enable routing by source address or prefix. - - The destination address is still the primary routing key, so mixing - normal and source prefix specific routes in the same routing table - may sometimes lead to unintended routing behavior. This can be - avoided by defining different routing tables for the normal and - source prefix specific routes. - - If unsure, say N. - -config IPV6_MULTIPLE_TABLES - bool "IPv6: Multiple Routing Tables" - depends on IPV6 && EXPERIMENTAL - select FIB_RULES - ---help--- - Support multiple routing tables. - -config IPV6_ROUTE_FWMARK - bool "IPv6: use netfilter MARK value as routing key" - depends on IPV6_MULTIPLE_TABLES && NETFILTER - ---help--- - If you say Y here, you will be able to specify different routes for - packets with different mark values (see iptables(8), MARK target). 
- diff --git a/trunk/net/ipv6/Makefile b/trunk/net/ipv6/Makefile index 0213c6612b58..386e0a626948 100644 --- a/trunk/net/ipv6/Makefile +++ b/trunk/net/ipv6/Makefile @@ -13,9 +13,6 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o -ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o -ipv6-$(CONFIG_IPV6_MIP6) += mip6.o - ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o @@ -25,7 +22,6 @@ obj-$(CONFIG_INET6_XFRM_TUNNEL) += xfrm6_tunnel.o obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o -obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o diff --git a/trunk/net/ipv6/addrconf.c b/trunk/net/ipv6/addrconf.c index c18676352397..c7852b38e03e 100644 --- a/trunk/net/ipv6/addrconf.c +++ b/trunk/net/ipv6/addrconf.c @@ -48,7 +48,6 @@ #include #include #include -#include #include #include #include @@ -73,7 +72,6 @@ #include #include #include -#include #include #include @@ -119,6 +117,9 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_RWLOCK(addrconf_hash_lock); +/* Protects inet6 devices */ +DEFINE_RWLOCK(addrconf_lock); + static void addrconf_verify(unsigned long); static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); @@ -143,7 +144,7 @@ static int ipv6_chk_same_addr(const struct in6_addr *addr, struct net_device *de static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -struct ipv6_devconf ipv6_devconf __read_mostly = { +struct ipv6_devconf ipv6_devconf = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -172,10 +173,9 @@ struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_ra_rt_info_max_plen = 
0, #endif #endif - .proxy_ndp = 0, }; -static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { +static struct ipv6_devconf ipv6_devconf_dflt = { .forwarding = 0, .hop_limit = IPV6_DEFAULT_HOPLIMIT, .mtu6 = IPV6_MIN_MTU, @@ -203,7 +203,6 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_ra_rt_info_max_plen = 0, #endif #endif - .proxy_ndp = 0, }; /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */ @@ -315,12 +314,6 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, /* Nobody refers to this device, we may destroy it. */ -static void in6_dev_finish_destroy_rcu(struct rcu_head *head) -{ - struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); - kfree(idev); -} - void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; @@ -335,7 +328,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) return; } snmp6_free_dev(idev); - call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); + kfree(idev); } static struct inet6_dev * ipv6_add_dev(struct net_device *dev) @@ -411,8 +404,9 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, ndev); + write_lock_bh(&addrconf_lock); + dev->ip6_ptr = ndev; + write_unlock_bh(&addrconf_lock); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -476,7 +470,7 @@ static void addrconf_forward_change(void) read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { - rcu_read_lock(); + read_lock(&addrconf_lock); idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); @@ -484,7 +478,7 @@ static void addrconf_forward_change(void) if (changed) dev_forward_change(idev); } - rcu_read_unlock(); + read_unlock(&addrconf_lock); } read_unlock(&dev_base_lock); } @@ -545,7 +539,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, int hash; int 
err = 0; - rcu_read_lock_bh(); + read_lock_bh(&addrconf_lock); if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -614,7 +608,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); write_unlock(&idev->lock); out2: - rcu_read_unlock_bh(); + read_unlock_bh(&addrconf_lock); if (likely(err == 0)) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); @@ -740,7 +734,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { rt->rt6i_expires = expires; @@ -917,7 +911,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, memset(&hiscore, 0, sizeof(hiscore)); read_lock(&dev_base_lock); - rcu_read_lock(); + read_lock(&addrconf_lock); for (dev = dev_base; dev; dev=dev->next) { struct inet6_dev *idev; @@ -1038,27 +1032,9 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - /* Rule 4: Prefer home address */ -#ifdef CONFIG_IPV6_MIP6 - if (hiscore.rule < 4) { - if (ifa_result->flags & IFA_F_HOMEADDRESS) - hiscore.attrs |= IPV6_SADDR_SCORE_HOA; - hiscore.rule++; - } - if (ifa->flags & IFA_F_HOMEADDRESS) { - score.attrs |= IPV6_SADDR_SCORE_HOA; - if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { - score.rule = 4; - goto record_it; - } - } else { - if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) - continue; - } -#else + /* Rule 4: Prefer home address -- not implemented yet */ if (hiscore.rule < 4) hiscore.rule++; -#endif /* Rule 5: Prefer outgoing interface */ if (hiscore.rule < 5) { @@ -1147,7 +1123,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, } read_unlock_bh(&idev->lock); } - rcu_read_unlock(); + read_unlock(&addrconf_lock); read_unlock(&dev_base_lock); if (!ifa_result) @@ -1171,7 +1147,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) struct inet6_dev *idev; int err = 
-EADDRNOTAVAIL; - rcu_read_lock(); + read_lock(&addrconf_lock); if ((idev = __in6_dev_get(dev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1185,7 +1161,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) } read_unlock_bh(&idev->lock); } - rcu_read_unlock(); + read_unlock(&addrconf_lock); return err; } @@ -1486,7 +1462,7 @@ static void ipv6_regen_rndid(unsigned long data) struct inet6_dev *idev = (struct inet6_dev *) data; unsigned long expires; - rcu_read_lock_bh(); + read_lock_bh(&addrconf_lock); write_lock_bh(&idev->lock); if (idev->dead) @@ -1510,7 +1486,7 @@ static void ipv6_regen_rndid(unsigned long data) out: write_unlock_bh(&idev->lock); - rcu_read_unlock_bh(); + read_unlock_bh(&addrconf_lock); in6_dev_put(idev); } @@ -1531,56 +1507,59 @@ static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { - struct fib6_config cfg = { - .fc_table = RT6_TABLE_PREFIX, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_expires = expires, - .fc_dst_len = plen, - .fc_flags = RTF_UP | flags, - }; + struct in6_rtmsg rtmsg; - ipv6_addr_copy(&cfg.fc_dst, pfx); + memset(&rtmsg, 0, sizeof(rtmsg)); + ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx); + rtmsg.rtmsg_dst_len = plen; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_info = expires; + rtmsg.rtmsg_flags = RTF_UP|flags; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole class of non-broadcast devices need not cloning. 
*/ - if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) - cfg.fc_flags |= RTF_NONEXTHOP; + if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) + rtmsg.rtmsg_flags |= RTF_NONEXTHOP; - ip6_route_add(&cfg); + ip6_route_add(&rtmsg, NULL, NULL, NULL); } /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) { - struct fib6_config cfg = { - .fc_table = RT6_TABLE_LOCAL, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_dst_len = 8, - .fc_flags = RTF_UP, - }; - - ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); + struct in6_rtmsg rtmsg; - ip6_route_add(&cfg); + memset(&rtmsg, 0, sizeof(rtmsg)); + ipv6_addr_set(&rtmsg.rtmsg_dst, + htonl(0xFF000000), 0, 0, 0); + rtmsg.rtmsg_dst_len = 8; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + rtmsg.rtmsg_ifindex = dev->ifindex; + rtmsg.rtmsg_flags = RTF_UP; + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ip6_route_add(&rtmsg, NULL, NULL, NULL); } static void sit_route_add(struct net_device *dev) { - struct fib6_config cfg = { - .fc_table = RT6_TABLE_MAIN, - .fc_metric = IP6_RT_PRIO_ADDRCONF, - .fc_ifindex = dev->ifindex, - .fc_dst_len = 96, - .fc_flags = RTF_UP | RTF_NONEXTHOP, - }; + struct in6_rtmsg rtmsg; + + memset(&rtmsg, 0, sizeof(rtmsg)); + + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; /* prefix length - 96 bits "::d.d.d.d" */ - ip6_route_add(&cfg); + rtmsg.rtmsg_dst_len = 96; + rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; + rtmsg.rtmsg_ifindex = dev->ifindex; + + ip6_route_add(&rtmsg, NULL, NULL, NULL); } static void addrconf_add_lroute(struct net_device *dev) @@ -1681,7 +1660,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (valid_lft == 0) { - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); rt = NULL; } else { rt->rt6i_expires = jiffies + rt_expires; @@ -1891,11 
+1870,12 @@ int addrconf_set_dstaddr(void __user *arg) * Manual configuration of address on an interface */ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, - __u8 ifa_flags, __u32 prefered_lft, __u32 valid_lft) + __u32 prefered_lft, __u32 valid_lft) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + __u8 ifa_flags = 0; int scope; ASSERT_RTNL(); @@ -1907,6 +1887,9 @@ static int inet6_addr_add(int ifindex, struct in6_addr *pfx, int plen, if ((dev = __dev_get_by_index(ifindex)) == NULL) return -ENODEV; + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + if ((idev = addrconf_add_dev(dev)) == NULL) return -ENOBUFS; @@ -1988,7 +1971,7 @@ int addrconf_add_ifaddr(void __user *arg) rtnl_lock(); err = inet6_addr_add(ireq.ifr6_ifindex, &ireq.ifr6_addr, ireq.ifr6_prefixlen, - IFA_F_PERMANENT, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); rtnl_unlock(); return err; } @@ -2361,10 +2344,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) Do not dev_put! */ if (how == 1) { + write_lock_bh(&addrconf_lock); + dev->ip6_ptr = NULL; idev->dead = 1; - - /* protected by rtnl_lock */ - rcu_assign_pointer(dev->ip6_ptr, NULL); + write_unlock_bh(&addrconf_lock); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -2531,8 +2514,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags) spin_lock_bh(&ifp->lock); if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - !(ifp->flags&IFA_F_TENTATIVE) || - ifp->flags & IFA_F_NODAD) { + !(ifp->flags&IFA_F_TENTATIVE)) { ifp->flags &= ~IFA_F_TENTATIVE; spin_unlock_bh(&ifp->lock); read_unlock_bh(&idev->lock); @@ -2777,26 +2759,6 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ -#ifdef CONFIG_IPV6_MIP6 -/* Check if address is a home address configured on any interface. 
*/ -int ipv6_chk_home_addr(struct in6_addr *addr) -{ - int ret = 0; - struct inet6_ifaddr * ifp; - u8 hash = ipv6_addr_hash(addr); - read_lock_bh(&addrconf_hash_lock); - for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { - if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && - (ifp->flags & IFA_F_HOMEADDRESS)) { - ret = 1; - break; - } - } - read_unlock_bh(&addrconf_hash_lock); - return ret; -} -#endif - /* * Periodic address status verification */ @@ -2907,68 +2869,66 @@ static void addrconf_verify(unsigned long foo) spin_unlock_bh(&addrconf_verify_lock); } -static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local) -{ - struct in6_addr *pfx = NULL; - - if (addr) - pfx = nla_data(addr); - - if (local) { - if (pfx && nla_memcmp(local, pfx, sizeof(*pfx))) - pfx = NULL; - else - pfx = nla_data(local); - } - - return pfx; -} - -static struct nla_policy ifa_ipv6_policy[IFA_MAX+1] __read_mostly = { - [IFA_ADDRESS] = { .len = sizeof(struct in6_addr) }, - [IFA_LOCAL] = { .len = sizeof(struct in6_addr) }, - [IFA_CACHEINFO] = { .len = sizeof(struct ifa_cacheinfo) }, -}; - static int inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - return err; - ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = NULL; + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_LOCAL-1]); + } if (pfx == NULL) return -EINVAL; return inet6_addr_del(ifm->ifa_index, pfx, ifm->ifa_prefixlen); } -static int 
inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, - u32 prefered_lft, u32 valid_lft) +static int +inet6_addr_modify(int ifindex, struct in6_addr *pfx, + __u32 prefered_lft, __u32 valid_lft) { + struct inet6_ifaddr *ifp = NULL; + struct net_device *dev; + int ifa_flags = 0; + + if ((dev = __dev_get_by_index(ifindex)) == NULL) + return -ENODEV; + + if (!(dev->flags&IFF_UP)) + return -ENETDOWN; + if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; + ifp = ipv6_get_ifaddr(pfx, dev, 1); + if (ifp == NULL) + return -ENOENT; + if (valid_lft == INFINITY_LIFE_TIME) - ifa_flags |= IFA_F_PERMANENT; + ifa_flags = IFA_F_PERMANENT; else if (valid_lft >= 0x7FFFFFFF/HZ) valid_lft = 0x7FFFFFFF/HZ; if (prefered_lft == 0) - ifa_flags |= IFA_F_DEPRECATED; + ifa_flags = IFA_F_DEPRECATED; else if ((prefered_lft >= 0x7FFFFFFF/HZ) && (prefered_lft != INFINITY_LIFE_TIME)) prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED|IFA_F_PERMANENT)) | ifa_flags; + ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2976,6 +2936,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, spin_unlock_bh(&ifp->lock); if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); + in6_ifa_put(ifp); addrconf_verify(0); @@ -2985,189 +2946,172 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, static int inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) { - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *pfx; - struct inet6_ifaddr *ifa; - struct net_device *dev; - u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; - u8 ifa_flags; - int err; - - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if 
(err < 0) - return err; + __u32 valid_lft = INFINITY_LIFE_TIME, prefered_lft = INFINITY_LIFE_TIME; - ifm = nlmsg_data(nlh); - pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); + pfx = NULL; + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*pfx)) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_ADDRESS-1]); + } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*pfx) || + (pfx && memcmp(pfx, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*pfx)))) + return -EINVAL; + pfx = RTA_DATA(rta[IFA_LOCAL-1]); + } if (pfx == NULL) return -EINVAL; - if (tb[IFA_CACHEINFO]) { + if (rta[IFA_CACHEINFO-1]) { struct ifa_cacheinfo *ci; - - ci = nla_data(tb[IFA_CACHEINFO]); + if (RTA_PAYLOAD(rta[IFA_CACHEINFO-1]) < sizeof(*ci)) + return -EINVAL; + ci = RTA_DATA(rta[IFA_CACHEINFO-1]); valid_lft = ci->ifa_valid; - preferred_lft = ci->ifa_prefered; - } else { - preferred_lft = INFINITY_LIFE_TIME; - valid_lft = INFINITY_LIFE_TIME; + prefered_lft = ci->ifa_prefered; } - dev = __dev_get_by_index(ifm->ifa_index); - if (dev == NULL) - return -ENODEV; - - /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); - - ifa = ipv6_get_ifaddr(pfx, dev, 1); - if (ifa == NULL) { - /* - * It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. 
- */ - return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, - ifa_flags, preferred_lft, valid_lft); + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + int ret; + ret = inet6_addr_modify(ifm->ifa_index, pfx, + prefered_lft, valid_lft); + if (ret == 0 || !(nlh->nlmsg_flags & NLM_F_CREATE)) + return ret; } - if (nlh->nlmsg_flags & NLM_F_EXCL || - !(nlh->nlmsg_flags & NLM_F_REPLACE)) - err = -EEXIST; - else - err = inet6_addr_modify(ifa, ifa_flags, preferred_lft, valid_lft); - - in6_ifa_put(ifa); - - return err; -} - -static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, - u8 scope, int ifindex) -{ - struct ifaddrmsg *ifm; + return inet6_addr_add(ifm->ifa_index, pfx, ifm->ifa_prefixlen, + prefered_lft, valid_lft); - ifm = nlmsg_data(nlh); - ifm->ifa_family = AF_INET6; - ifm->ifa_prefixlen = prefixlen; - ifm->ifa_flags = flags; - ifm->ifa_scope = scope; - ifm->ifa_index = ifindex; } -static int put_cacheinfo(struct sk_buff *skb, unsigned long cstamp, - unsigned long tstamp, u32 preferred, u32 valid) -{ - struct ifa_cacheinfo ci; - - ci.cstamp = (u32)(TIME_DELTA(cstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.tstamp = (u32)(TIME_DELTA(tstamp, INITIAL_JIFFIES) / HZ * 100 - + TIME_DELTA(tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); - ci.ifa_prefered = preferred; - ci.ifa_valid = valid; - - return nla_put(skb, IFA_CACHEINFO, sizeof(ci), &ci); -} - -static inline int rt_scope(int ifa_scope) -{ - if (ifa_scope & IFA_HOST) - return RT_SCOPE_HOST; - else if (ifa_scope & IFA_LINK) - return RT_SCOPE_LINK; - else if (ifa_scope & IFA_SITE) - return RT_SCOPE_SITE; - else - return RT_SCOPE_UNIVERSE; -} - -static inline int inet6_ifaddr_msgsize(void) -{ - return nlmsg_total_size(sizeof(struct ifaddrmsg) + - nla_total_size(16) + - nla_total_size(sizeof(struct ifa_cacheinfo)) + - 128); -} +/* Maximum length of ifa_cacheinfo attributes */ +#define INET6_IFADDR_RTA_SPACE \ + RTA_SPACE(16) /* IFA_ADDRESS */ + \ + 
RTA_SPACE(sizeof(struct ifa_cacheinfo)) /* CACHEINFO */ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 pid, u32 seq, int event, unsigned int flags) { + struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - u32 preferred, valid; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -ENOBUFS; - - put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), - ifa->idev->dev->ifindex); + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = ifa->prefix_len; + ifm->ifa_flags = ifa->flags; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ifa->scope&IFA_HOST) + ifm->ifa_scope = RT_SCOPE_HOST; + else if (ifa->scope&IFA_LINK) + ifm->ifa_scope = RT_SCOPE_LINK; + else if (ifa->scope&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifa->idev->dev->ifindex; + RTA_PUT(skb, IFA_ADDRESS, 16, &ifa->addr); if (!(ifa->flags&IFA_F_PERMANENT)) { - preferred = ifa->prefered_lft; - valid = ifa->valid_lft; - if (preferred != INFINITY_LIFE_TIME) { + ci.ifa_prefered = ifa->prefered_lft; + ci.ifa_valid = ifa->valid_lft; + if (ci.ifa_prefered != INFINITY_LIFE_TIME) { long tval = (jiffies - ifa->tstamp)/HZ; - preferred -= tval; - if (valid != INFINITY_LIFE_TIME) - valid -= tval; + ci.ifa_prefered -= tval; + if (ci.ifa_valid != INFINITY_LIFE_TIME) + ci.ifa_valid -= tval; } } else { - preferred = INFINITY_LIFE_TIME; - valid = INFINITY_LIFE_TIME; - } - - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0 || - put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) - return nlmsg_cancel(skb, nlh); + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + } + ci.cstamp = (__u32)(TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(ifa->cstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + ci.tstamp = 
(__u32)(TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) / HZ * 100 + + TIME_DELTA(ifa->tstamp, INITIAL_JIFFIES) % HZ * 100 / HZ); + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; - return nlmsg_end(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, u32 pid, u32 seq, int event, u16 flags) { + struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifmca->idev->dev->ifindex; - - if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) - scope = RT_SCOPE_SITE; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -ENOBUFS; - - put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || - put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = 128; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ipv6_addr_scope(&ifmca->mca_addr)&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifmca->idev->dev->ifindex; + RTA_PUT(skb, IFA_MULTICAST, 16, &ifmca->mca_addr); + ci.cstamp = (__u32)(TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifmca->mca_cstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.tstamp = (__u32)(TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifmca->mca_tstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; - return nlmsg_end(skb, 
nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, u32 pid, u32 seq, int event, unsigned int flags) { + struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - u8 scope = RT_SCOPE_UNIVERSE; - int ifindex = ifaca->aca_idev->dev->ifindex; - - if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) - scope = RT_SCOPE_SITE; - - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) - return -ENOBUFS; - - put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || - put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) - return nlmsg_cancel(skb, nlh); + struct ifa_cacheinfo ci; + unsigned char *b = skb->tail; + + nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*ifm), flags); + ifm = NLMSG_DATA(nlh); + ifm->ifa_family = AF_INET6; + ifm->ifa_prefixlen = 128; + ifm->ifa_flags = IFA_F_PERMANENT; + ifm->ifa_scope = RT_SCOPE_UNIVERSE; + if (ipv6_addr_scope(&ifaca->aca_addr)&IFA_SITE) + ifm->ifa_scope = RT_SCOPE_SITE; + ifm->ifa_index = ifaca->aca_idev->dev->ifindex; + RTA_PUT(skb, IFA_ANYCAST, 16, &ifaca->aca_addr); + ci.cstamp = (__u32)(TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifaca->aca_cstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.tstamp = (__u32)(TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) / HZ + * 100 + TIME_DELTA(ifaca->aca_tstamp, INITIAL_JIFFIES) % HZ + * 100 / HZ); + ci.ifa_prefered = INFINITY_LIFE_TIME; + ci.ifa_valid = INFINITY_LIFE_TIME; + RTA_PUT(skb, IFA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; - return nlmsg_end(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } enum addr_type_t @@ -3278,74 +3222,79 @@ static int inet6_dump_ifacaddr(struct sk_buff *skb, struct netlink_callback *cb) return inet6_dump_addr(skb, 
cb, type); } -static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr* nlh, - void *arg) +static int inet6_rtm_getaddr(struct sk_buff *in_skb, + struct nlmsghdr* nlh, void *arg) { - struct ifaddrmsg *ifm; - struct nlattr *tb[IFA_MAX+1]; + struct rtattr **rta = arg; + struct ifaddrmsg *ifm = NLMSG_DATA(nlh); struct in6_addr *addr = NULL; struct net_device *dev = NULL; struct inet6_ifaddr *ifa; struct sk_buff *skb; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); int err; - err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); - if (err < 0) - goto errout; - - addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL]); - if (addr == NULL) { - err = -EINVAL; - goto errout; + if (rta[IFA_ADDRESS-1]) { + if (RTA_PAYLOAD(rta[IFA_ADDRESS-1]) < sizeof(*addr)) + return -EINVAL; + addr = RTA_DATA(rta[IFA_ADDRESS-1]); } + if (rta[IFA_LOCAL-1]) { + if (RTA_PAYLOAD(rta[IFA_LOCAL-1]) < sizeof(*addr) || + (addr && memcmp(addr, RTA_DATA(rta[IFA_LOCAL-1]), sizeof(*addr)))) + return -EINVAL; + addr = RTA_DATA(rta[IFA_LOCAL-1]); + } + if (addr == NULL) + return -EINVAL; - ifm = nlmsg_data(nlh); if (ifm->ifa_index) dev = __dev_get_by_index(ifm->ifa_index); - if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) { - err = -EADDRNOTAVAIL; - goto errout; - } + if ((ifa = ipv6_get_ifaddr(addr, dev, 1)) == NULL) + return -EADDRNOTAVAIL; - if ((skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_KERNEL)) == NULL) { + if ((skb = alloc_skb(size, GFP_KERNEL)) == NULL) { err = -ENOBUFS; - goto errout_ifa; + goto out; } + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { - kfree_skb(skb); - goto errout_ifa; + err = -EMSGSIZE; + goto out_free; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -errout_ifa: + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: in6_ifa_put(ifa); -errout: return err; 
+out_free: + kfree_skb(skb); + goto out; } static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) { struct sk_buff *skb; - int err = -ENOBUFS; + int size = NLMSG_SPACE(sizeof(struct ifaddrmsg) + INET6_IFADDR_RTA_SPACE); - skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); - if (err < 0) { + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, ENOBUFS); + return; + } + if (inet6_fill_ifaddr(skb, ifa, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFADDR, EINVAL); + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFADDR; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFADDR, GFP_ATOMIC); } static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, @@ -3380,7 +3329,6 @@ static void inline ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ACCEPT_RA_RT_INFO_MAX_PLEN] = cnf->accept_ra_rt_info_max_plen; #endif #endif - array[DEVCONF_PROXY_NDP] = cnf->proxy_ndp; } /* Maximum length of ifinfomsg attributes */ @@ -3487,23 +3435,20 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) void inet6_ifinfo_notify(int event, struct inet6_dev *idev) { struct sk_buff *skb; - int payload = sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE; - int err = -ENOBUFS; + int size = NLMSG_SPACE(sizeof(struct ifinfomsg) + INET6_IFINFO_RTA_SPACE); - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); - if (skb == NULL) - goto errout; - - err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); - if (err < 0) { + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, ENOBUFS); + return; + } + if (inet6_fill_ifinfo(skb, idev, current->pid, 0, event, 0) < 0) { 
kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_IFINFO, EINVAL); + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_IFADDR, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_IFADDR, err); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_IFINFO; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_IFINFO, GFP_ATOMIC); } /* Maximum length of prefix_cacheinfo attributes */ @@ -3555,23 +3500,20 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo) { struct sk_buff *skb; - int payload = sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE; - int err = -ENOBUFS; - - skb = nlmsg_new(nlmsg_total_size(payload), GFP_ATOMIC); - if (skb == NULL) - goto errout; + int size = NLMSG_SPACE(sizeof(struct prefixmsg) + INET6_PREFIX_RTA_SPACE); - err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); - if (err < 0) { + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) { + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, ENOBUFS); + return; + } + if (inet6_fill_prefix(skb, idev, pinfo, current->pid, 0, event, 0) < 0) { kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_PREFIX, EINVAL); + return; } - - err = rtnl_notify(skb, 0, RTNLGRP_IPV6_PREFIX, NULL, GFP_ATOMIC); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_PREFIX, err); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_PREFIX; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_PREFIX, GFP_ATOMIC); } static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { @@ -3586,9 +3528,6 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, -#endif }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) @@ -3597,7 +3536,7 @@ static void __ipv6_ifa_notify(int 
event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: - ip6_ins_rt(ifp->rt); + ip6_ins_rt(ifp->rt, NULL, NULL, NULL); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); break; @@ -3606,7 +3545,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt)) + if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) dst_free(&ifp->rt->u.dst); break; } @@ -3614,10 +3553,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - rcu_read_lock_bh(); + read_lock_bh(&addrconf_lock); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - rcu_read_unlock_bh(); + read_unlock_bh(&addrconf_lock); } #ifdef CONFIG_SYSCTL @@ -3714,7 +3653,7 @@ static struct addrconf_sysctl_table ctl_table addrconf_conf_dir[2]; ctl_table addrconf_proto_dir[2]; ctl_table addrconf_root_dir[2]; -} addrconf_sysctl __read_mostly = { +} addrconf_sysctl = { .sysctl_header = NULL, .addrconf_vars = { { @@ -3903,14 +3842,6 @@ static struct addrconf_sysctl_table }, #endif #endif - { - .ctl_name = NET_IPV6_PROXY_NDP, - .procname = "proxy_ndp", - .data = &ipv6_devconf.proxy_ndp, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, { .ctl_name = 0, /* sentinel */ } diff --git a/trunk/net/ipv6/af_inet6.c b/trunk/net/ipv6/af_inet6.c index bf6e8aff19d4..ac85e9c532c2 100644 --- a/trunk/net/ipv6/af_inet6.c +++ b/trunk/net/ipv6/af_inet6.c @@ -59,9 +59,6 @@ #ifdef CONFIG_IPV6_TUNNEL #include #endif -#ifdef CONFIG_IPV6_MIP6 -#include -#endif #include #include @@ -70,7 +67,7 @@ MODULE_AUTHOR("Cast of dozens"); MODULE_DESCRIPTION("IPv6 protocol stack for Linux"); MODULE_LICENSE("GPL"); -int sysctl_ipv6_bindv6only __read_mostly; +int sysctl_ipv6_bindv6only; /* The inetsw table contains everything that inet_create needs to * build a new socket. 
@@ -640,7 +637,6 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; - security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; @@ -662,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - __ip6_dst_store(sk, dst, NULL, NULL); + __ip6_dst_store(sk, dst, NULL); } return 0; @@ -761,8 +757,6 @@ static int __init inet6_init(void) struct list_head *r; int err; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)); - #ifdef MODULE #if 0 /* FIXME --RR */ if (!mod_member_present(&__this_module, can_unload)) @@ -772,6 +766,11 @@ static int __init inet6_init(void) #endif #endif + if (sizeof(struct inet6_skb_parm) > sizeof(dummy_skb->cb)) { + printk(KERN_CRIT "inet6_proto_init: size fault\n"); + return -EINVAL; + } + err = proto_register(&tcpv6_prot, 1); if (err) goto out; @@ -857,9 +856,6 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); -#ifdef CONFIG_IPV6_MIP6 - mip6_init(); -#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -922,9 +918,6 @@ static void __exit inet6_exit(void) udp6_proc_exit(); tcp6_proc_exit(); raw6_proc_exit(); -#endif -#ifdef CONFIG_IPV6_MIP6 - mip6_fini(); #endif /* Cleanup code parts. 
*/ sit_cleanup(); diff --git a/trunk/net/ipv6/ah6.c b/trunk/net/ipv6/ah6.c index b0d83e8e4252..00ffa7bc6c9f 100644 --- a/trunk/net/ipv6/ah6.c +++ b/trunk/net/ipv6/ah6.c @@ -74,66 +74,6 @@ static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr) return 0; } -#ifdef CONFIG_IPV6_MIP6 -/** - * ipv6_rearrange_destopt - rearrange IPv6 destination options header - * @iph: IPv6 header - * @destopt: destionation options header - */ -static void ipv6_rearrange_destopt(struct ipv6hdr *iph, struct ipv6_opt_hdr *destopt) -{ - u8 *opt = (u8 *)destopt; - int len = ipv6_optlen(destopt); - int off = 0; - int optlen = 0; - - off += 2; - len -= 2; - - while (len > 0) { - - switch (opt[off]) { - - case IPV6_TLV_PAD0: - optlen = 1; - break; - default: - if (len < 2) - goto bad; - optlen = opt[off+1]+2; - if (len < optlen) - goto bad; - - /* Rearrange the source address in @iph and the - * addresses in home address option for final source. - * See 11.3.2 of RFC 3775 for details. - */ - if (opt[off] == IPV6_TLV_HAO) { - struct in6_addr final_addr; - struct ipv6_destopt_hao *hao; - - hao = (struct ipv6_destopt_hao *)&opt[off]; - if (hao->length != sizeof(hao->addr)) { - if (net_ratelimit()) - printk(KERN_WARNING "destopt hao: invalid header length: %u\n", hao->length); - goto bad; - } - ipv6_addr_copy(&final_addr, &hao->addr); - ipv6_addr_copy(&hao->addr, &iph->saddr); - ipv6_addr_copy(&iph->saddr, &final_addr); - } - break; - } - - off += optlen; - len -= optlen; - } - /* Note: ok if len == 0 */ -bad: - return; -} -#endif - /** * ipv6_rearrange_rthdr - rearrange IPv6 routing header * @iph: IPv6 header @@ -173,7 +113,7 @@ static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr) ipv6_addr_copy(&iph->daddr, &final_addr); } -static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) +static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len) { union { struct ipv6hdr *iph; @@ -188,12 +128,8 @@ static int ipv6_clear_mutable_options(struct 
ipv6hdr *iph, int len, int dir) while (exthdr.raw < end) { switch (nexthdr) { - case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 - if (dir == XFRM_POLICY_OUT) - ipv6_rearrange_destopt(iph, exthdr.opth); -#endif case NEXTHDR_HOP: + case NEXTHDR_DEST: if (!zero_out_mutable_opts(exthdr.opth)) { LIMIT_NETDEBUG( KERN_WARNING "overrun %sopts\n", @@ -228,9 +164,6 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr; char tmp_base[8]; struct { -#ifdef CONFIG_IPV6_MIP6 - struct in6_addr saddr; -#endif struct in6_addr daddr; char hdrs[0]; } *tmp_ext; @@ -255,15 +188,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) err = -ENOMEM; goto error; } -#ifdef CONFIG_IPV6_MIP6 - memcpy(tmp_ext, &top_iph->saddr, extlen); -#else memcpy(tmp_ext, &top_iph->daddr, extlen); -#endif err = ipv6_clear_mutable_options(top_iph, extlen - sizeof(*tmp_ext) + - sizeof(*top_iph), - XFRM_POLICY_OUT); + sizeof(*top_iph)); if (err) goto error_free_iph; } @@ -294,11 +222,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) memcpy(top_iph, tmp_base, sizeof(tmp_base)); if (tmp_ext) { -#ifdef CONFIG_IPV6_MIP6 - memcpy(&top_iph->saddr, tmp_ext, extlen); -#else memcpy(&top_iph->daddr, tmp_ext, extlen); -#endif error_free_iph: kfree(tmp_ext); } @@ -358,7 +282,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) if (!tmp_hdr) goto out; memcpy(tmp_hdr, skb->nh.raw, hdr_len); - if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len, XFRM_POLICY_IN)) + if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len)) goto free_out; skb->nh.ipv6h->priority = 0; skb->nh.ipv6h->flow_lbl[0] = 0; @@ -474,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; @@ -511,8 +435,7 @@ static struct xfrm_type ah6_type = .init_state = 
ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, - .output = ah6_output, - .hdr_offset = xfrm6_find_1stfragopt, + .output = ah6_output }; static struct inet6_protocol ah6_protocol = { diff --git a/trunk/net/ipv6/anycast.c b/trunk/net/ipv6/anycast.c index a9604764e015..f6881d7a0385 100644 --- a/trunk/net/ipv6/anycast.c +++ b/trunk/net/ipv6/anycast.c @@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) int onlink; onlink = 0; - rcu_read_lock(); + read_lock(&addrconf_lock); idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); @@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) } read_unlock_bh(&idev->lock); } - rcu_read_unlock(); + read_unlock(&addrconf_lock); return onlink; } @@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) write_unlock_bh(&idev->lock); dst_hold(&rt->u.dst); - if (ip6_ins_rt(rt)) + if (ip6_ins_rt(rt, NULL, NULL, NULL)) dst_release(&rt->u.dst); addrconf_join_solict(dev, &aca->aca_addr); @@ -378,7 +378,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr) addrconf_leave_solict(idev, &aca->aca_addr); dst_hold(&aca->aca_rt->u.dst); - if (ip6_del_rt(aca->aca_rt)) + if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL)) dst_free(&aca->aca_rt->u.dst); else dst_release(&aca->aca_rt->u.dst); diff --git a/trunk/net/ipv6/datagram.c b/trunk/net/ipv6/datagram.c index 7206747022fc..3b55b4c8e2d1 100644 --- a/trunk/net/ipv6/datagram.c +++ b/trunk/net/ipv6/datagram.c @@ -156,8 +156,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); - if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; @@ -193,12 +191,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) ip6_dst_store(sk, dst, 
ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? - &np->saddr : -#endif - NULL); + &np->daddr : NULL); sk->sk_state = TCP_ESTABLISHED; out: @@ -648,13 +641,10 @@ int datagram_send_ctl(struct msghdr *msg, struct flowi *fl, rthdr = (struct ipv6_rt_hdr *)CMSG_DATA(cmsg); - switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: -#endif - break; - default: + /* + * TYPE 0 + */ + if (rthdr->type) { err = -EINVAL; goto exit_f; } diff --git a/trunk/net/ipv6/esp6.c b/trunk/net/ipv6/esp6.c index e78680a9985b..2ebfd281e721 100644 --- a/trunk/net/ipv6/esp6.c +++ b/trunk/net/ipv6/esp6.c @@ -99,13 +99,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) { - if (unlikely(!esp->conf.ivinitted)) { - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); - esp->conf.ivinitted = 1; - } + if (esp->conf.ivlen) crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); - } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -242,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. 
*/ @@ -358,12 +353,12 @@ static int esp6_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - esp->conf.ivinitted = 0; + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; @@ -384,8 +379,7 @@ static struct xfrm_type esp6_type = .destructor = esp6_destroy, .get_max_size = esp6_get_max_size, .input = esp6_input, - .output = esp6_output, - .hdr_offset = xfrm6_find_1stfragopt, + .output = esp6_output }; static struct inet6_protocol esp6_protocol = { diff --git a/trunk/net/ipv6/exthdrs.c b/trunk/net/ipv6/exthdrs.c index 88c96b10684c..86dac106873b 100644 --- a/trunk/net/ipv6/exthdrs.c +++ b/trunk/net/ipv6/exthdrs.c @@ -43,54 +43,9 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 -#include -#endif #include -int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) -{ - int packet_len = skb->tail - skb->nh.raw; - struct ipv6_opt_hdr *hdr; - int len; - - if (offset + 2 > packet_len) - goto bad; - hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); - len = ((hdr->hdrlen + 1) << 3); - - if (offset + len > packet_len) - goto bad; - - offset += 2; - len -= 2; - - while (len > 0) { - int opttype = skb->nh.raw[offset]; - int optlen; - - if (opttype == type) - return offset; - - switch (opttype) { - case IPV6_TLV_PAD0: - optlen = 1; - break; - default: - optlen = skb->nh.raw[offset + 1] + 2; - if (optlen > len) - goto bad; - break; - } - offset += optlen; - len -= optlen; - } - /* not_found */ - bad: - return -1; -} - /* * Parsing tlv encoded headers. 
* @@ -101,7 +56,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) struct tlvtype_proc { int type; - int (*func)(struct sk_buff **skbp, int offset); + int (*func)(struct sk_buff *skb, int offset); }; /********************* @@ -110,10 +65,8 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; - switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -138,9 +91,8 @@ static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; int off = skb->h.raw - skb->nh.raw; int len = ((skb->h.raw[1]+1)<<3); @@ -170,13 +122,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skbp, off) == 0) + if (curr->func(skb, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skbp, off) == 0) + if (ip6_tlvopt_unknown(skb, off) == 0) return 0; } break; @@ -195,85 +147,8 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) Destination options header. 
*****************************/ -#ifdef CONFIG_IPV6_MIP6 -static int ipv6_dest_hao(struct sk_buff **skbp, int optoff) -{ - struct sk_buff *skb = *skbp; - struct ipv6_destopt_hao *hao; - struct inet6_skb_parm *opt = IP6CB(skb); - struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb->nh.raw; - struct in6_addr tmp_addr; - int ret; - - if (opt->dsthao) { - LIMIT_NETDEBUG(KERN_DEBUG "hao duplicated\n"); - goto discard; - } - opt->dsthao = opt->dst1; - opt->dst1 = 0; - - hao = (struct ipv6_destopt_hao *)(skb->nh.raw + optoff); - - if (hao->length != 16) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao invalid option length = %d\n", hao->length); - goto discard; - } - - if (!(ipv6_addr_type(&hao->addr) & IPV6_ADDR_UNICAST)) { - LIMIT_NETDEBUG( - KERN_DEBUG "hao is not an unicast addr: " NIP6_FMT "\n", NIP6(hao->addr)); - goto discard; - } - - ret = xfrm6_input_addr(skb, (xfrm_address_t *)&ipv6h->daddr, - (xfrm_address_t *)&hao->addr, IPPROTO_DSTOPTS); - if (unlikely(ret < 0)) - goto discard; - - if (skb_cloned(skb)) { - struct sk_buff *skb2 = skb_copy(skb, GFP_ATOMIC); - struct inet6_skb_parm *opt2; - - if (skb2 == NULL) - goto discard; - - opt2 = IP6CB(skb2); - memcpy(opt2, opt, sizeof(*opt2)); - - kfree_skb(skb); - - /* update all variable using below by copied skbuff */ - *skbp = skb = skb2; - hao = (struct ipv6_destopt_hao *)(skb2->nh.raw + optoff); - ipv6h = (struct ipv6hdr *)skb2->nh.raw; - } - - if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; - - ipv6_addr_copy(&tmp_addr, &ipv6h->saddr); - ipv6_addr_copy(&ipv6h->saddr, &hao->addr); - ipv6_addr_copy(&hao->addr, &tmp_addr); - - if (skb->tstamp.off_sec == 0) - __net_timestamp(skb); - - return 1; - - discard: - kfree_skb(skb); - return 0; -} -#endif - static struct tlvtype_proc tlvprocdestopt_lst[] = { -#ifdef CONFIG_IPV6_MIP6 - { - .type = IPV6_TLV_HAO, - .func = ipv6_dest_hao, - }, -#endif + /* No destination options are defined now */ {-1, NULL} }; @@ -281,9 +156,6 @@ static int ipv6_destopt_rcv(struct 
sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 - __u16 dstbuf; -#endif if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) || !pskb_may_pull(skb, (skb->h.raw-skb->data)+((skb->h.raw[1]+1)<<3))) { @@ -294,19 +166,10 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; -#ifdef CONFIG_IPV6_MIP6 - dstbuf = opt->dst1; -#endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { - skb = *skbp; + if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { skb->h.raw += ((skb->h.raw[1]+1)<<3); - opt = IP6CB(skb); -#ifdef CONFIG_IPV6_MIP6 - opt->nhoff = dstbuf; -#else opt->nhoff = opt->dst1; -#endif return 1; } @@ -356,7 +219,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); - struct in6_addr *addr = NULL; + struct in6_addr *addr; struct in6_addr daddr; int n, i; @@ -381,23 +244,6 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { - switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: - /* Silently discard type 2 header unless it was - * processed by own - */ - if (!addr) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); - kfree_skb(skb); - return -1; - } - break; -#endif - default: - break; - } - opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; @@ -407,29 +253,17 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) return 1; } - switch (hdr->type) { - case IPV6_SRCRT_TYPE_0: - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); - return -1; - } - break; -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: - /* Silently discard invalid RTH type 2 */ - if (hdr->hdrlen != 2 || hdr->segments_left != 1) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - 
kfree_skb(skb); - return -1; - } - break; -#endif - default: + if (hdr->type != IPV6_SRCRT_TYPE_0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } + + if (hdr->hdrlen & 0x01) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); + return -1; + } /* * This is the routing header forwarding algorithm from @@ -460,7 +294,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) hdr = (struct ipv6_rt_hdr *) skb2->h.raw; } - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (skb->ip_summed == CHECKSUM_HW) skb->ip_summed = CHECKSUM_NONE; i = n - --hdr->segments_left; @@ -469,27 +303,6 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) addr = rthdr->addr; addr += i - 1; - switch (hdr->type) { -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: - if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, - (xfrm_address_t *)&skb->nh.ipv6h->saddr, - IPPROTO_ROUTING) < 0) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); - kfree_skb(skb); - return -1; - } - if (!ipv6_chk_home_addr(addr)) { - IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); - kfree_skb(skb); - return -1; - } - break; -#endif - default: - break; - } - if (ipv6_addr_is_multicast(addr)) { IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); @@ -608,10 +421,8 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) +static int ipv6_hop_ra(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; - if (skb->nh.raw[optoff+1] == 2) { IP6CB(skb)->ra = optoff; return 1; @@ -624,9 +435,8 @@ static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) +static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) { - struct sk_buff *skb = *skbp; u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { @@ -675,9 +485,8 @@ 
static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff **skbp) +int ipv6_parse_hopopts(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -693,10 +502,8 @@ int ipv6_parse_hopopts(struct sk_buff **skbp) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { - skb = *skbp; + if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { skb->h.raw += (skb->h.raw[1]+1)<<3; - opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; } diff --git a/trunk/net/ipv6/fib6_rules.c b/trunk/net/ipv6/fib6_rules.c deleted file mode 100644 index 34f5bfaddfc2..000000000000 --- a/trunk/net/ipv6/fib6_rules.c +++ /dev/null @@ -1,305 +0,0 @@ -/* - * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules - * - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation, version 2. 
- * - * Authors - * Thomas Graf - * Ville Nuorvala - */ - -#include -#include - -#include -#include -#include -#include - -struct fib6_rule -{ - struct fib_rule common; - struct rt6key src; - struct rt6key dst; -#ifdef CONFIG_IPV6_ROUTE_FWMARK - u32 fwmark; - u32 fwmask; -#endif - u8 tclass; -}; - -static struct fib_rules_ops fib6_rules_ops; - -static struct fib6_rule main_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0x7FFE, - .action = FR_ACT_TO_TBL, - .table = RT6_TABLE_MAIN, - }, -}; - -static struct fib6_rule local_rule = { - .common = { - .refcnt = ATOMIC_INIT(2), - .pref = 0, - .action = FR_ACT_TO_TBL, - .table = RT6_TABLE_LOCAL, - .flags = FIB_RULE_PERMANENT, - }, -}; - -static LIST_HEAD(fib6_rules); - -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) -{ - struct fib_lookup_arg arg = { - .lookup_ptr = lookup, - }; - - fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); - if (arg.rule) - fib_rule_put(arg.rule); - - if (arg.result) - return (struct dst_entry *) arg.result; - - dst_hold(&ip6_null_entry.u.dst); - return &ip6_null_entry.u.dst; -} - -static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) -{ - struct rt6_info *rt = NULL; - struct fib6_table *table; - pol_lookup_t lookup = arg->lookup_ptr; - - switch (rule->action) { - case FR_ACT_TO_TBL: - break; - case FR_ACT_UNREACHABLE: - rt = &ip6_null_entry; - goto discard_pkt; - default: - case FR_ACT_BLACKHOLE: - rt = &ip6_blk_hole_entry; - goto discard_pkt; - case FR_ACT_PROHIBIT: - rt = &ip6_prohibit_entry; - goto discard_pkt; - } - - table = fib6_get_table(rule->table); - if (table) - rt = lookup(table, flp, flags); - - if (rt != &ip6_null_entry) - goto out; - dst_release(&rt->u.dst); - rt = NULL; - goto out; - -discard_pkt: - dst_hold(&rt->u.dst); -out: - arg->result = rt; - return rt == NULL ? 
-EAGAIN : 0; -} - - -static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) -{ - struct fib6_rule *r = (struct fib6_rule *) rule; - - if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) - return 0; - - if ((flags & RT6_LOOKUP_F_HAS_SADDR) && - !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) - return 0; - - if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) - return 0; - -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if ((r->fwmark ^ fl->fl6_fwmark) & r->fwmask) - return 0; -#endif - - return 1; -} - -static struct nla_policy fib6_rule_policy[FRA_MAX+1] __read_mostly = { - [FRA_IFNAME] = { .type = NLA_STRING, .len = IFNAMSIZ - 1 }, - [FRA_PRIORITY] = { .type = NLA_U32 }, - [FRA_SRC] = { .len = sizeof(struct in6_addr) }, - [FRA_DST] = { .len = sizeof(struct in6_addr) }, - [FRA_FWMARK] = { .type = NLA_U32 }, - [FRA_FWMASK] = { .type = NLA_U32 }, - [FRA_TABLE] = { .type = NLA_U32 }, -}; - -static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - int err = -EINVAL; - struct fib6_rule *rule6 = (struct fib6_rule *) rule; - - if (frh->src_len > 128 || frh->dst_len > 128 || - (frh->tos & ~IPV6_FLOWINFO_MASK)) - goto errout; - - if (rule->action == FR_ACT_TO_TBL) { - if (rule->table == RT6_TABLE_UNSPEC) - goto errout; - - if (fib6_new_table(rule->table) == NULL) { - err = -ENOBUFS; - goto errout; - } - } - - if (tb[FRA_SRC]) - nla_memcpy(&rule6->src.addr, tb[FRA_SRC], - sizeof(struct in6_addr)); - - if (tb[FRA_DST]) - nla_memcpy(&rule6->dst.addr, tb[FRA_DST], - sizeof(struct in6_addr)); - -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK]) { - rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); - if (rule6->fwmark) { - /* - * if the mark value is non-zero, - * all bits are compared by default - * unless a mask is explicitly specified. 
- */ - rule6->fwmask = 0xFFFFFFFF; - } - } - - if (tb[FRA_FWMASK]) - rule6->fwmask = nla_get_u32(tb[FRA_FWMASK]); -#endif - - rule6->src.plen = frh->src_len; - rule6->dst.plen = frh->dst_len; - rule6->tclass = frh->tos; - - err = 0; -errout: - return err; -} - -static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, - struct nlattr **tb) -{ - struct fib6_rule *rule6 = (struct fib6_rule *) rule; - - if (frh->src_len && (rule6->src.plen != frh->src_len)) - return 0; - - if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) - return 0; - - if (frh->tos && (rule6->tclass != frh->tos)) - return 0; - - if (tb[FRA_SRC] && - nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) - return 0; - - if (tb[FRA_DST] && - nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) - return 0; - -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) - return 0; - - if (tb[FRA_FWMASK] && (rule6->fwmask != nla_get_u32(tb[FRA_FWMASK]))) - return 0; -#endif - - return 1; -} - -static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, - struct nlmsghdr *nlh, struct fib_rule_hdr *frh) -{ - struct fib6_rule *rule6 = (struct fib6_rule *) rule; - - frh->family = AF_INET6; - frh->dst_len = rule6->dst.plen; - frh->src_len = rule6->src.plen; - frh->tos = rule6->tclass; - - if (rule6->dst.plen) - NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr); - - if (rule6->src.plen) - NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr); - -#ifdef CONFIG_IPV6_ROUTE_FWMARK - if (rule6->fwmark) - NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); - - if (rule6->fwmask || rule6->fwmark) - NLA_PUT_U32(skb, FRA_FWMASK, rule6->fwmask); -#endif - - return 0; - -nla_put_failure: - return -ENOBUFS; -} - -int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - return fib_rules_dump(skb, cb, AF_INET6); -} - -static u32 fib6_rule_default_pref(void) -{ - return 0x3FFF; 
-} - -static struct fib_rules_ops fib6_rules_ops = { - .family = AF_INET6, - .rule_size = sizeof(struct fib6_rule), - .action = fib6_rule_action, - .match = fib6_rule_match, - .configure = fib6_rule_configure, - .compare = fib6_rule_compare, - .fill = fib6_rule_fill, - .default_pref = fib6_rule_default_pref, - .nlgroup = RTNLGRP_IPV6_RULE, - .policy = fib6_rule_policy, - .rules_list = &fib6_rules, - .owner = THIS_MODULE, -}; - -void __init fib6_rules_init(void) -{ - list_add_tail(&local_rule.common.list, &fib6_rules); - list_add_tail(&main_rule.common.list, &fib6_rules); - - fib_rules_register(&fib6_rules_ops); -} - -void fib6_rules_cleanup(void) -{ - fib_rules_unregister(&fib6_rules_ops); -} diff --git a/trunk/net/ipv6/icmp.c b/trunk/net/ipv6/icmp.c index 4ec876066b3f..356a8a7ef22a 100644 --- a/trunk/net/ipv6/icmp.c +++ b/trunk/net/ipv6/icmp.c @@ -151,7 +151,7 @@ static int is_ineligible(struct sk_buff *skb) return 0; } -static int sysctl_icmpv6_time __read_mostly = 1*HZ; +static int sysctl_icmpv6_time = 1*HZ; /* * Check the ICMP output rate limit @@ -273,29 +273,6 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st return 0; } -#ifdef CONFIG_IPV6_MIP6 -static void mip6_addr_swap(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct inet6_skb_parm *opt = IP6CB(skb); - struct ipv6_destopt_hao *hao; - struct in6_addr tmp; - int off; - - if (opt->dsthao) { - off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); - if (likely(off >= 0)) { - hao = (struct ipv6_destopt_hao *)(skb->nh.raw + off); - ipv6_addr_copy(&tmp, &iph->saddr); - ipv6_addr_copy(&iph->saddr, &hao->addr); - ipv6_addr_copy(&hao->addr, &tmp); - } - } -} -#else -static inline void mip6_addr_swap(struct sk_buff *skb) {} -#endif - /* * Send an ICMP message in response to a packet in error */ @@ -373,8 +350,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, return; } - mip6_addr_swap(skb); - memset(&fl, 0, sizeof(fl)); fl.proto = 
IPPROTO_ICMPV6; ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr); @@ -383,7 +358,6 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; - security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -498,7 +472,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; - security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -631,7 +604,7 @@ static int icmpv6_rcv(struct sk_buff **pskb) /* Perform checksum. */ switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, skb->csum)) break; diff --git a/trunk/net/ipv6/inet6_connection_sock.c b/trunk/net/ipv6/inet6_connection_sock.c index 827f41d1478b..bf491077b822 100644 --- a/trunk/net/ipv6/inet6_connection_sock.c +++ b/trunk/net/ipv6/inet6_connection_sock.c @@ -157,7 +157,6 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; - security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -186,7 +185,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL, NULL); + __ip6_dst_store(sk, dst, NULL); } skb->dst = dst_clone(dst); diff --git a/trunk/net/ipv6/ip6_fib.c b/trunk/net/ipv6/ip6_fib.c index 8fcae7a6510b..764221220afd 100644 --- a/trunk/net/ipv6/ip6_fib.c +++ b/trunk/net/ipv6/ip6_fib.c @@ -18,7 +18,6 @@ * Yuji SEKIYA @USAGI: Support default route on router node; * remove ip6_null_entry from the top of * routing table. - * Ville Nuorvala: Fixed routing subtrees. 
*/ #include #include @@ -27,7 +26,6 @@ #include #include #include -#include #ifdef CONFIG_PROC_FS #include @@ -70,19 +68,19 @@ struct fib6_cleaner_t void *arg; }; -static DEFINE_RWLOCK(fib6_walker_lock); +DEFINE_RWLOCK(fib6_walker_lock); + #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S +#define SUBTREE(fn) ((fn)->subtree) #else #define FWS_INIT FWS_L +#define SUBTREE(fn) NULL #endif static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); -static struct rt6_info * fib6_find_prefix(struct fib6_node *fn); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); -static int fib6_walk(struct fib6_walker_t *w); -static int fib6_walk_continue(struct fib6_walker_t *w); /* * A routing update causes an increase of the serial number on the @@ -95,31 +93,13 @@ static __u32 rt_sernum; static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); -static struct fib6_walker_t fib6_walker_list = { +struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, .next = &fib6_walker_list, }; #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) -static inline void fib6_walker_link(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; - write_unlock_bh(&fib6_walker_lock); -} - -static inline void fib6_walker_unlink(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; - write_unlock_bh(&fib6_walker_lock); -} static __inline__ u32 fib6_new_sernum(void) { u32 n = ++rt_sernum; @@ -167,253 +147,6 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } -static struct fib6_table fib6_main_tbl = { - .tb6_id = RT6_TABLE_MAIN, - .tb6_lock = RW_LOCK_UNLOCKED, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; - -#ifdef 
CONFIG_IPV6_MULTIPLE_TABLES -#define FIB_TABLE_HASHSZ 256 -#else -#define FIB_TABLE_HASHSZ 1 -#endif -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; - -static void fib6_link_table(struct fib6_table *tb) -{ - unsigned int h; - - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); - - /* - * No protection necessary, this is the only list mutatation - * operation, tables never disappear once they exist. - */ - hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); -} - -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -static struct fib6_table fib6_local_tbl = { - .tb6_id = RT6_TABLE_LOCAL, - .tb6_lock = RW_LOCK_UNLOCKED, - .tb6_root = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, - }, -}; - -static struct fib6_table *fib6_alloc_table(u32 id) -{ - struct fib6_table *table; - - table = kzalloc(sizeof(*table), GFP_ATOMIC); - if (table != NULL) { - table->tb6_id = id; - table->tb6_lock = RW_LOCK_UNLOCKED; - table->tb6_root.leaf = &ip6_null_entry; - table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; - } - - return table; -} - -struct fib6_table *fib6_new_table(u32 id) -{ - struct fib6_table *tb; - - if (id == 0) - id = RT6_TABLE_MAIN; - tb = fib6_get_table(id); - if (tb) - return tb; - - tb = fib6_alloc_table(id); - if (tb != NULL) - fib6_link_table(tb); - - return tb; -} - -struct fib6_table *fib6_get_table(u32 id) -{ - struct fib6_table *tb; - struct hlist_node *node; - unsigned int h; - - if (id == 0) - id = RT6_TABLE_MAIN; - h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); - hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { - if (tb->tb6_id == id) { - rcu_read_unlock(); - return tb; - } - } - rcu_read_unlock(); - - return NULL; -} - -static void __init fib6_tables_init(void) -{ - fib6_link_table(&fib6_main_tbl); - fib6_link_table(&fib6_local_tbl); -} - -#else - -struct fib6_table *fib6_new_table(u32 id) -{ - return fib6_get_table(id); -} - -struct fib6_table *fib6_get_table(u32 id) -{ - return &fib6_main_tbl; -} 
- -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) -{ - return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); -} - -static void __init fib6_tables_init(void) -{ - fib6_link_table(&fib6_main_tbl); -} - -#endif - -static int fib6_dump_node(struct fib6_walker_t *w) -{ - int res; - struct rt6_info *rt; - - for (rt = w->leaf; rt; rt = rt->u.next) { - res = rt6_dump_route(rt, w->args); - if (res < 0) { - /* Frame is full, suspend walking */ - w->leaf = rt; - return 1; - } - BUG_TRAP(res!=0); - } - w->leaf = NULL; - return 0; -} - -static void fib6_dump_end(struct netlink_callback *cb) -{ - struct fib6_walker_t *w = (void*)cb->args[2]; - - if (w) { - cb->args[2] = 0; - kfree(w); - } - cb->done = (void*)cb->args[3]; - cb->args[1] = 3; -} - -static int fib6_dump_done(struct netlink_callback *cb) -{ - fib6_dump_end(cb); - return cb->done ? cb->done(cb) : 0; -} - -static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, - struct netlink_callback *cb) -{ - struct fib6_walker_t *w; - int res; - - w = (void *)cb->args[2]; - w->root = &table->tb6_root; - - if (cb->args[4] == 0) { - read_lock_bh(&table->tb6_lock); - res = fib6_walk(w); - read_unlock_bh(&table->tb6_lock); - if (res > 0) - cb->args[4] = 1; - } else { - read_lock_bh(&table->tb6_lock); - res = fib6_walk_continue(w); - read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; - } - fib6_walker_unlink(w); - cb->args[4] = 0; - } -end: - return res; -} - -int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) -{ - unsigned int h, s_h; - unsigned int e = 0, s_e; - struct rt6_rtnl_dump_arg arg; - struct fib6_walker_t *w; - struct fib6_table *tb; - struct hlist_node *node; - int res = 0; - - s_h = cb->args[0]; - s_e = cb->args[1]; - - w = (void *)cb->args[2]; - if (w == NULL) { - /* New dump: - * - * 1. hook callback destructor. 
- */ - cb->args[3] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. - */ - w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) - return -ENOMEM; - w->func = fib6_dump_node; - cb->args[2] = (long)w; - } - - arg.skb = skb; - arg.cb = cb; - w->args = &arg; - - for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { - e = 0; - hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { - if (e < s_e) - goto next; - res = fib6_dump_table(tb, skb, cb); - if (res != 0) - goto out; -next: - e++; - } - } -out: - cb->args[1] = e; - cb->args[0] = h; - - res = res < 0 ? res : skb->len; - if (res <= 0) - fib6_dump_end(cb); - return res; -} /* * Routing Table @@ -610,7 +343,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info) + struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -665,7 +398,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, info); + inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); rt6_stats.fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { @@ -695,9 +428,10 @@ void fib6_force_start_gc(void) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { - struct fib6_node *fn, *pn = NULL; + struct fib6_node *fn; int err = -ENOMEM; fn = fib6_add_1(root, &rt->rt6i_dst.addr, sizeof(struct in6_addr), @@ -706,8 +440,6 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (fn == NULL) goto out; - pn = fn; - #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen) { struct fib6_node *sn; @@ -753,6 +485,10 @@ int 
fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) /* Now link new subtree to main tree */ sfn->parent = fn; fn->subtree = sfn; + if (fn->leaf == NULL) { + fn->leaf = rt; + atomic_inc(&rt->rt6i_ref); + } } else { sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, sizeof(struct in6_addr), rt->rt6i_src.plen, @@ -762,42 +498,21 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) goto st_failure; } - if (fn->leaf == NULL) { - fn->leaf = rt; - atomic_inc(&rt->rt6i_ref); - } fn = sn; } #endif - err = fib6_add_rt2node(fn, rt, info); + err = fib6_add_rt2node(fn, rt, nlh, req); if (err == 0) { fib6_start_gc(rt); if (!(rt->rt6i_flags&RTF_CACHE)) - fib6_prune_clones(pn, rt); + fib6_prune_clones(fn, rt); } out: - if (err) { -#ifdef CONFIG_IPV6_SUBTREES - /* - * If fib6_add_1 has cleared the old leaf pointer in the - * super-tree leaf node we have to find a new one for it. - */ - if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn->leaf = fib6_find_prefix(pn); -#if RT6_DEBUG >= 2 - if (!pn->leaf) { - BUG_TRAP(pn->leaf != NULL); - pn->leaf = &ip6_null_entry; - } -#endif - atomic_inc(&pn->leaf->rt6i_ref); - } -#endif + if (err) dst_free(&rt->u.dst); - } return err; #ifdef CONFIG_IPV6_SUBTREES @@ -828,9 +543,6 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node *fn; int dir; - if (unlikely(args->offset == 0)) - return NULL; - /* * Descend on a tree */ @@ -852,26 +564,33 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, break; } - while(fn) { - if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { + while ((fn->fn_flags & RTN_ROOT) == 0) { +#ifdef CONFIG_IPV6_SUBTREES + if (fn->subtree) { + struct fib6_node *st; + struct lookup_args *narg; + + narg = args + 1; + + if (narg->addr) { + st = fib6_lookup_1(fn->subtree, narg); + + if (st && !(st->fn_flags & RTN_ROOT)) + return st; + } + } +#endif + + if (fn->fn_flags & RTN_RTINFO) { struct rt6key *key; key = (struct rt6key 
*) ((u8 *) fn->leaf + args->offset); - if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { -#ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) - fn = fib6_lookup_1(fn->subtree, args + 1); -#endif - if (!fn || fn->fn_flags & RTN_RTINFO) - return fn; - } + if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) + return fn; } - if (fn->fn_flags & RTN_ROOT) - break; - fn = fn->parent; } @@ -881,24 +600,18 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, struct fib6_node * fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr) { + struct lookup_args args[2]; struct fib6_node *fn; - struct lookup_args args[] = { - { - .offset = offsetof(struct rt6_info, rt6i_dst), - .addr = daddr, - }, + + args[0].offset = offsetof(struct rt6_info, rt6i_dst); + args[0].addr = daddr; + #ifdef CONFIG_IPV6_SUBTREES - { - .offset = offsetof(struct rt6_info, rt6i_src), - .addr = saddr, - }, + args[1].offset = offsetof(struct rt6_info, rt6i_src); + args[1].addr = saddr; #endif - { - .offset = 0, /* sentinel */ - } - }; - fn = fib6_lookup_1(root, daddr ? 
args : args + 1); + fn = fib6_lookup_1(root, args); if (fn == NULL || fn->fn_flags & RTN_TL_ROOT) fn = root; @@ -954,8 +667,10 @@ struct fib6_node * fib6_locate(struct fib6_node *root, #ifdef CONFIG_IPV6_SUBTREES if (src_len) { BUG_TRAP(saddr!=NULL); - if (fn && fn->subtree) - fn = fib6_locate_1(fn->subtree, saddr, src_len, + if (fn == NULL) + fn = fn->subtree; + if (fn) + fn = fib6_locate_1(fn, saddr, src_len, offsetof(struct rt6_info, rt6i_src)); } #endif @@ -984,7 +699,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) if(fn->right) return fn->right->leaf; - fn = FIB6_SUBTREE(fn); + fn = SUBTREE(fn); } return NULL; } @@ -1015,7 +730,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) if (fn->right) child = fn->right, children |= 1; if (fn->left) child = fn->left, children |= 2; - if (children == 3 || FIB6_SUBTREE(fn) + if (children == 3 || SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ || (children && fn->fn_flags&RTN_ROOT) @@ -1034,9 +749,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES - if (FIB6_SUBTREE(pn) == fn) { + if (SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); - FIB6_SUBTREE(pn) = NULL; + SUBTREE(pn) = NULL; nstate = FWS_L; } else { BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); @@ -1084,7 +799,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) + if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -1094,7 +809,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) } static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nl_info *info) + struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; @@ -1150,11 +865,11 @@ static void fib6_del_route(struct fib6_node *fn, 
struct rt6_info **rtp, if (atomic_read(&rt->rt6i_ref) != 1) BUG(); } - inet6_rt_notify(RTM_DELROUTE, rt, info); + inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); rt6_release(rt); } -int fib6_del(struct rt6_info *rt, struct nl_info *info) +int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -1170,18 +885,8 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) BUG_TRAP(fn->fn_flags&RTN_RTINFO); - if (!(rt->rt6i_flags&RTF_CACHE)) { - struct fib6_node *pn = fn; -#ifdef CONFIG_IPV6_SUBTREES - /* clones of this route might be in another subtree */ - if (rt->rt6i_src.plen) { - while (!(pn->fn_flags&RTN_ROOT)) - pn = pn->parent; - pn = pn->parent; - } -#endif - fib6_prune_clones(pn, rt); - } + if (!(rt->rt6i_flags&RTF_CACHE)) + fib6_prune_clones(fn, rt); /* * Walk the leaf entries looking for ourself @@ -1189,7 +894,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { if (*rtp == rt) { - fib6_del_route(fn, rtp, info); + fib6_del_route(fn, rtp, nlh, _rtattr, req); return 0; } } @@ -1220,7 +925,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * <0 -> walk is terminated by an error. 
*/ -static int fib6_walk_continue(struct fib6_walker_t *w) +int fib6_walk_continue(struct fib6_walker_t *w) { struct fib6_node *fn, *pn; @@ -1237,8 +942,8 @@ static int fib6_walk_continue(struct fib6_walker_t *w) switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: - if (FIB6_SUBTREE(fn)) { - w->node = FIB6_SUBTREE(fn); + if (SUBTREE(fn)) { + w->node = SUBTREE(fn); continue; } w->state = FWS_L; @@ -1272,7 +977,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) pn = fn->parent; w->node = pn; #ifdef CONFIG_IPV6_SUBTREES - if (FIB6_SUBTREE(pn) == fn) { + if (SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); w->state = FWS_L; continue; @@ -1294,7 +999,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) } } -static int fib6_walk(struct fib6_walker_t *w) +int fib6_walk(struct fib6_walker_t *w) { int res; @@ -1318,7 +1023,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL); + res = fib6_del(rt, NULL, NULL, NULL); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); @@ -1344,9 +1049,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. 
*/ -static void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) +void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) { struct fib6_cleaner_t c; @@ -1359,25 +1064,6 @@ static void fib6_clean_tree(struct fib6_node *root, fib6_walk(&c.w); } -void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) -{ - struct fib6_table *table; - struct hlist_node *node; - unsigned int h; - - rcu_read_lock(); - for (h = 0; h < FIB_TABLE_HASHSZ; h++) { - hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], - tb6_hlist) { - write_lock_bh(&table->tb6_lock); - fib6_clean_tree(&table->tb6_root, func, prune, arg); - write_unlock_bh(&table->tb6_lock); - } - } - rcu_read_unlock(); -} - static int fib6_prune_clone(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & RTF_CACHE) { @@ -1456,8 +1142,11 @@ void fib6_run_gc(unsigned long dummy) } gc_args.more = 0; + + write_lock_bh(&rt6_lock); ndisc_dst_gc(&gc_args.more); - fib6_clean_all(fib6_age, 0, NULL); + fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); + write_unlock_bh(&rt6_lock); if (gc_args.more) mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); @@ -1472,10 +1161,10 @@ void __init fib6_init(void) { fib6_node_kmem = kmem_cache_create("fib6_nodes", sizeof(struct fib6_node), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - - fib6_tables_init(); + if (!fib6_node_kmem) + panic("cannot create fib6_nodes cache"); } void fib6_gc_cleanup(void) diff --git a/trunk/net/ipv6/ip6_input.c b/trunk/net/ipv6/ip6_input.c index 6b8e6d76a58b..25c2a9e03895 100644 --- a/trunk/net/ipv6/ip6_input.c +++ b/trunk/net/ipv6/ip6_input.c @@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(&skb) < 0) { + if (ipv6_parse_hopopts(skb) < 0) { 
IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } diff --git a/trunk/net/ipv6/ip6_output.c b/trunk/net/ipv6/ip6_output.c index 66716911962e..4fb47a252913 100644 --- a/trunk/net/ipv6/ip6_output.c +++ b/trunk/net/ipv6/ip6_output.c @@ -308,56 +308,6 @@ static int ip6_call_ra_chain(struct sk_buff *skb, int sel) return 0; } -static int ip6_forward_proxy_check(struct sk_buff *skb) -{ - struct ipv6hdr *hdr = skb->nh.ipv6h; - u8 nexthdr = hdr->nexthdr; - int offset; - - if (ipv6_ext_hdr(nexthdr)) { - offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); - if (offset < 0) - return 0; - } else - offset = sizeof(struct ipv6hdr); - - if (nexthdr == IPPROTO_ICMPV6) { - struct icmp6hdr *icmp6; - - if (!pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) - return 0; - - icmp6 = (struct icmp6hdr *)(skb->nh.raw + offset); - - switch (icmp6->icmp6_type) { - case NDISC_ROUTER_SOLICITATION: - case NDISC_ROUTER_ADVERTISEMENT: - case NDISC_NEIGHBOUR_SOLICITATION: - case NDISC_NEIGHBOUR_ADVERTISEMENT: - case NDISC_REDIRECT: - /* For reaction involving unicast neighbor discovery - * message destined to the proxied address, pass it to - * input function. - */ - return 1; - default: - break; - } - } - - /* - * The proxying router can't forward traffic sent to a link-local - * address, so signal the sender and discard the packet. This - * behavior is clarified by the MIPv6 specification. - */ - if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) { - dst_link_failure(skb); - return -1; - } - - return 0; -} - static inline int ip6_forward_finish(struct sk_buff *skb) { return dst_output(skb); @@ -412,18 +362,6 @@ int ip6_forward(struct sk_buff *skb) return -ETIMEDOUT; } - /* XXX: idev->cnf.proxy_ndp? 
*/ - if (ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) { - int proxied = ip6_forward_proxy_check(skb); - if (proxied > 0) - return ip6_input(skb); - else if (proxied < 0) { - IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); - goto drop; - } - } - if (!xfrm6_route_forward(skb)) { IP6_INC_STATS(IPSTATS_MIB_INDISCARDS); goto drop; @@ -537,25 +475,17 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) switch (**nexthdr) { case NEXTHDR_HOP: - break; case NEXTHDR_ROUTING: - found_rhdr = 1; - break; case NEXTHDR_DEST: -#ifdef CONFIG_IPV6_MIP6 - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - break; -#endif - if (found_rhdr) - return offset; + if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1; + if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset; + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); break; default : return offset; } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); } return offset; @@ -796,14 +726,6 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) return err; } -static inline int ip6_rt_check(struct rt6key *rt_key, - struct in6_addr *fl_addr, - struct in6_addr *addr_cache) -{ - return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); -} - static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, struct flowi *fl) @@ -819,8 +741,8 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * that we do not support routing by source, TOS, * and MSG_DONTROUTE --ANK (980726) * - * 1. ip6_rt_check(): If route was host route, - * check that cached destination is current. + * 1. If route was host route, check that + * cached destination is current. 
* If it is network route, we still may * check its validity using saved pointer * to the last used address: daddr_cache. @@ -831,11 +753,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || -#ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || -#endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + if (((rt->rt6i_dst.plen != 128 || + !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr)) + && (np->daddr_cache == NULL || + !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache))) + || (fl->oif && fl->oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -944,7 +866,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->h.raw = skb->data + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; skb->csum = 0; sk->sk_sndmsg_off = 0; } @@ -1041,7 +963,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + (opt ? 
opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { diff --git a/trunk/net/ipv6/ipcomp6.c b/trunk/net/ipv6/ipcomp6.c index ad9c6e824e62..a81e9e9d93bd 100644 --- a/trunk/net/ipv6/ipcomp6.c +++ b/trunk/net/ipv6/ipcomp6.c @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = XFRM_MODE_TUNNEL; + t->props.mode = 1; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode == XFRM_MODE_TUNNEL) + if (x->props.mode) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; @@ -461,7 +461,6 @@ static struct xfrm_type ipcomp6_type = .destructor = ipcomp6_destroy, .input = ipcomp6_input, .output = ipcomp6_output, - .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ipcomp6_protocol = diff --git a/trunk/net/ipv6/ipv6_sockglue.c b/trunk/net/ipv6/ipv6_sockglue.c index 4f3bb7fcc8b5..a5eaaf693abf 100644 --- a/trunk/net/ipv6/ipv6_sockglue.c +++ b/trunk/net/ipv6/ipv6_sockglue.c @@ -407,16 +407,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, /* routing header option needs extra check */ if (optname == IPV6_RTHDR && opt->srcrt) { struct ipv6_rt_hdr *rthdr = opt->srcrt; - switch (rthdr->type) { - case IPV6_SRCRT_TYPE_0: -#ifdef CONFIG_IPV6_MIP6 - case IPV6_SRCRT_TYPE_2: -#endif - break; - default: + if (rthdr->type) goto 
sticky_done; - } - if ((rthdr->hdrlen & 1) || (rthdr->hdrlen >> 1) != rthdr->segments_left) goto sticky_done; diff --git a/trunk/net/ipv6/ipv6_syms.c b/trunk/net/ipv6/ipv6_syms.c index 0e8e0676a033..dd4d1ce77769 100644 --- a/trunk/net/ipv6/ipv6_syms.c +++ b/trunk/net/ipv6/ipv6_syms.c @@ -14,6 +14,7 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); +EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); EXPORT_SYMBOL(inet6_register_protosw); @@ -30,8 +31,6 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); -EXPORT_SYMBOL(xfrm6_input_addr); -EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff --git a/trunk/net/ipv6/mcast.c b/trunk/net/ipv6/mcast.c index 3b114e3fa2f8..639eb20c9f1f 100644 --- a/trunk/net/ipv6/mcast.c +++ b/trunk/net/ipv6/mcast.c @@ -171,7 +171,7 @@ static int ip6_mc_leave_src(struct sock *sk, struct ipv6_mc_socklist *iml, #define IPV6_MLD_MAX_MSF 64 -int sysctl_mld_max_msf __read_mostly = IPV6_MLD_MAX_MSF; +int sysctl_mld_max_msf = IPV6_MLD_MAX_MSF; /* * socket join on multicast group diff --git a/trunk/net/ipv6/mip6.c b/trunk/net/ipv6/mip6.c deleted file mode 100644 index 99d116caecda..000000000000 --- a/trunk/net/ipv6/mip6.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Authors: - * Noriaki TAKAMIYA @USAGI - * Masahide NAKAMURA @USAGI - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) -{ - return x->coaddr; -} - -static inline unsigned int calc_padlen(unsigned int len, unsigned int n) -{ - return (n - len + 16) & 0x7; -} - -static inline void *mip6_padn(__u8 *data, __u8 padlen) -{ - if (!data) - return NULL; - if (padlen == 1) { - data[0] = MIP6_OPT_PAD_1; - } else if (padlen > 1) { - data[0] = MIP6_OPT_PAD_N; - data[1] = padlen - 2; - if (padlen > 2) - memset(data+2, 0, data[1]); - } - return data + padlen; -} - -static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) -{ - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); -} - -static int mip6_mh_len(int type) -{ - int len = 0; - - switch (type) { - case IP6_MH_TYPE_BRR: - len = 0; - break; - case IP6_MH_TYPE_HOTI: - case IP6_MH_TYPE_COTI: - case IP6_MH_TYPE_BU: - case IP6_MH_TYPE_BACK: - len = 1; - break; - case IP6_MH_TYPE_HOT: - case IP6_MH_TYPE_COT: - case IP6_MH_TYPE_BERROR: - len = 2; - break; - } - return len; -} - -int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) -{ - struct ip6_mh *mh; - int mhlen; - - if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || - !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) - return -1; - - mh = (struct ip6_mh *)skb->h.raw; - - if (mh->ip6mh_hdrlen < 
mip6_mh_len(mh->ip6mh_type)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", - mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); - return -1; - } - mhlen = (mh->ip6mh_hdrlen + 1) << 3; - - if (skb->ip_summed == CHECKSUM_COMPLETE) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb->csum)) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); - skb->ip_summed = CHECKSUM_NONE; - } - } - if (skb->ip_summed == CHECKSUM_NONE) { - if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, - &skb->nh.ipv6h->daddr, - mhlen, IPPROTO_MH, - skb_checksum(skb, 0, mhlen, 0))) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed " - "[" NIP6_FMT " > " NIP6_FMT "]\n", - NIP6(skb->nh.ipv6h->saddr), - NIP6(skb->nh.ipv6h->daddr)); - return -1; - } - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - - if (mh->ip6mh_proto != IPPROTO_NONE) { - LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", - mh->ip6mh_proto); - mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); - return -1; - } - - return 0; -} - -struct mip6_report_rate_limiter { - spinlock_t lock; - struct timeval stamp; - int iif; - struct in6_addr src; - struct in6_addr dst; -}; - -static struct mip6_report_rate_limiter mip6_report_rl = { - .lock = SPIN_LOCK_UNLOCKED -}; - -static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; - - if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && - !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; - - return destopt->nexthdr; -} - -/* Destination Option Header is inserted. - * IP Header's src address is replaced with Home Address Option in - * Destination Option Header. 
- */ -static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - struct ipv6_destopt_hdr *dstopt; - struct ipv6_destopt_hao *hao; - u8 nexthdr; - int len; - - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); - - nexthdr = *skb->nh.raw; - *skb->nh.raw = IPPROTO_DSTOPTS; - - dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; - dstopt->nexthdr = nexthdr; - - hao = mip6_padn((char *)(dstopt + 1), - calc_padlen(sizeof(*dstopt), 6)); - - hao->type = IPV6_TLV_HAO; - hao->length = sizeof(*hao) - 2; - BUG_TRAP(hao->length == 16); - - len = ((char *)hao - (char *)dstopt) + sizeof(*hao); - - memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); - memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); - - BUG_TRAP(len == x->props.header_len); - dstopt->hdrlen = (x->props.header_len >> 3) - 1; - - return 0; -} - -static inline int mip6_report_rl_allow(struct timeval *stamp, - struct in6_addr *dst, - struct in6_addr *src, int iif) -{ - int allow = 0; - - spin_lock_bh(&mip6_report_rl.lock); - if (mip6_report_rl.stamp.tv_sec != stamp->tv_sec || - mip6_report_rl.stamp.tv_usec != stamp->tv_usec || - mip6_report_rl.iif != iif || - !ipv6_addr_equal(&mip6_report_rl.src, src) || - !ipv6_addr_equal(&mip6_report_rl.dst, dst)) { - mip6_report_rl.stamp.tv_sec = stamp->tv_sec; - mip6_report_rl.stamp.tv_usec = stamp->tv_usec; - mip6_report_rl.iif = iif; - ipv6_addr_copy(&mip6_report_rl.src, src); - ipv6_addr_copy(&mip6_report_rl.dst, dst); - allow = 1; - } - spin_unlock_bh(&mip6_report_rl.lock); - return allow; -} - -static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, struct flowi *fl) -{ - struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb; - struct ipv6_destopt_hao *hao = NULL; - struct xfrm_selector sel; - int offset; - struct timeval stamp; - int err = 0; - - if (unlikely(fl->proto == IPPROTO_MH && - fl->fl_mh_type <= IP6_MH_TYPE_MAX)) - goto out; - - if (likely(opt->dsthao)) { - 
offset = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO); - if (likely(offset >= 0)) - hao = (struct ipv6_destopt_hao *)(skb->nh.raw + offset); - } - - skb_get_timestamp(skb, &stamp); - - if (!mip6_report_rl_allow(&stamp, &skb->nh.ipv6h->daddr, - hao ? &hao->addr : &skb->nh.ipv6h->saddr, - opt->iif)) - goto out; - - memset(&sel, 0, sizeof(sel)); - memcpy(&sel.daddr, (xfrm_address_t *)&skb->nh.ipv6h->daddr, - sizeof(sel.daddr)); - sel.prefixlen_d = 128; - memcpy(&sel.saddr, (xfrm_address_t *)&skb->nh.ipv6h->saddr, - sizeof(sel.saddr)); - sel.prefixlen_s = 128; - sel.family = AF_INET6; - sel.proto = fl->proto; - sel.dport = xfrm_flowi_dport(fl); - if (sel.dport) - sel.dport_mask = ~((__u16)0); - sel.sport = xfrm_flowi_sport(fl); - if (sel.sport) - sel.sport_mask = ~((__u16)0); - sel.ifindex = fl->oif; - - err = km_report(IPPROTO_DSTOPTS, &sel, - (hao ? (xfrm_address_t *)&hao->addr : NULL)); - - out: - return err; -} - -static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); - unsigned int packet_len = skb->tail - skb->nh.raw; - int found_rhdr = 0; - - *nexthdr = &skb->nh.ipv6h->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: - /* - * HAO MUST NOT appear more than once. - * XXX: It is better to try to find by the end of - * XXX: packet if HAO exists. 
- */ - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { - LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); - return offset; - } - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); - } - - return offset; -} - -static int mip6_destopt_init_state(struct xfrm_state *x) -{ - if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, - x->id.spi); - return -EINVAL; - } - if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); - return -EINVAL; - } - - x->props.header_len = sizeof(struct ipv6_destopt_hdr) + - calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + - sizeof(struct ipv6_destopt_hao); - BUG_TRAP(x->props.header_len == 24); - - return 0; -} - -/* - * Do nothing about destroying since it has no specific operation for - * destination options header unlike IPsec protocols. - */ -static void mip6_destopt_destroy(struct xfrm_state *x) -{ -} - -static struct xfrm_type mip6_destopt_type = -{ - .description = "MIP6DESTOPT", - .owner = THIS_MODULE, - .proto = IPPROTO_DSTOPTS, - .flags = XFRM_TYPE_NON_FRAGMENT, - .init_state = mip6_destopt_init_state, - .destructor = mip6_destopt_destroy, - .input = mip6_destopt_input, - .output = mip6_destopt_output, - .reject = mip6_destopt_reject, - .hdr_offset = mip6_destopt_offset, - .local_addr = mip6_xfrm_addr, -}; - -static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) -{ - struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; - - if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && - !ipv6_addr_any((struct in6_addr *)x->coaddr)) - return -ENOENT; - - return rt2->rt_hdr.nexthdr; -} - -/* Routing Header type 2 is inserted. - * IP Header's dst address is replaced with Routing Header's Home Address. 
- */ -static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - struct rt2_hdr *rt2; - u8 nexthdr; - - iph = (struct ipv6hdr *)skb->data; - iph->payload_len = htons(skb->len - sizeof(*iph)); - - nexthdr = *skb->nh.raw; - *skb->nh.raw = IPPROTO_ROUTING; - - rt2 = (struct rt2_hdr *)skb->h.raw; - rt2->rt_hdr.nexthdr = nexthdr; - rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; - rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; - rt2->rt_hdr.segments_left = 1; - memset(&rt2->reserved, 0, sizeof(rt2->reserved)); - - BUG_TRAP(rt2->rt_hdr.hdrlen == 2); - - memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); - memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); - - return 0; -} - -static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, - u8 **nexthdr) -{ - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); - unsigned int packet_len = skb->tail - skb->nh.raw; - int found_rhdr = 0; - - *nexthdr = &skb->nh.ipv6h->nexthdr; - - while (offset + 1 <= packet_len) { - - switch (**nexthdr) { - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - if (offset + 3 <= packet_len) { - struct ipv6_rt_hdr *rt; - rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset); - if (rt->type != 0) - return offset; - } - found_rhdr = 1; - break; - case NEXTHDR_DEST: - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - return offset; - - if (found_rhdr) - return offset; - - break; - default: - return offset; - } - - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); - } - - return offset; -} - -static int mip6_rthdr_init_state(struct xfrm_state *x) -{ - if (x->id.spi) { - printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, - x->id.spi); - return -EINVAL; - } - if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - printk(KERN_INFO "%s: state's mode is not %u: %u\n", - __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, 
x->props.mode); - return -EINVAL; - } - - x->props.header_len = sizeof(struct rt2_hdr); - - return 0; -} - -/* - * Do nothing about destroying since it has no specific operation for routing - * header type 2 unlike IPsec protocols. - */ -static void mip6_rthdr_destroy(struct xfrm_state *x) -{ -} - -static struct xfrm_type mip6_rthdr_type = -{ - .description = "MIP6RT", - .owner = THIS_MODULE, - .proto = IPPROTO_ROUTING, - .flags = XFRM_TYPE_NON_FRAGMENT, - .init_state = mip6_rthdr_init_state, - .destructor = mip6_rthdr_destroy, - .input = mip6_rthdr_input, - .output = mip6_rthdr_output, - .hdr_offset = mip6_rthdr_offset, - .remote_addr = mip6_xfrm_addr, -}; - -int __init mip6_init(void) -{ - printk(KERN_INFO "Mobile IPv6\n"); - - if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); - goto mip6_destopt_xfrm_fail; - } - if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { - printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); - goto mip6_rthdr_xfrm_fail; - } - return 0; - - mip6_rthdr_xfrm_fail: - xfrm_unregister_type(&mip6_destopt_type, AF_INET6); - mip6_destopt_xfrm_fail: - return -EAGAIN; -} - -void __exit mip6_fini(void) -{ - if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); - if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) - printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); -} diff --git a/trunk/net/ipv6/ndisc.c b/trunk/net/ipv6/ndisc.c index 0304b5fe8d6a..b50055b9278d 100644 --- a/trunk/net/ipv6/ndisc.c +++ b/trunk/net/ipv6/ndisc.c @@ -62,7 +62,6 @@ #include #endif -#include #include #include #include @@ -412,8 +411,7 @@ static void pndisc_destructor(struct pneigh_entry *n) */ static inline void ndisc_flow_init(struct flowi *fl, u8 type, - struct in6_addr *saddr, struct in6_addr *daddr, - int oif) + struct in6_addr *saddr, struct in6_addr *daddr) { 
memset(fl, 0, sizeof(*fl)); ipv6_addr_copy(&fl->fl6_src, saddr); @@ -421,8 +419,6 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; - fl->oif = oif; - security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, @@ -454,8 +450,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, src_addr = &tmpaddr; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr, - dev->ifindex); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -496,7 +491,7 @@ static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, msg->icmph.icmp6_unused = 0; msg->icmph.icmp6_router = router; msg->icmph.icmp6_solicited = solicited; - msg->icmph.icmp6_override = override; + msg->icmph.icmp6_override = !!override; /* Set the target address. 
*/ ipv6_addr_copy(&msg->target, solicited_addr); @@ -545,8 +540,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, saddr = &addr_buf; } - ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr, - dev->ifindex); + ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr); dst = ndisc_dst_alloc(dev, neigh, daddr, ip6_output); if (!dst) @@ -621,8 +615,7 @@ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr, int len; int err; - ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr, - dev->ifindex); + ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr); dst = ndisc_dst_alloc(dev, NULL, daddr, ip6_output); if (!dst) @@ -736,10 +729,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; - struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; - int is_router; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -824,9 +815,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && - (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) && - (pneigh = pneigh_lookup(&nd_tbl, - &msg->target, dev, 0)) != NULL)) { + pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -847,14 +836,12 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } - is_router = !!(pneigh ? 
pneigh->flags & NTF_ROUTER : idev->cnf.forwarding); - if (dad) { struct in6_addr maddr; ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &msg->target, - is_router, 0, (ifp != NULL), 1); + idev->cnf.forwarding, 0, (ifp != NULL), 1); goto out; } @@ -875,7 +862,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->hard_header) { ndisc_send_na(dev, neigh, saddr, &msg->target, - is_router, + idev->cnf.forwarding, 1, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); @@ -958,20 +945,6 @@ static void ndisc_recv_na(struct sk_buff *skb) if (neigh->nud_state & NUD_FAILED) goto out; - /* - * Don't update the neighbor cache entry on a proxy NA from - * ourselves because either the proxied node is off link or it - * has already sent a NA to us. - */ - if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) && - ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) { - /* XXX: idev->cnf.prixy_ndp */ - WARN_ON(skb->dst != NULL && - ((struct rt6_info *)skb->dst)->rt6i_idev); - goto out; - } - neigh_update(neigh, lladdr, msg->icmph.icmp6_solicited ? 
NUD_REACHABLE : NUD_STALE, NEIGH_UPDATE_F_WEAK_OVERRIDE| @@ -986,7 +959,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct rt6_info *rt; rt = rt6_get_dflt_router(saddr, dev); if (rt) - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); } out: @@ -1139,7 +1112,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt && lifetime == 0) { neigh_clone(neigh); - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); rt = NULL; } @@ -1371,8 +1344,7 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); if (neigh) { - rt6_redirect(dest, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr, neigh, lladdr, + rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, on_link); neigh_release(neigh); } @@ -1408,8 +1380,7 @@ void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh, return; } - ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr, - dev->ifindex); + ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr); dst = ip6_route_output(NULL, &fl); if (dst == NULL) diff --git a/trunk/net/ipv6/netfilter.c b/trunk/net/ipv6/netfilter.c index 580b1aba6722..395a417ba955 100644 --- a/trunk/net/ipv6/netfilter.c +++ b/trunk/net/ipv6/netfilter.c @@ -87,7 +87,7 @@ unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int csum = 0; switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: + case CHECKSUM_HW: if (hook != NF_IP6_PRE_ROUTING && hook != NF_IP6_LOCAL_IN) break; if (!csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, diff --git a/trunk/net/ipv6/netfilter/Makefile b/trunk/net/ipv6/netfilter/Makefile index ac1dfebde175..eeeb57d4c9c5 100644 --- a/trunk/net/ipv6/netfilter/Makefile +++ b/trunk/net/ipv6/netfilter/Makefile @@ -5,7 +5,7 @@ # Link order matters here. 
obj-$(CONFIG_IP6_NF_IPTABLES) += ip6_tables.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o -obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o +obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o ip6t_dst.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/trunk/net/ipv6/netfilter/ip6_queue.c b/trunk/net/ipv6/netfilter/ip6_queue.c index 9510c24ca8d2..968a14be0d05 100644 --- a/trunk/net/ipv6/netfilter/ip6_queue.c +++ b/trunk/net/ipv6/netfilter/ip6_queue.c @@ -56,15 +56,15 @@ struct ipq_queue_entry { typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); -static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE; -static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT; +static unsigned char copy_mode = IPQ_COPY_NONE; +static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; static DEFINE_RWLOCK(queue_lock); -static int peer_pid __read_mostly; -static unsigned int copy_range __read_mostly; +static int peer_pid; +static unsigned int copy_range; static unsigned int queue_total; static unsigned int queue_dropped = 0; static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl __read_mostly; +static struct sock *ipqnl; static LIST_HEAD(queue_list); static DEFINE_MUTEX(ipqnl_mutex); @@ -206,9 +206,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) break; case IPQ_COPY_PACKET: - if ((entry->skb->ip_summed == CHECKSUM_PARTIAL || - entry->skb->ip_summed == CHECKSUM_COMPLETE) && - (*errp = skb_checksum_help(entry->skb))) { + if (entry->skb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entry->skb, + entry->info->outdev == NULL))) { read_unlock_bh(&queue_lock); return NULL; } diff --git a/trunk/net/ipv6/netfilter/ip6_tables.c b/trunk/net/ipv6/netfilter/ip6_tables.c index 4ab368fa0b8f..c9d6b23cd3f7 100644 --- a/trunk/net/ipv6/netfilter/ip6_tables.c +++ b/trunk/net/ipv6/netfilter/ip6_tables.c @@ -70,6 +70,9 @@ do { \ 
#define IP_NF_ASSERT(x) #endif + +#include + #if 0 /* All the better to debug you with... */ #define static @@ -217,7 +220,8 @@ ip6t_error(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { if (net_ratelimit()) printk("ip6_tables: error: `%s'\n", (char *)targinfo); @@ -254,7 +258,8 @@ ip6t_do_table(struct sk_buff **pskb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct xt_table *table) + struct xt_table *table, + void *userdata) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); int offset = 0; @@ -344,7 +349,8 @@ ip6t_do_table(struct sk_buff **pskb, in, out, hook, t->u.kernel.target, - t->data); + t->data, + userdata); #ifdef CONFIG_NETFILTER_DEBUG if (((struct ip6t_entry *)table_base)->comefrom @@ -501,7 +507,8 @@ cleanup_match(struct ip6t_entry_match *m, unsigned int *i) return 1; if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->u.kernel.match, m->data); + m->u.kernel.match->destroy(m->u.kernel.match, m->data, + m->u.match_size - sizeof(*m)); module_put(m->u.kernel.match->me); return 0; } @@ -554,6 +561,7 @@ check_match(struct ip6t_entry_match *m, if (m->u.kernel.match->checkentry && !m->u.kernel.match->checkentry(name, ipv6, match, m->data, + m->u.match_size - sizeof(*m), hookmask)) { duprintf("ip_tables: check failed for `%s'.\n", m->u.kernel.match->name); @@ -610,10 +618,12 @@ check_entry(struct ip6t_entry *e, const char *name, unsigned int size, if (t->u.kernel.target == &ip6t_standard_target) { if (!standard_check(t, size)) { ret = -EINVAL; - goto err; + goto cleanup_matches; } } else if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(name, e, target, t->data, + t->u.target_size + - sizeof(*t), e->comefrom)) { duprintf("ip_tables: check failed for `%s'.\n", t->u.kernel.target->name); @@ -685,7 +695,8 @@ cleanup_entry(struct ip6t_entry 
*e, unsigned int *i) IP6T_MATCH_ITERATE(e, cleanup_match, NULL); t = ip6t_get_target(e); if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); + t->u.kernel.target->destroy(t->u.kernel.target, t->data, + t->u.target_size - sizeof(*t)); module_put(t->u.kernel.target->me); return 0; } @@ -1341,6 +1352,7 @@ icmp6_checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_icmp *icmpinfo = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_HL.c b/trunk/net/ipv6/netfilter/ip6t_HL.c index 435750f664dd..b8eff8ee69b1 100644 --- a/trunk/net/ipv6/netfilter/ip6t_HL.c +++ b/trunk/net/ipv6/netfilter/ip6t_HL.c @@ -22,10 +22,11 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { struct ipv6hdr *ip6h; const struct ip6t_HL_info *info = targinfo; + u_int16_t diffs[2]; int new_hl; if (!skb_make_writable(pskb, (*pskb)->len)) @@ -52,8 +53,11 @@ static unsigned int ip6t_hl_target(struct sk_buff **pskb, break; } - if (new_hl != ip6h->hop_limit) + if (new_hl != ip6h->hop_limit) { + diffs[0] = htons(((unsigned)ip6h->hop_limit) << 8) ^ 0xFFFF; ip6h->hop_limit = new_hl; + diffs[1] = htons(((unsigned)ip6h->hop_limit) << 8); + } return IP6T_CONTINUE; } @@ -62,6 +66,7 @@ static int ip6t_hl_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct ip6t_HL_info *info = targinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_LOG.c b/trunk/net/ipv6/netfilter/ip6t_LOG.c index 0cf537d30185..73c6300109d6 100644 --- a/trunk/net/ipv6/netfilter/ip6t_LOG.c +++ b/trunk/net/ipv6/netfilter/ip6t_LOG.c @@ -427,7 +427,8 @@ ip6t_log_target(struct sk_buff **pskb, const struct net_device *out, unsigned int 
hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ip6t_log_info *loginfo = targinfo; struct nf_loginfo li; @@ -451,6 +452,7 @@ static int ip6t_log_checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_log_info *loginfo = targinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_REJECT.c b/trunk/net/ipv6/netfilter/ip6t_REJECT.c index 311eae82feb3..8629ba195d2d 100644 --- a/trunk/net/ipv6/netfilter/ip6t_REJECT.c +++ b/trunk/net/ipv6/netfilter/ip6t_REJECT.c @@ -96,7 +96,6 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; - security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; @@ -180,7 +179,8 @@ static unsigned int reject6_target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct ip6t_reject_info *reject = targinfo; @@ -223,6 +223,7 @@ static int check(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { const struct ip6t_reject_info *rejinfo = targinfo; @@ -255,7 +256,9 @@ static struct ip6t_target ip6t_reject_reg = { static int __init ip6t_reject_init(void) { - return ip6t_register_target(&ip6t_reject_reg); + if (ip6t_register_target(&ip6t_reject_reg)) + return -EINVAL; + return 0; } static void __exit ip6t_reject_fini(void) diff --git a/trunk/net/ipv6/netfilter/ip6t_ah.c b/trunk/net/ipv6/netfilter/ip6t_ah.c index ec1b1608156c..2f7bb20c758b 100644 --- a/trunk/net/ipv6/netfilter/ip6t_ah.c +++ b/trunk/net/ipv6/netfilter/ip6t_ah.c @@ -102,6 +102,7 @@ checkentry(const char *tablename, const void *entry, const 
struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_ah *ahinfo = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_dst.c b/trunk/net/ipv6/netfilter/ip6t_dst.c new file mode 100644 index 000000000000..9422413d0571 --- /dev/null +++ b/trunk/net/ipv6/netfilter/ip6t_dst.c @@ -0,0 +1,220 @@ +/* Kernel module to match Hop-by-Hop and Destination parameters. */ + +/* (C) 2001-2002 Andras Kis-Szabo + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define HOPBYHOP 0 + +MODULE_LICENSE("GPL"); +#if HOPBYHOP +MODULE_DESCRIPTION("IPv6 HbH match"); +#else +MODULE_DESCRIPTION("IPv6 DST match"); +#endif +MODULE_AUTHOR("Andras Kis-Szabo "); + +#if 0 +#define DEBUGP printk +#else +#define DEBUGP(format, args...) +#endif + +/* + * (Type & 0xC0) >> 6 + * 0 -> ignorable + * 1 -> must drop the packet + * 2 -> send ICMP PARM PROB regardless and drop packet + * 3 -> Send ICMP if not a multicast address and drop packet + * (Type & 0x20) >> 5 + * 0 -> invariant + * 1 -> can change the routing + * (Type & 0x1F) Type + * 0 -> Pad1 (only 1 byte!) 
+ * 1 -> PadN LENGTH info (total length = length + 2) + * C0 | 2 -> JUMBO 4 x x x x ( xxxx > 64k ) + * 5 -> RTALERT 2 x x + */ + +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + struct ipv6_opt_hdr _optsh, *oh; + const struct ip6t_opts *optinfo = matchinfo; + unsigned int temp; + unsigned int ptr; + unsigned int hdrlen = 0; + unsigned int ret = 0; + u8 _opttype, *tp = NULL; + u8 _optlen, *lp = NULL; + unsigned int optlen; + +#if HOPBYHOP + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_HOP, NULL) < 0) +#else + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) +#endif + return 0; + + oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); + if (oh == NULL) { + *hotdrop = 1; + return 0; + } + + hdrlen = ipv6_optlen(oh); + if (skb->len - ptr < hdrlen) { + /* Packet smaller than it's length field */ + return 0; + } + + DEBUGP("IPv6 OPTS LEN %u %u ", hdrlen, oh->hdrlen); + + DEBUGP("len %02X %04X %02X ", + optinfo->hdrlen, hdrlen, + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN)))); + + ret = (oh != NULL) && + (!(optinfo->flags & IP6T_OPTS_LEN) || + ((optinfo->hdrlen == hdrlen) ^ + !!(optinfo->invflags & IP6T_OPTS_INV_LEN))); + + ptr += 2; + hdrlen -= 2; + if (!(optinfo->flags & IP6T_OPTS_OPTS)) { + return ret; + } else if (optinfo->flags & IP6T_OPTS_NSTRICT) { + DEBUGP("Not strict - not implemented"); + } else { + DEBUGP("Strict "); + DEBUGP("#%d ", optinfo->optsnr); + for (temp = 0; temp < optinfo->optsnr; temp++) { + /* type field exists ? 
*/ + if (hdrlen < 1) + break; + tp = skb_header_pointer(skb, ptr, sizeof(_opttype), + &_opttype); + if (tp == NULL) + break; + + /* Type check */ + if (*tp != (optinfo->opts[temp] & 0xFF00) >> 8) { + DEBUGP("Tbad %02X %02X\n", + *tp, + (optinfo->opts[temp] & 0xFF00) >> 8); + return 0; + } else { + DEBUGP("Tok "); + } + /* Length check */ + if (*tp) { + u16 spec_len; + + /* length field exists ? */ + if (hdrlen < 2) + break; + lp = skb_header_pointer(skb, ptr + 1, + sizeof(_optlen), + &_optlen); + if (lp == NULL) + break; + spec_len = optinfo->opts[temp] & 0x00FF; + + if (spec_len != 0x00FF && spec_len != *lp) { + DEBUGP("Lbad %02X %04X\n", *lp, + spec_len); + return 0; + } + DEBUGP("Lok "); + optlen = *lp + 2; + } else { + DEBUGP("Pad1\n"); + optlen = 1; + } + + /* Step to the next */ + DEBUGP("len%04X \n", optlen); + + if ((ptr > skb->len - optlen || hdrlen < optlen) && + (temp < optinfo->optsnr - 1)) { + DEBUGP("new pointer is too large! \n"); + break; + } + ptr += optlen; + hdrlen -= optlen; + } + if (temp == optinfo->optsnr) + return ret; + else + return 0; + } + + return 0; +} + +/* Called when user tries to insert an entry of this type. 
*/ +static int +checkentry(const char *tablename, + const void *info, + const struct xt_match *match, + void *matchinfo, + unsigned int matchinfosize, + unsigned int hook_mask) +{ + const struct ip6t_opts *optsinfo = matchinfo; + + if (optsinfo->invflags & ~IP6T_OPTS_INV_MASK) { + DEBUGP("ip6t_opts: unknown flags %X\n", optsinfo->invflags); + return 0; + } + return 1; +} + +static struct ip6t_match opts_match = { +#if HOPBYHOP + .name = "hbh", +#else + .name = "dst", +#endif + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, +}; + +static int __init ip6t_dst_init(void) +{ + return ip6t_register_match(&opts_match); +} + +static void __exit ip6t_dst_fini(void) +{ + ip6t_unregister_match(&opts_match); +} + +module_init(ip6t_dst_init); +module_exit(ip6t_dst_fini); diff --git a/trunk/net/ipv6/netfilter/ip6t_frag.c b/trunk/net/ipv6/netfilter/ip6t_frag.c index 78d9c8b9e28a..06768c84bd31 100644 --- a/trunk/net/ipv6/netfilter/ip6t_frag.c +++ b/trunk/net/ipv6/netfilter/ip6t_frag.c @@ -119,6 +119,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_frag *fraginfo = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_hbh.c b/trunk/net/ipv6/netfilter/ip6t_hbh.c index d32a205e3af2..374f1be85c0d 100644 --- a/trunk/net/ipv6/netfilter/ip6t_hbh.c +++ b/trunk/net/ipv6/netfilter/ip6t_hbh.c @@ -19,10 +19,15 @@ #include #include +#define HOPBYHOP 1 + MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("IPv6 opts match"); +#if HOPBYHOP +MODULE_DESCRIPTION("IPv6 HbH match"); +#else +MODULE_DESCRIPTION("IPv6 DST match"); +#endif MODULE_AUTHOR("Andras Kis-Szabo "); -MODULE_ALIAS("ip6t_dst"); #if 0 #define DEBUGP printk @@ -66,7 +71,11 @@ match(const struct sk_buff *skb, u8 _optlen, *lp = NULL; unsigned int optlen; - if (ipv6_find_hdr(skb, &ptr, match->data, NULL) < 0) +#if HOPBYHOP + if (ipv6_find_hdr(skb, &ptr, 
NEXTHDR_HOP, NULL) < 0) +#else + if (ipv6_find_hdr(skb, &ptr, NEXTHDR_DEST, NULL) < 0) +#endif return 0; oh = skb_header_pointer(skb, ptr, sizeof(_optsh), &_optsh); @@ -173,6 +182,7 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_opts *optsinfo = matchinfo; @@ -184,35 +194,26 @@ checkentry(const char *tablename, return 1; } -static struct xt_match opts_match[] = { - { - .name = "hbh", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, - .data = NEXTHDR_HOP, - }, - { - .name = "dst", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct ip6t_opts), - .checkentry = checkentry, - .me = THIS_MODULE, - .data = NEXTHDR_DEST, - }, +static struct ip6t_match opts_match = { +#if HOPBYHOP + .name = "hbh", +#else + .name = "dst", +#endif + .match = match, + .matchsize = sizeof(struct ip6t_opts), + .checkentry = checkentry, + .me = THIS_MODULE, }; static int __init ip6t_hbh_init(void) { - return xt_register_matches(opts_match, ARRAY_SIZE(opts_match)); + return ip6t_register_match(&opts_match); } static void __exit ip6t_hbh_fini(void) { - xt_unregister_matches(opts_match, ARRAY_SIZE(opts_match)); + ip6t_unregister_match(&opts_match); } module_init(ip6t_hbh_init); diff --git a/trunk/net/ipv6/netfilter/ip6t_ipv6header.c b/trunk/net/ipv6/netfilter/ip6t_ipv6header.c index 3093c398002f..9375eeb1369f 100644 --- a/trunk/net/ipv6/netfilter/ip6t_ipv6header.c +++ b/trunk/net/ipv6/netfilter/ip6t_ipv6header.c @@ -128,6 +128,7 @@ ipv6header_checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ipv6header_info *info = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_owner.c b/trunk/net/ipv6/netfilter/ip6t_owner.c index 4eb9bbc4ebc3..5d047990cd44 100644 --- 
a/trunk/net/ipv6/netfilter/ip6t_owner.c +++ b/trunk/net/ipv6/netfilter/ip6t_owner.c @@ -57,6 +57,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_owner_info *info = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6t_rt.c b/trunk/net/ipv6/netfilter/ip6t_rt.c index bcb2e168a5bc..fbb0184a41d8 100644 --- a/trunk/net/ipv6/netfilter/ip6t_rt.c +++ b/trunk/net/ipv6/netfilter/ip6t_rt.c @@ -197,6 +197,7 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct ip6t_rt *rtinfo = matchinfo; diff --git a/trunk/net/ipv6/netfilter/ip6table_filter.c b/trunk/net/ipv6/netfilter/ip6table_filter.c index 2fc07c74decf..60976c0c58e8 100644 --- a/trunk/net/ipv6/netfilter/ip6table_filter.c +++ b/trunk/net/ipv6/netfilter/ip6table_filter.c @@ -108,7 +108,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); } static unsigned int @@ -128,7 +128,7 @@ ip6t_local_out_hook(unsigned int hook, } #endif - return ip6t_do_table(pskb, hook, in, out, &packet_filter); + return ip6t_do_table(pskb, hook, in, out, &packet_filter, NULL); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/trunk/net/ipv6/netfilter/ip6table_mangle.c b/trunk/net/ipv6/netfilter/ip6table_mangle.c index 386ea260e767..03a13eab1dae 100644 --- a/trunk/net/ipv6/netfilter/ip6table_mangle.c +++ b/trunk/net/ipv6/netfilter/ip6table_mangle.c @@ -138,7 +138,7 @@ ip6t_route_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_mangler); + return ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); } static unsigned int @@ -174,14 +174,18 @@ 
ip6t_local_hook(unsigned int hook, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *) (*pskb)->nh.ipv6h); - ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler); + ret = ip6t_do_table(pskb, hook, in, out, &packet_mangler, NULL); if (ret != NF_DROP && ret != NF_STOLEN && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) || (*pskb)->nfmark != nfmark - || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) - return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; + || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { + + /* something which could affect routing has changed */ + + DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); + } return ret; } diff --git a/trunk/net/ipv6/netfilter/ip6table_raw.c b/trunk/net/ipv6/netfilter/ip6table_raw.c index b4154da575c0..61a7c58e99f8 100644 --- a/trunk/net/ipv6/netfilter/ip6table_raw.c +++ b/trunk/net/ipv6/netfilter/ip6table_raw.c @@ -122,7 +122,7 @@ ip6t_hook(unsigned int hook, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return ip6t_do_table(pskb, hook, in, out, &packet_raw); + return ip6t_do_table(pskb, hook, in, out, &packet_raw, NULL); } static struct nf_hook_ops ip6t_ops[] = { diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index e5e53fff9e38..c2ab38ff46af 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -335,7 +335,7 @@ static struct nf_hook_ops ipv6_conntrack_ops[] = { /* From nf_conntrack_proto_icmpv6.c */ extern unsigned int nf_ct_icmpv6_timeout; -/* From nf_conntrack_reasm.c */ +/* From nf_conntrack_frag6.c */ extern unsigned int nf_ct_frag6_timeout; extern unsigned int nf_ct_frag6_low_thresh; extern unsigned int nf_ct_frag6_high_thresh; diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c 
b/trunk/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index 34d447208ffd..ef18a7b7014b 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -33,7 +33,7 @@ #include #include -unsigned long nf_ct_icmpv6_timeout __read_mostly = 30*HZ; +unsigned long nf_ct_icmpv6_timeout = 30*HZ; #if 0 #define DEBUGP printk diff --git a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c index bf93c1ea6be9..00d5583807f7 100644 --- a/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/trunk/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -54,9 +54,9 @@ #define NF_CT_FRAG6_LOW_THRESH 196608 /* == 192*1024 */ #define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT -unsigned int nf_ct_frag6_high_thresh __read_mostly = 256*1024; -unsigned int nf_ct_frag6_low_thresh __read_mostly = 192*1024; -unsigned long nf_ct_frag6_timeout __read_mostly = IPV6_FRAG_TIMEOUT; +unsigned int nf_ct_frag6_high_thresh = 256*1024; +unsigned int nf_ct_frag6_low_thresh = 192*1024; +unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT; struct nf_ct_frag6_skb_cb { @@ -408,7 +408,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, return -1; } - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (skb->ip_summed == CHECKSUM_HW) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr + 1) - skb->nh.raw, @@ -640,7 +640,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) + else if (head->ip_summed == CHECKSUM_HW) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &nf_ct_frag6_mem); @@ -652,7 +652,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->nh.ipv6h->payload_len = htons(payload_len); /* Yes, and fold redundant checksum back. 
8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) + if (head->ip_summed == CHECKSUM_HW) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); fq->fragments = NULL; diff --git a/trunk/net/ipv6/raw.c b/trunk/net/ipv6/raw.c index d09329ca3267..15b862d8acab 100644 --- a/trunk/net/ipv6/raw.c +++ b/trunk/net/ipv6/raw.c @@ -50,9 +50,6 @@ #include #include #include -#ifdef CONFIG_IPV6_MIP6 -#include -#endif #include #include @@ -172,32 +169,8 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { - int filtered; - delivered = 1; - switch (nexthdr) { - case IPPROTO_ICMPV6: - filtered = icmpv6_filter(sk, skb); - break; -#ifdef CONFIG_IPV6_MIP6 - case IPPROTO_MH: - /* XXX: To validate MH only once for each packet, - * this is placed here. It should be after checking - * xfrm policy, however it doesn't. The checking xfrm - * policy is placed in rawv6_rcv() because it is - * required for each socket. - */ - filtered = mip6_mh_filter(sk, skb); - break; -#endif - default: - filtered = 0; - break; - } - - if (filtered < 0) - break; - if (filtered == 0) { + if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! 
*/ @@ -361,7 +334,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) if (!rp->checksum) skb->ip_summed = CHECKSUM_UNNECESSARY; - if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (skb->ip_summed == CHECKSUM_HW) { skb_postpull_rcsum(skb, skb->nh.raw, skb->h.raw - skb->nh.raw); if (!csum_ipv6_magic(&skb->nh.ipv6h->saddr, @@ -609,9 +582,6 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) struct iovec *iov; u8 __user *type = NULL; u8 __user *code = NULL; -#ifdef CONFIG_IPV6_MIP6 - u8 len = 0; -#endif int probed = 0; int i; @@ -643,20 +613,6 @@ static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg) probed = 1; } break; -#ifdef CONFIG_IPV6_MIP6 - case IPPROTO_MH: - if (iov->iov_base && iov->iov_len < 1) - break; - /* check if type field is readable or not. */ - if (iov->iov_len > 2 - len) { - u8 __user *p = iov->iov_base; - get_user(fl->fl_mh_type, &p[2 - len]); - probed = 1; - } else - len += iov->iov_len; - - break; -#endif default: probed = 1; break; @@ -803,7 +759,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; - security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/trunk/net/ipv6/reassembly.c b/trunk/net/ipv6/reassembly.c index f39bbedd1327..4e299c69e1c6 100644 --- a/trunk/net/ipv6/reassembly.c +++ b/trunk/net/ipv6/reassembly.c @@ -53,10 +53,10 @@ #include #include -int sysctl_ip6frag_high_thresh __read_mostly = 256*1024; -int sysctl_ip6frag_low_thresh __read_mostly = 192*1024; +int sysctl_ip6frag_high_thresh = 256*1024; +int sysctl_ip6frag_low_thresh = 192*1024; -int sysctl_ip6frag_time __read_mostly = IPV6_FRAG_TIMEOUT; +int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; struct ip6frag_skb_cb { @@ -152,7 +152,7 @@ static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, } static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval __read_mostly = 10 * 60 * HZ; 
+int sysctl_ip6frag_secret_interval = 10 * 60 * HZ; static void ip6_frag_secret_rebuild(unsigned long dummy) { @@ -433,7 +433,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return; } - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (skb->ip_summed == CHECKSUM_HW) skb->csum = csum_sub(skb->csum, csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); @@ -647,7 +647,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, head->len += fp->len; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_COMPLETE) + else if (head->ip_summed == CHECKSUM_HW) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; atomic_sub(fp->truesize, &ip6_frag_mem); @@ -662,7 +662,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, *skb_in = head; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_COMPLETE) + if (head->ip_summed == CHECKSUM_HW) head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); diff --git a/trunk/net/ipv6/route.c b/trunk/net/ipv6/route.c index d6b4b4f48d18..d9baca062d24 100644 --- a/trunk/net/ipv6/route.c +++ b/trunk/net/ipv6/route.c @@ -22,8 +22,6 @@ * routers in REACHABLE, STALE, DELAY or PROBE states). * - always select the same router if it is (probably) * reachable. otherwise, round-robin the list. - * Ville Nuorvala - * Fixed routing subtrees. 
*/ #include @@ -37,6 +35,7 @@ #include #include #include +#include #include #ifdef CONFIG_PROC_FS @@ -55,7 +54,6 @@ #include #include #include -#include #include @@ -76,6 +74,9 @@ #define CLONE_OFFLINK_ROUTE 0 +#define RT6_SELECT_F_IFACE 0x1 +#define RT6_SELECT_F_REACHABLE 0x2 + static int ip6_rt_max_size = 4096; static int ip6_rt_gc_min_interval = HZ / 2; static int ip6_rt_gc_timeout = 60*HZ; @@ -139,49 +140,15 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - -struct rt6_info ip6_prohibit_entry = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .dev = &loopback_dev, - .obsolete = -1, - .error = -EACCES, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_prohibit_entry, - } - }, - .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), +struct fib6_node ip6_routing_table = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, }; -struct rt6_info ip6_blk_hole_entry = { - .u = { - .dst = { - .__refcnt = ATOMIC_INIT(1), - .__use = 1, - .dev = &loopback_dev, - .obsolete = -1, - .error = -EINVAL, - .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, - .input = ip6_pkt_discard, - .output = ip6_pkt_discard_out, - .ops = &ip6_dst_ops, - .path = (struct dst_entry*)&ip6_blk_hole_entry, - } - }, - .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), - .rt6i_metric = ~(u32) 0, - .rt6i_ref = ATOMIC_INIT(1), -}; +/* Protects all the ip6 fib */ + +DEFINE_RWLOCK(rt6_lock); -#endif /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) @@ -221,14 +188,8 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) time_after(jiffies, rt->rt6i_expires)); } -static inline int rt6_need_strict(struct in6_addr *daddr) -{ - return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); -} - 
/* - * Route lookup. Any table->tb6_lock is implied. + * Route lookup. Any rt6_lock is implied. */ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, @@ -337,7 +298,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int m, n; m = rt6_check_dev(rt, oif); - if (!m && (strict & RT6_LOOKUP_F_IFACE)) + if (!m && (strict & RT6_SELECT_F_IFACE)) return -1; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; @@ -345,7 +306,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, n = rt6_check_neigh(rt); if (n > 1) m |= 16; - else if (!n && strict & RT6_LOOKUP_F_REACHABLE) + else if (!n && strict & RT6_SELECT_F_REACHABLE) return -1; return m; } @@ -385,7 +346,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, } if (!match && - (strict & RT6_LOOKUP_F_REACHABLE) && + (strict & RT6_SELECT_F_REACHABLE) && last && last != rt0) { /* no entries matched; do round-robin */ static DEFINE_SPINLOCK(lock); @@ -456,7 +417,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); if (rt && !lifetime) { - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); rt = NULL; } @@ -480,95 +441,44 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -#define BACKTRACK(saddr) \ -do { \ - if (rt == &ip6_null_entry) { \ - struct fib6_node *pn; \ - while (fn) { \ - if (fn->fn_flags & RTN_TL_ROOT) \ - goto out; \ - pn = fn->parent; \ - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \ - fn = fib6_lookup(pn->subtree, NULL, saddr); \ - else \ - fn = pn; \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ - } \ -} while(0) - -static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, - struct flowi *fl, int flags) +struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, + int oif, int strict) { struct fib6_node *fn; struct rt6_info *rt; - read_lock_bh(&table->tb6_lock); 
- fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); -restart: - rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags); - BACKTRACK(&fl->fl6_src); -out: + read_lock_bh(&rt6_lock); + fn = fib6_lookup(&ip6_routing_table, daddr, saddr); + rt = rt6_device_match(fn->leaf, oif, strict); dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); - - rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; + read_unlock_bh(&rt6_lock); - return rt; - -} - -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) -{ - struct flowi fl = { - .oif = oif, - .nl_u = { - .ip6_u = { - .daddr = *daddr, - /* TODO: saddr */ - }, - }, - }; - struct dst_entry *dst; - int flags = strict ? RT6_LOOKUP_F_IFACE : 0; - - dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); - if (dst->error == 0) - return (struct rt6_info *) dst; - - dst_release(dst); - + rt->u.dst.lastuse = jiffies; + if (rt->u.dst.error == 0) + return rt; + dst_release(&rt->u.dst); return NULL; } -/* ip6_ins_rt is called with FREE table->tb6_lock. +/* ip6_ins_rt is called with FREE rt6_lock. It takes new route entry, the addition fails by any reason the route is freed. In any case, if caller does not hold it, it may be destroyed. 
*/ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; - struct fib6_table *table; - table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info); - write_unlock_bh(&table->tb6_lock); + write_lock_bh(&rt6_lock); + err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); + write_unlock_bh(&rt6_lock); return err; } -int ip6_ins_rt(struct rt6_info *rt) -{ - return __ip6_ins_rt(rt, NULL); -} - static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -622,39 +532,51 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, - struct flowi *fl, int flags) +#define BACKTRACK() \ +if (rt == &ip6_null_entry) { \ + while ((fn = fn->parent) != NULL) { \ + if (fn->fn_flags & RTN_ROOT) { \ + goto out; \ + } \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ + } \ +} + + +void ip6_route_input(struct sk_buff *skb) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict = 0; + int strict; int attempts = 3; int err; - int reachable = RT6_LOOKUP_F_REACHABLE; + int reachable = RT6_SELECT_F_REACHABLE; - strict |= flags & RT6_LOOKUP_F_IFACE; + strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? 
RT6_SELECT_F_IFACE : 0; relookup: - read_lock_bh(&table->tb6_lock); + read_lock_bh(&rt6_lock); restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr); restart: - rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); - BACKTRACK(&fl->fl6_src); + rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); + BACKTRACK(); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); + nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); else { #if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &fl->fl6_dst); + nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); #else goto out2; #endif @@ -665,7 +587,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt); + err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); if (!err) goto out2; } @@ -674,7 +596,7 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, goto out2; /* - * Race condition! In the gap, when table->tb6_lock was + * Race condition! In the gap, when rt6_lock was * released someone could insert this route. Relookup. 
*/ dst_release(&rt->u.dst); @@ -686,63 +608,40 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - - return rt; -} - -void ip6_route_input(struct sk_buff *skb) -{ - struct ipv6hdr *iph = skb->nh.ipv6h; - struct flowi fl = { - .iif = skb->dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = iph->saddr, -#ifdef CONFIG_IPV6_ROUTE_FWMARK - .fwmark = skb->nfmark, -#endif - .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, - }, - }, - .proto = iph->nexthdr, - }; - int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0; - - skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); + skb->dst = (struct dst_entry *) rt; + return; } -static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, - struct flowi *fl, int flags) +struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict = 0; + int strict; int attempts = 3; int err; - int reachable = RT6_LOOKUP_F_REACHABLE; + int reachable = RT6_SELECT_F_REACHABLE; - strict |= flags & RT6_LOOKUP_F_IFACE; + strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? 
RT6_SELECT_F_IFACE : 0; relookup: - read_lock_bh(&table->tb6_lock); + read_lock_bh(&rt6_lock); restart_2: - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); restart: rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); - BACKTRACK(&fl->fl6_src); + BACKTRACK(); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); @@ -759,7 +658,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt); + err = ip6_ins_rt(nrt, NULL, NULL, NULL); if (!err) goto out2; } @@ -768,7 +667,7 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, goto out2; /* - * Race condition! In the gap, when table->tb6_lock was + * Race condition! In the gap, when rt6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -780,21 +679,11 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - return rt; -} - -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) -{ - int flags = 0; - - if (rt6_need_strict(&fl->fl6_dst)) - flags |= RT6_LOOKUP_F_IFACE; - - return fib6_rule_lookup(fl, flags, ip6_pol_route_output); + return &rt->u.dst; } @@ -820,7 +709,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); else dst_release(dst); } @@ -858,6 +747,8 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu) } } +/* Protected by rt6_lock. 
*/ +static struct dst_entry *ndisc_dst_gc_list; static int ipv6_get_mtu(struct net_device *dev); static inline unsigned int ipv6_advmss(unsigned int mtu) @@ -878,9 +769,6 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) return mtu; } -static struct dst_entry *ndisc_dst_gc_list; -static DEFINE_SPINLOCK(ndisc_lock); - struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, struct in6_addr *addr, @@ -921,10 +809,10 @@ struct dst_entry *ndisc_dst_alloc(struct net_device *dev, rt->rt6i_dst.plen = 128; #endif - spin_lock_bh(&ndisc_lock); + write_lock_bh(&rt6_lock); rt->u.dst.next = ndisc_dst_gc_list; ndisc_dst_gc_list = &rt->u.dst; - spin_unlock_bh(&ndisc_lock); + write_unlock_bh(&rt6_lock); fib6_force_start_gc(); @@ -938,11 +826,8 @@ int ndisc_dst_gc(int *more) int freed; next = NULL; - freed = 0; - - spin_lock_bh(&ndisc_lock); pprev = &ndisc_dst_gc_list; - + freed = 0; while ((dst = *pprev) != NULL) { if (!atomic_read(&dst->__refcnt)) { *pprev = dst->next; @@ -954,8 +839,6 @@ int ndisc_dst_gc(int *more) } } - spin_unlock_bh(&ndisc_lock); - return freed; } @@ -1016,24 +899,28 @@ int ipv6_get_hoplimit(struct net_device *dev) * */ -int ip6_route_add(struct fib6_config *cfg) +int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; + struct rtmsg *r; + struct rtattr **rta; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; - struct fib6_table *table; int addr_type; - if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) + rta = (struct rtattr **) _rtattr; + + if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) return -EINVAL; #ifndef CONFIG_IPV6_SUBTREES - if (cfg->fc_src_len) + if (rtmsg->rtmsg_src_len) return -EINVAL; #endif - if (cfg->fc_ifindex) { + if (rtmsg->rtmsg_ifindex) { err = -ENODEV; - dev = dev_get_by_index(cfg->fc_ifindex); + dev = dev_get_by_index(rtmsg->rtmsg_ifindex); if (!dev) goto out; idev = 
in6_dev_get(dev); @@ -1041,14 +928,8 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - if (cfg->fc_metric == 0) - cfg->fc_metric = IP6_RT_PRIO_USER; - - table = fib6_new_table(cfg->fc_table); - if (table == NULL) { - err = -ENOBUFS; - goto out; - } + if (rtmsg->rtmsg_metric == 0) + rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; rt = ip6_dst_alloc(); @@ -1058,13 +939,14 @@ int ip6_route_add(struct fib6_config *cfg) } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); - - if (cfg->fc_protocol == RTPROT_UNSPEC) - cfg->fc_protocol = RTPROT_BOOT; - rt->rt6i_protocol = cfg->fc_protocol; + rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info); + if (nlh && (r = NLMSG_DATA(nlh))) { + rt->rt6i_protocol = r->rtm_protocol; + } else { + rt->rt6i_protocol = RTPROT_BOOT; + } - addr_type = ipv6_addr_type(&cfg->fc_dst); + addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); if (addr_type & IPV6_ADDR_MULTICAST) rt->u.dst.input = ip6_mc_input; @@ -1073,22 +955,24 @@ int ip6_route_add(struct fib6_config *cfg) rt->u.dst.output = ip6_output; - ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); - rt->rt6i_dst.plen = cfg->fc_dst_len; + ipv6_addr_prefix(&rt->rt6i_dst.addr, + &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); + rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; if (rt->rt6i_dst.plen == 128) rt->u.dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); - rt->rt6i_src.plen = cfg->fc_src_len; + ipv6_addr_prefix(&rt->rt6i_src.addr, + &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); + rt->rt6i_src.plen = rtmsg->rtmsg_src_len; #endif - rt->rt6i_metric = cfg->fc_metric; + rt->rt6i_metric = rtmsg->rtmsg_metric; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes */ - if ((cfg->fc_flags & RTF_REJECT) || + if ((rtmsg->rtmsg_flags&RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && 
!(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != &loopback_dev) { @@ -1111,12 +995,12 @@ int ip6_route_add(struct fib6_config *cfg) goto install_route; } - if (cfg->fc_flags & RTF_GATEWAY) { + if (rtmsg->rtmsg_flags & RTF_GATEWAY) { struct in6_addr *gw_addr; int gwa_type; - gw_addr = &cfg->fc_gateway; - ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); + gw_addr = &rtmsg->rtmsg_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -1133,7 +1017,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); + grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1165,7 +1049,7 @@ int ip6_route_add(struct fib6_config *cfg) if (dev == NULL) goto out; - if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { + if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); if (IS_ERR(rt->rt6i_nexthop)) { err = PTR_ERR(rt->rt6i_nexthop); @@ -1174,24 +1058,24 @@ int ip6_route_add(struct fib6_config *cfg) } } - rt->rt6i_flags = cfg->fc_flags; + rt->rt6i_flags = rtmsg->rtmsg_flags; install_route: - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla->nla_type; - - if (type) { - if (type > RTAX_MAX) { + if (rta && rta[RTA_METRICS-1]) { + int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); + struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); + + while (RTA_OK(attr, attrlen)) { + unsigned flavor = attr->rta_type; + if (flavor) { + if (flavor > RTAX_MAX) { err = -EINVAL; goto out; } - - rt->u.dst.metrics[type - 1] = nla_get_u32(nla); + rt->u.dst.metrics[flavor-1] = + *(u32 *)RTA_DATA(attr); } + attr = RTA_NEXT(attr, attrlen); } } @@ -1203,8 +1087,7 
@@ int ip6_route_add(struct fib6_config *cfg) rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; - rt->rt6i_table = table; - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); + return ip6_ins_rt(rt, nlh, _rtattr, req); out: if (dev) @@ -1216,65 +1099,51 @@ int ip6_route_add(struct fib6_config *cfg) return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) +int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { int err; - struct fib6_table *table; - if (rt == &ip6_null_entry) - return -ENOENT; + write_lock_bh(&rt6_lock); - table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); - - err = fib6_del(rt, info); + err = fib6_del(rt, nlh, _rtattr, req); dst_release(&rt->u.dst); - write_unlock_bh(&table->tb6_lock); + write_unlock_bh(&rt6_lock); return err; } -int ip6_del_rt(struct rt6_info *rt) +static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { - return __ip6_del_rt(rt, NULL); -} - -static int ip6_route_del(struct fib6_config *cfg) -{ - struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(cfg->fc_table); - if (table == NULL) - return err; + read_lock_bh(&rt6_lock); - read_lock_bh(&table->tb6_lock); - - fn = fib6_locate(&table->tb6_root, - &cfg->fc_dst, cfg->fc_dst_len, - &cfg->fc_src, cfg->fc_src_len); + fn = fib6_locate(&ip6_routing_table, + &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, + &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); if (fn) { for (rt = fn->leaf; rt; rt = rt->u.next) { - if (cfg->fc_ifindex && + if (rtmsg->rtmsg_ifindex && (rt->rt6i_dev == NULL || - rt->rt6i_dev->ifindex != cfg->fc_ifindex)) + rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) continue; - if (cfg->fc_flags & RTF_GATEWAY && - !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) + if (rtmsg->rtmsg_flags&RTF_GATEWAY && + 
!ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) continue; - if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) + if (rtmsg->rtmsg_metric && + rtmsg->rtmsg_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); - return __ip6_del_rt(rt, &cfg->fc_nlinfo); + return ip6_del_rt(rt, nlh, _rtattr, req); } } - read_unlock_bh(&table->tb6_lock); + read_unlock_bh(&rt6_lock); return err; } @@ -1282,18 +1151,13 @@ static int ip6_route_del(struct fib6_config *cfg) /* * Handle redirects */ -struct ip6rd_flowi { - struct flowi fl; - struct in6_addr gateway; -}; - -static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, - struct flowi *fl, - int flags) +void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, + struct neighbour *neigh, u8 *lladdr, int on_link) { - struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl; - struct rt6_info *rt; + struct rt6_info *rt, *nrt = NULL; + int strict; struct fib6_node *fn; + struct netevent_redirect netevent; /* * Get the "current" route for this destination and @@ -1305,9 +1169,10 @@ static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, * is a bit fuzzy and one might need to check all possible * routes. 
*/ + strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); - read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); + read_lock_bh(&rt6_lock); + fn = fib6_lookup(&ip6_routing_table, dest, NULL); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* @@ -1322,60 +1187,29 @@ static struct rt6_info *__ip6_route_redirect(struct fib6_table *table, continue; if (!(rt->rt6i_flags & RTF_GATEWAY)) continue; - if (fl->oif != rt->rt6i_dev->ifindex) + if (neigh->dev != rt->rt6i_dev) continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) + if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) continue; break; } + if (rt) + dst_hold(&rt->u.dst); + else if (strict) { + while ((fn = fn->parent) != NULL) { + if (fn->fn_flags & RTN_ROOT) + break; + if (fn->fn_flags & RTN_RTINFO) + goto restart; + } + } + read_unlock_bh(&rt6_lock); - if (!rt) - rt = &ip6_null_entry; - BACKTRACK(&fl->fl6_src); -out: - dst_hold(&rt->u.dst); - - read_unlock_bh(&table->tb6_lock); - - return rt; -}; - -static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, - struct in6_addr *src, - struct in6_addr *gateway, - struct net_device *dev) -{ - struct ip6rd_flowi rdfl = { - .fl = { - .oif = dev->ifindex, - .nl_u = { - .ip6_u = { - .daddr = *dest, - .saddr = *src, - }, - }, - }, - .gateway = *gateway, - }; - int flags = rt6_need_strict(dest) ? 
RT6_LOOKUP_F_IFACE : 0; - - return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); -} - -void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, - struct in6_addr *saddr, - struct neighbour *neigh, u8 *lladdr, int on_link) -{ - struct rt6_info *rt, *nrt = NULL; - struct netevent_redirect netevent; - - rt = ip6_route_redirect(dest, src, saddr, neigh->dev); - - if (rt == &ip6_null_entry) { + if (!rt) { if (net_ratelimit()) printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop " "for redirect target\n"); - goto out; + return; } /* @@ -1418,7 +1252,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); - if (ip6_ins_rt(nrt)) + if (ip6_ins_rt(nrt, NULL, NULL, NULL)) goto out; netevent.old = &rt->u.dst; @@ -1426,7 +1260,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { - ip6_del_rt(rt); + ip6_del_rt(rt, NULL, NULL, NULL); return; } @@ -1508,7 +1342,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - ip6_ins_rt(nrt); + ip6_ins_rt(nrt, NULL, NULL, NULL); } out: dst_release(&rt->u.dst); @@ -1544,7 +1378,6 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif - rt->rt6i_table = ort->rt6i_table; } return rt; } @@ -1555,14 +1388,9 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle { struct fib6_node *fn; struct rt6_info *rt = NULL; - struct fib6_table *table; - - table = fib6_get_table(RT6_TABLE_INFO); - if (table == NULL) - return NULL; - write_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, prefix 
,prefixlen, NULL, 0); + write_lock_bh(&rt6_lock); + fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1577,7 +1405,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle break; } out: - write_unlock_bh(&table->tb6_lock); + write_unlock_bh(&rt6_lock); return rt; } @@ -1585,23 +1413,21 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct in6_addr *gwaddr, int ifindex, unsigned pref) { - struct fib6_config cfg = { - .fc_table = RT6_TABLE_INFO, - .fc_metric = 1024, - .fc_ifindex = ifindex, - .fc_dst_len = prefixlen, - .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | - RTF_UP | RTF_PREF(pref), - }; - - ipv6_addr_copy(&cfg.fc_dst, prefix); - ipv6_addr_copy(&cfg.fc_gateway, gwaddr); + struct in6_rtmsg rtmsg; + memset(&rtmsg, 0, sizeof(rtmsg)); + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); + rtmsg.rtmsg_dst_len = prefixlen; + ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); + rtmsg.rtmsg_metric = 1024; + rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); /* We should treat it as a default route if prefix length is 0. 
*/ if (!prefixlen) - cfg.fc_flags |= RTF_DEFAULT; + rtmsg.rtmsg_flags |= RTF_DEFAULT; + rtmsg.rtmsg_ifindex = ifindex; - ip6_route_add(&cfg); + ip6_route_add(&rtmsg, NULL, NULL, NULL); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1610,14 +1436,12 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; - struct fib6_table *table; + struct fib6_node *fn; - table = fib6_get_table(RT6_TABLE_DFLT); - if (table == NULL) - return NULL; + fn = &ip6_routing_table; - write_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { + write_lock_bh(&rt6_lock); + for (rt = fn->leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -1625,7 +1449,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d } if (rt) dst_hold(&rt->u.dst); - write_unlock_bh(&table->tb6_lock); + write_unlock_bh(&rt6_lock); return rt; } @@ -1633,65 +1457,43 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { - struct fib6_config cfg = { - .fc_table = RT6_TABLE_DFLT, - .fc_metric = 1024, - .fc_ifindex = dev->ifindex, - .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | - RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - }; + struct in6_rtmsg rtmsg; - ipv6_addr_copy(&cfg.fc_gateway, gwaddr); + memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); + rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); + rtmsg.rtmsg_metric = 1024; + rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | + RTF_PREF(pref); - ip6_route_add(&cfg); + rtmsg.rtmsg_ifindex = dev->ifindex; + ip6_route_add(&rtmsg, NULL, NULL, NULL); return rt6_get_dflt_router(gwaddr, dev); } void 
rt6_purge_dflt_routers(void) { struct rt6_info *rt; - struct fib6_table *table; - - /* NOTE: Keep consistent with rt6_get_dflt_router */ - table = fib6_get_table(RT6_TABLE_DFLT); - if (table == NULL) - return; restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { + read_lock_bh(&rt6_lock); + for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); - goto restart; - } - } - read_unlock_bh(&table->tb6_lock); -} -static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, - struct fib6_config *cfg) -{ - memset(cfg, 0, sizeof(*cfg)); + read_unlock_bh(&rt6_lock); - cfg->fc_table = RT6_TABLE_MAIN; - cfg->fc_ifindex = rtmsg->rtmsg_ifindex; - cfg->fc_metric = rtmsg->rtmsg_metric; - cfg->fc_expires = rtmsg->rtmsg_info; - cfg->fc_dst_len = rtmsg->rtmsg_dst_len; - cfg->fc_src_len = rtmsg->rtmsg_src_len; - cfg->fc_flags = rtmsg->rtmsg_flags; + ip6_del_rt(rt, NULL, NULL, NULL); - ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); - ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); - ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); + goto restart; + } + } + read_unlock_bh(&rt6_lock); } int ipv6_route_ioctl(unsigned int cmd, void __user *arg) { - struct fib6_config cfg; struct in6_rtmsg rtmsg; int err; @@ -1704,16 +1506,14 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) sizeof(struct in6_rtmsg)); if (err) return -EFAULT; - - rtmsg_to_fib6_config(&rtmsg, &cfg); - + rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&cfg); + err = ip6_route_add(&rtmsg, NULL, NULL, NULL); break; case SIOCDELRT: - err = ip6_route_del(&cfg); + err = ip6_route_del(&rtmsg, NULL, NULL, NULL); break; default: err = -EINVAL; @@ -1787,7 +1587,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; - rt->rt6i_table = 
fib6_get_table(RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); @@ -1806,7 +1605,9 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) void rt6_ifdown(struct net_device *dev) { - fib6_clean_all(fib6_ifdown, 0, dev); + write_lock_bh(&rt6_lock); + fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); + write_unlock_bh(&rt6_lock); } struct rt6_mtu_change_arg @@ -1856,124 +1657,90 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) void rt6_mtu_change(struct net_device *dev, unsigned mtu) { - struct rt6_mtu_change_arg arg = { - .dev = dev, - .mtu = mtu, - }; + struct rt6_mtu_change_arg arg; - fib6_clean_all(rt6_mtu_change_route, 0, &arg); + arg.dev = dev; + arg.mtu = mtu; + read_lock_bh(&rt6_lock); + fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); + read_unlock_bh(&rt6_lock); } -static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { - [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) }, - [RTA_OIF] = { .type = NLA_U32 }, - [RTA_IIF] = { .type = NLA_U32 }, - [RTA_PRIORITY] = { .type = NLA_U32 }, - [RTA_METRICS] = { .type = NLA_NESTED }, -}; - -static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, - struct fib6_config *cfg) +static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, + struct in6_rtmsg *rtmsg) { - struct rtmsg *rtm; - struct nlattr *tb[RTA_MAX+1]; - int err; + memset(rtmsg, 0, sizeof(*rtmsg)); - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); - if (err < 0) - goto errout; + rtmsg->rtmsg_dst_len = r->rtm_dst_len; + rtmsg->rtmsg_src_len = r->rtm_src_len; + rtmsg->rtmsg_flags = RTF_UP; + if (r->rtm_type == RTN_UNREACHABLE) + rtmsg->rtmsg_flags |= RTF_REJECT; - err = -EINVAL; - rtm = nlmsg_data(nlh); - memset(cfg, 0, sizeof(*cfg)); - - cfg->fc_table = rtm->rtm_table; - cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_src_len = rtm->rtm_src_len; - cfg->fc_flags = RTF_UP; - cfg->fc_protocol = rtm->rtm_protocol; - - if (rtm->rtm_type == RTN_UNREACHABLE) 
- cfg->fc_flags |= RTF_REJECT; - - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; - cfg->fc_nlinfo.nlh = nlh; - - if (tb[RTA_GATEWAY]) { - nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); - cfg->fc_flags |= RTF_GATEWAY; + if (rta[RTA_GATEWAY-1]) { + if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) + return -EINVAL; + memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); + rtmsg->rtmsg_flags |= RTF_GATEWAY; } - - if (tb[RTA_DST]) { - int plen = (rtm->rtm_dst_len + 7) >> 3; - - if (nla_len(tb[RTA_DST]) < plen) - goto errout; - - nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); + if (rta[RTA_DST-1]) { + if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) + return -EINVAL; + memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); } - - if (tb[RTA_SRC]) { - int plen = (rtm->rtm_src_len + 7) >> 3; - - if (nla_len(tb[RTA_SRC]) < plen) - goto errout; - - nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); + if (rta[RTA_SRC-1]) { + if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) + return -EINVAL; + memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); } - - if (tb[RTA_OIF]) - cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); - - if (tb[RTA_PRIORITY]) - cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); - - if (tb[RTA_METRICS]) { - cfg->fc_mx = nla_data(tb[RTA_METRICS]); - cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); + if (rta[RTA_OIF-1]) { + if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) + return -EINVAL; + memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); } - - if (tb[RTA_TABLE]) - cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); - - err = 0; -errout: - return err; + if (rta[RTA_PRIORITY-1]) { + if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) + return -EINVAL; + memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + } + return 0; } int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib6_config cfg; - int err; - - err = rtm_to_fib6_config(skb, nlh, 
&cfg); - if (err < 0) - return err; + struct rtmsg *r = NLMSG_DATA(nlh); + struct in6_rtmsg rtmsg; - return ip6_route_del(&cfg); + if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) + return -EINVAL; + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib6_config cfg; - int err; - - err = rtm_to_fib6_config(skb, nlh, &cfg); - if (err < 0) - return err; + struct rtmsg *r = NLMSG_DATA(nlh); + struct in6_rtmsg rtmsg; - return ip6_route_add(&cfg); + if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) + return -EINVAL; + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); } +struct rt6_rtnl_dump_arg +{ + struct sk_buff *skb; + struct netlink_callback *cb; +}; + static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, int prefix, unsigned int flags) { struct rtmsg *rtm; - struct nlmsghdr *nlh; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; struct rta_cacheinfo ci; - u32 table; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -1982,21 +1749,13 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); - if (nlh == NULL) - return -ENOBUFS; - - rtm = nlmsg_data(nlh); + nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags); + rtm = NLMSG_DATA(nlh); rtm->rtm_family = AF_INET6; rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; - if (rt->rt6i_table) - table = rt->rt6i_table->tb6_id; - else - table = RT6_TABLE_UNSPEC; - rtm->rtm_table = table; - NLA_PUT_U32(skb, RTA_TABLE, table); + rtm->rtm_table = RT_TABLE_MAIN; if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) @@ -2017,35 +1776,31 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, 
rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - NLA_PUT(skb, RTA_DST, 16, dst); + RTA_PUT(skb, RTA_DST, 16, dst); rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); + RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr); #ifdef CONFIG_IPV6_SUBTREES if (src) { - NLA_PUT(skb, RTA_SRC, 16, src); + RTA_PUT(skb, RTA_SRC, 16, src); rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len) - NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); + RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr); #endif if (iif) - NLA_PUT_U32(skb, RTA_IIF, iif); + RTA_PUT(skb, RTA_IIF, 4, &iif); else if (dst) { struct in6_addr saddr_buf; if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0) - NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); + RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf); } - if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) - goto nla_put_failure; - + goto rtattr_failure; if (rt->u.dst.neighbour) - NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); - + RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key); if (rt->u.dst.dev) - NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex); - - NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric); + RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex); + RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric); ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); if (rt->rt6i_expires) ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies); @@ -2057,21 +1812,23 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, ci.rta_id = 0; ci.rta_ts = 0; ci.rta_tsage = 0; - NLA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); + nlh->nlmsg_len = skb->tail - b; + return skb->len; - return nlmsg_end(skb, nlh); - -nla_put_failure: - return nlmsg_cancel(skb, nlh); +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } -int rt6_dump_route(struct rt6_info *rt, void *p_arg) +static int rt6_dump_route(struct rt6_info 
*rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; int prefix; - if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) { - struct rtmsg *rtm = nlmsg_data(arg->cb->nlh); + if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) { + struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh); prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0; } else prefix = 0; @@ -2081,108 +1838,189 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, NLM_F_MULTI); } -int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +static int fib6_dump_node(struct fib6_walker_t *w) { - struct nlattr *tb[RTA_MAX+1]; + int res; struct rt6_info *rt; - struct sk_buff *skb; - struct rtmsg *rtm; - struct flowi fl; - int err, iif = 0; - err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); - if (err < 0) - goto errout; - - err = -EINVAL; - memset(&fl, 0, sizeof(fl)); + for (rt = w->leaf; rt; rt = rt->u.next) { + res = rt6_dump_route(rt, w->args); + if (res < 0) { + /* Frame is full, suspend walking */ + w->leaf = rt; + return 1; + } + BUG_TRAP(res!=0); + } + w->leaf = NULL; + return 0; +} - if (tb[RTA_SRC]) { - if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr)) - goto errout; +static void fib6_dump_end(struct netlink_callback *cb) +{ + struct fib6_walker_t *w = (void*)cb->args[0]; - ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC])); + if (w) { + cb->args[0] = 0; + fib6_walker_unlink(w); + kfree(w); } + cb->done = (void*)cb->args[1]; + cb->args[1] = 0; +} - if (tb[RTA_DST]) { - if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr)) - goto errout; +static int fib6_dump_done(struct netlink_callback *cb) +{ + fib6_dump_end(cb); + return cb->done ? 
cb->done(cb) : 0; +} + +int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct rt6_rtnl_dump_arg arg; + struct fib6_walker_t *w; + int res; - ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST])); + arg.skb = skb; + arg.cb = cb; + + w = (void*)cb->args[0]; + if (w == NULL) { + /* New dump: + * + * 1. hook callback destructor. + */ + cb->args[1] = (long)cb->done; + cb->done = fib6_dump_done; + + /* + * 2. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (w == NULL) + return -ENOMEM; + RT6_TRACE("dump<%p", w); + w->root = &ip6_routing_table; + w->func = fib6_dump_node; + w->args = &arg; + cb->args[0] = (long)w; + read_lock_bh(&rt6_lock); + res = fib6_walk(w); + read_unlock_bh(&rt6_lock); + } else { + w->args = &arg; + read_lock_bh(&rt6_lock); + res = fib6_walk_continue(w); + read_unlock_bh(&rt6_lock); } +#if RT6_DEBUG >= 3 + if (res <= 0 && skb->len == 0) + RT6_TRACE("%p>dump end\n", w); +#endif + res = res < 0 ? res : skb->len; + /* res < 0 is an error. (really, impossible) + res == 0 means that dump is complete, but skb still can contain data. + res > 0 dump is not complete, but frame is full. + */ + /* Destroy walker, if dump of this table is complete. */ + if (res <= 0) + fib6_dump_end(cb); + return res; +} + +int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) +{ + struct rtattr **rta = arg; + int iif = 0; + int err = -ENOBUFS; + struct sk_buff *skb; + struct flowi fl; + struct rt6_info *rt; - if (tb[RTA_IIF]) - iif = nla_get_u32(tb[RTA_IIF]); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto out; + + /* Reserve room for dummy headers, this skb can pass + through good chunk of routing engine. 
+ */ + skb->mac.raw = skb->data; + skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); - if (tb[RTA_OIF]) - fl.oif = nla_get_u32(tb[RTA_OIF]); + memset(&fl, 0, sizeof(fl)); + if (rta[RTA_SRC-1]) + ipv6_addr_copy(&fl.fl6_src, + (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1])); + if (rta[RTA_DST-1]) + ipv6_addr_copy(&fl.fl6_dst, + (struct in6_addr*)RTA_DATA(rta[RTA_DST-1])); + + if (rta[RTA_IIF-1]) + memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); if (iif) { struct net_device *dev; dev = __dev_get_by_index(iif); if (!dev) { err = -ENODEV; - goto errout; + goto out_free; } } - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (skb == NULL) { - err = -ENOBUFS; - goto errout; - } + fl.oif = 0; + if (rta[RTA_OIF-1]) + memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); - /* Reserve room for dummy headers, this skb can pass - through good chunk of routing engine. - */ - skb->mac.raw = skb->data; - skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr)); + rt = (struct rt6_info*)ip6_route_output(NULL, &fl); - rt = (struct rt6_info*) ip6_route_output(NULL, &fl); skb->dst = &rt->u.dst; - err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif, + NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; + err = rt6_fill_node(skb, rt, + &fl.fl6_dst, &fl.fl6_src, + iif, RTM_NEWROUTE, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, 0, 0); if (err < 0) { - kfree_skb(skb); - goto errout; + err = -EMSGSIZE; + goto out_free; } - err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -errout: + err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + if (err > 0) + err = 0; +out: return err; +out_free: + kfree_skb(skb); + goto out; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, + struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = 0, seq = 0; - struct nlmsghdr *nlh = NULL; - int payload = sizeof(struct rtmsg) + 256; - int err = -ENOBUFS; - - if (info) { - pid = 
info->pid; - nlh = info->nlh; - if (nlh) - seq = nlh->nlmsg_seq; + int size = NLMSG_SPACE(sizeof(struct rtmsg)+256); + u32 pid = current->pid; + u32 seq = 0; + + if (req) + pid = req->pid; + if (nlh) + seq = nlh->nlmsg_seq; + + skb = alloc_skb(size, gfp_any()); + if (!skb) { + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS); + return; } - - skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); - if (skb == NULL) - goto errout; - - err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0); - if (err < 0) { + if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) { kfree_skb(skb); - goto errout; + netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL); + return; } - - err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any()); -errout: - if (err < 0) - rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err); + NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE; + netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any()); } /* @@ -2258,13 +2096,16 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) { - struct rt6_proc_arg arg = { - .buffer = buffer, - .offset = offset, - .length = length, - }; + struct rt6_proc_arg arg; + arg.buffer = buffer; + arg.offset = offset; + arg.length = length; + arg.skip = 0; + arg.len = 0; - fib6_clean_all(rt6_info_route, 0, &arg); + read_lock_bh(&rt6_lock); + fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); + read_unlock_bh(&rt6_lock); *start = buffer; if (offset) @@ -2419,9 +2260,13 @@ void __init ip6_route_init(void) { struct proc_dir_entry *p; - ip6_dst_ops.kmem_cachep = - kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0, - SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); + ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache", + sizeof(struct rt6_info), + 0, SLAB_HWCACHE_ALIGN, + NULL, NULL); + if (!ip6_dst_ops.kmem_cachep) + panic("cannot create ip6_dst_cache"); + fib6_init(); #ifdef CONFIG_PROC_FS 
p = proc_net_create("ipv6_route", 0, rt6_proc_info); @@ -2433,16 +2278,10 @@ void __init ip6_route_init(void) #ifdef CONFIG_XFRM xfrm6_init(); #endif -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - fib6_rules_init(); -#endif } void ip6_route_cleanup(void) { -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - fib6_rules_cleanup(); -#endif #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); proc_net_remove("rt6_stats"); diff --git a/trunk/net/ipv6/tcp_ipv6.c b/trunk/net/ipv6/tcp_ipv6.c index 2546fc9f0a78..802a1a6b1037 100644 --- a/trunk/net/ipv6/tcp_ipv6.c +++ b/trunk/net/ipv6/tcp_ipv6.c @@ -251,8 +251,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } - security_sk_classify_flow(sk, &fl); - err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -272,7 +270,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL, NULL); + __ip6_dst_store(sk, dst, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -376,7 +374,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; - security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -470,7 +467,6 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -545,7 +541,7 @@ static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb) struct ipv6_pinfo *np = inet6_sk(sk); struct tcphdr *th = skb->h.th; - if (skb->ip_summed == CHECKSUM_PARTIAL) { + if (skb->ip_summed == CHECKSUM_HW) { th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); } else { @@ -570,7 +566,7 @@ 
static int tcp_v6_gso_send_check(struct sk_buff *skb) th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len, IPPROTO_TCP, 0); skb->csum = offsetof(struct tcphdr, check); - skb->ip_summed = CHECKSUM_PARTIAL; + skb->ip_summed = CHECKSUM_HW; return 0; } @@ -629,7 +625,6 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -696,7 +691,6 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; - security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -826,8 +820,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; - security_inet_conn_request(sk, skb, req); - if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -931,7 +923,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; @@ -954,7 +945,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL, NULL); + __ip6_dst_store(newsk, dst, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1033,7 +1024,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, static int tcp_v6_checksum_init(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (skb->ip_summed == CHECKSUM_HW) { if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr, 
&skb->nh.ipv6h->daddr,skb->csum)) { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -1075,7 +1066,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 0)) goto discard; /* @@ -1232,7 +1223,7 @@ static int tcp_v6_rcv(struct sk_buff **pskb) if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 0)) goto discard_and_relse; skb->dev = NULL; diff --git a/trunk/net/ipv6/udp.c b/trunk/net/ipv6/udp.c index 9662561701d1..3d54f246411e 100644 --- a/trunk/net/ipv6/udp.c +++ b/trunk/net/ipv6/udp.c @@ -61,9 +61,81 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) +/* Grrr, addr_type already calculated by caller, but I don't want + * to add some silly "cookie" argument to this method just for that. + */ +static int udp_v6_get_port(struct sock *sk, unsigned short snum) { - return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); + struct sock *sk2; + struct hlist_node *node; + + write_lock_bh(&udp_hash_lock); + if (snum == 0) { + int best_size_so_far, best, result, i; + + if (udp_port_rover > sysctl_local_port_range[1] || + udp_port_rover < sysctl_local_port_range[0]) + udp_port_rover = sysctl_local_port_range[0]; + best_size_so_far = 32767; + best = result = udp_port_rover; + for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { + int size; + struct hlist_head *list; + + list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(list)) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + goto gotit; + } + size = 0; + sk_for_each(sk2, node, list) + if (++size >= best_size_so_far) + goto next; + best_size_so_far = size; + best = result; + next:; + } + result = best; + for(i = 0; i < (1 << 16) / 
UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { + if (result > sysctl_local_port_range[1]) + result = sysctl_local_port_range[0] + + ((result - sysctl_local_port_range[0]) & + (UDP_HTABLE_SIZE - 1)); + if (!udp_lport_inuse(result)) + break; + } + if (i >= (1 << 16) / UDP_HTABLE_SIZE) + goto fail; +gotit: + udp_port_rover = snum = result; + } else { + sk_for_each(sk2, node, + &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { + if (inet_sk(sk2)->num == snum && + sk2 != sk && + (!sk2->sk_bound_dev_if || + !sk->sk_bound_dev_if || + sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (!sk2->sk_reuse || !sk->sk_reuse) && + ipv6_rcv_saddr_equal(sk, sk2)) + goto fail; + } + } + + inet_sk(sk)->num = snum; + if (sk_unhashed(sk)) { + sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); + sock_prot_inc_use(sk->sk_prot); + } + write_unlock_bh(&udp_hash_lock); + return 0; + +fail: + write_unlock_bh(&udp_hash_lock); + return 1; } static void udp_v6_hash(struct sock *sk) @@ -273,8 +345,6 @@ static void udpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) { - int rc; - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { kfree_skb(skb); return -1; @@ -286,10 +356,7 @@ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) return 0; } - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP6_INC_STATS_BH(UDP_MIB_RCVBUFERRORS); + if (sock_queue_rcv_skb(sk,skb)<0) { UDP6_INC_STATS_BH(UDP_MIB_INERRORS); kfree_skb(skb); return 0; @@ -408,7 +475,7 @@ static int udpv6_rcv(struct sk_buff **pskb) uh = skb->h.uh; } - if (skb->ip_summed == CHECKSUM_COMPLETE && + if (skb->ip_summed == CHECKSUM_HW && !csum_ipv6_magic(saddr, daddr, ulen, IPPROTO_UDP, skb->csum)) skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -715,8 +782,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, connected = 0; } - 
security_sk_classify_flow(sk, fl); - err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; @@ -775,12 +840,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (connected) { ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL, -#ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? - &np->saddr : -#endif - NULL); + &np->daddr : NULL); } else { dst_release(dst); } @@ -795,16 +855,6 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, UDP6_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS); return len; } - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP6_INC_STATS_USER(UDP_MIB_SNDBUFERRORS); - } return err; do_confirm: diff --git a/trunk/net/ipv6/xfrm6_input.c b/trunk/net/ipv6/xfrm6_input.c index a40a05789013..0405d74ff910 100644 --- a/trunk/net/ipv6/xfrm6_input.c +++ b/trunk/net/ipv6/xfrm6_input.c @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ + if (x->props.mode) { /* XXX */ decaps = 1; break; } @@ -138,111 +138,3 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } - -int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, - xfrm_address_t *saddr, u8 proto) -{ - struct xfrm_state *x = NULL; - int wildcard = 0; - struct in6_addr any; - xfrm_address_t *xany; - struct xfrm_state *xfrm_vec_one = NULL; - int nh = 0; - int i = 0; - - ipv6_addr_set(&any, 0, 0, 0, 0); - xany = (xfrm_address_t *)&any; - - for (i = 0; i < 3; i++) { - xfrm_address_t *dst, *src; - switch (i) { - case 0: - dst = daddr; - src = saddr; - 
break; - case 1: - /* lookup state with wild-card source address */ - wildcard = 1; - dst = daddr; - src = xany; - break; - case 2: - default: - /* lookup state with wild-card addresses */ - wildcard = 1; /* XXX */ - dst = xany; - src = xany; - break; - } - - x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); - if (!x) - continue; - - spin_lock(&x->lock); - - if (wildcard) { - if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - } - - if (unlikely(x->km.state != XFRM_STATE_VALID)) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - if (xfrm_state_check_expire(x)) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - - nh = x->type->input(x, skb); - if (nh <= 0) { - spin_unlock(&x->lock); - xfrm_state_put(x); - x = NULL; - continue; - } - - x->curlft.bytes += skb->len; - x->curlft.packets++; - - spin_unlock(&x->lock); - - xfrm_vec_one = x; - break; - } - - if (!xfrm_vec_one) - goto drop; - - /* Allocate new secpath or COW existing one. */ - if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { - struct sec_path *sp; - sp = secpath_dup(skb->sp); - if (!sp) - goto drop; - if (skb->sp) - secpath_put(skb->sp); - skb->sp = sp; - } - - if (1 + skb->sp->len > XFRM_MAX_DEPTH) - goto drop; - - skb->sp->xvec[skb->sp->len] = xfrm_vec_one; - skb->sp->len ++; - - return 1; -drop: - if (xfrm_vec_one) - xfrm_state_put(xfrm_vec_one); - return -1; -} diff --git a/trunk/net/ipv6/xfrm6_mode_ro.c b/trunk/net/ipv6/xfrm6_mode_ro.c deleted file mode 100644 index 6031c16d46ca..000000000000 --- a/trunk/net/ipv6/xfrm6_mode_ro.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * xfrm6_mode_ro.c - Route optimization mode for IPv6. 
- * - * Copyright (C)2003-2006 Helsinki University of Technology - * Copyright (C)2003-2006 USAGI/WIDE Project - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ -/* - * Authors: - * Noriaki TAKAMIYA @USAGI - * Masahide NAKAMURA @USAGI - */ - -#include -#include -#include -#include -#include -#include -#include - -/* Add route optimization header space. - * - * The IP header and mutable extension headers will be moved forward to make - * space for the route optimization header. - * - * On exit, skb->h will be set to the start of the encapsulation header to be - * filled in by x->type->output and skb->nh will be set to the nextheader field - * of the extension header directly preceding the encapsulation header, or in - * its absence, that of the top IP header. The value of skb->data will always - * point to the top IP header. - */ -static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) -{ - struct ipv6hdr *iph; - u8 *prevhdr; - int hdr_len; - - skb_push(skb, x->props.header_len); - iph = skb->nh.ipv6h; - - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); - skb->nh.raw = prevhdr - x->props.header_len; - skb->h.raw = skb->data + hdr_len; - memmove(skb->data, iph, hdr_len); - return 0; -} - -/* - * Do nothing about routing optimization header unlike IPsec. 
- */ -static int xfrm6_ro_input(struct xfrm_state *x, struct sk_buff *skb) -{ - return 0; -} - -static struct xfrm_mode xfrm6_ro_mode = { - .input = xfrm6_ro_input, - .output = xfrm6_ro_output, - .owner = THIS_MODULE, - .encap = XFRM_MODE_ROUTEOPTIMIZATION, -}; - -static int __init xfrm6_ro_init(void) -{ - return xfrm_register_mode(&xfrm6_ro_mode, AF_INET6); -} - -static void __exit xfrm6_ro_exit(void) -{ - int err; - - err = xfrm_unregister_mode(&xfrm6_ro_mode, AF_INET6); - BUG_ON(err); -} - -module_init(xfrm6_ro_init); -module_exit(xfrm6_ro_exit); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_ROUTEOPTIMIZATION); diff --git a/trunk/net/ipv6/xfrm6_mode_transport.c b/trunk/net/ipv6/xfrm6_mode_transport.c index 3a4b39b12bad..711d713e36d8 100644 --- a/trunk/net/ipv6/xfrm6_mode_transport.c +++ b/trunk/net/ipv6/xfrm6_mode_transport.c @@ -25,8 +25,9 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_transport_output(struct sk_buff *skb) { + struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; @@ -34,7 +35,7 @@ static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, x->props.header_len); iph = skb->nh.ipv6h; - hdr_len = x->type->hdr_offset(x, skb, &prevhdr); + hdr_len = ip6_find_1stfragopt(skb, &prevhdr); skb->nh.raw = prevhdr - x->props.header_len; skb->h.raw = skb->data + hdr_len; memmove(skb->data, iph, hdr_len); diff --git a/trunk/net/ipv6/xfrm6_mode_tunnel.c b/trunk/net/ipv6/xfrm6_mode_tunnel.c index 5e7d8a7d6414..8af79be2edca 100644 --- a/trunk/net/ipv6/xfrm6_mode_tunnel.c +++ b/trunk/net/ipv6/xfrm6_mode_tunnel.c @@ -37,9 +37,10 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. 
*/ -static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) +static int xfrm6_tunnel_output(struct sk_buff *skb) { struct dst_entry *dst = skb->dst; + struct xfrm_state *x = dst->xfrm; struct ipv6hdr *iph, *top_iph; int dsfield; diff --git a/trunk/net/ipv6/xfrm6_output.c b/trunk/net/ipv6/xfrm6_output.c index c260ea104c52..c8c8b44a0f58 100644 --- a/trunk/net/ipv6/xfrm6_output.c +++ b/trunk/net/ipv6/xfrm6_output.c @@ -17,12 +17,6 @@ #include #include -int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, - u8 **prevhdr) -{ - return ip6_find_1stfragopt(skb, prevhdr); -} - static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; @@ -47,13 +41,13 @@ static int xfrm6_output_one(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int err; - if (skb->ip_summed == CHECKSUM_PARTIAL) { - err = skb_checksum_help(skb); + if (skb->ip_summed == CHECKSUM_HW) { + err = skb_checksum_help(skb, 0); if (err) goto error_nolock; } - if (x->props.mode == XFRM_MODE_TUNNEL) { + if (x->props.mode) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -65,7 +59,7 @@ static int xfrm6_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(x, skb); + err = x->mode->output(skb); if (err) goto error; @@ -75,8 +69,6 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; - if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) - x->lastused = (u64)xtime.tv_sec; spin_unlock_bh(&x->lock); @@ -88,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); + } while (x && !x->props.mode); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; diff --git a/trunk/net/ipv6/xfrm6_policy.c b/trunk/net/ipv6/xfrm6_policy.c index 6a252e2134d1..73cd250aecbb 100644 --- a/trunk/net/ipv6/xfrm6_policy.c +++ b/trunk/net/ipv6/xfrm6_policy.c @@ -18,9 +18,6 @@ #include #include #include -#ifdef 
CONFIG_IPV6_MIP6 -#include -#endif static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; @@ -34,26 +31,6 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return err; } -static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) -{ - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)&daddr->a6, - }, - }, - }; - - if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, - (struct in6_addr *)&saddr->a6); - dst_release(&rt->u.dst); - return 0; - } - return -EHOSTUNREACH; -} - static struct dst_entry * __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -73,9 +50,7 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6, - (xdst->u.rt6.rt6i_dst.plen != 128 || - xdst->u.rt6.rt6i_src.plen != 128))) { + xfrm_bundle_ok(xdst, fl, AF_INET6)) { dst_clone(dst); break; } @@ -84,40 +59,6 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } -static inline struct in6_addr* -__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) -{ - return (x->type->remote_addr) ? - (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->id.daddr; -} - -static inline struct in6_addr* -__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) -{ - return (x->type->local_addr) ? 
- (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : - (struct in6_addr*)&x->props.saddr; -} - -static inline void -__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen += x->props.header_len; - else - *len += x->props.header_len; -} - -static inline void -__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) -{ - if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) - *nflen -= x->props.header_len; - else - *len -= x->props.header_len; -} - /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -142,7 +83,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int int i; int err = 0; int header_len = 0; - int nfheader_len = 0; int trailer_len = 0; dst = dst_prev = NULL; @@ -169,18 +109,17 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; - xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); - local = __xfrm6_bundle_addr_local(xfrm[i], local); + if (xfrm[i]->props.mode) { + remote = (struct in6_addr*)&xfrm[i]->id.daddr; + local = (struct in6_addr*)&xfrm[i]->props.saddr; tunnel = 1; } - __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); + header_len += xfrm[i]->props.header_len; trailer_len += xfrm[i]->props.trailer_len; if (tunnel) { @@ -215,7 +154,6 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; - dst_prev->nfheader_len = nfheader_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -234,7 +172,7 @@ 
__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int x->u.rt6.rt6i_src = rt0->rt6i_src; x->u.rt6.rt6i_idev = rt0->rt6i_idev; in6_dev_hold(rt0->rt6i_idev); - __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); + header_len -= x->u.dst.xfrm->props.header_len; trailer_len -= x->u.dst.xfrm->props.trailer_len; } @@ -294,18 +232,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; -#ifdef CONFIG_IPV6_MIP6 - case IPPROTO_MH: - if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { - struct ip6_mh *mh; - mh = (struct ip6_mh *)exthdr; - - fl->fl_mh_type = mh->ip6mh_type; - } - fl->proto = nexthdr; - return; -#endif - /* XXX Why are there these headers? */ case IPPROTO_AH: case IPPROTO_ESP: @@ -382,7 +308,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, - .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, diff --git a/trunk/net/ipv6/xfrm6_state.c b/trunk/net/ipv6/xfrm6_state.c index 711bfafb2472..b33296b3f6de 100644 --- a/trunk/net/ipv6/xfrm6_state.c +++ b/trunk/net/ipv6/xfrm6_state.c @@ -42,135 +42,102 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); + if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)daddr, + } + } + }; + if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, + &fl_tunnel, AF_INET6)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, + (struct in6_addr *)&x->props.saddr); + dst_release(&rt->u.dst); + } + } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = 
AF_INET6; } -static int -__xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) +static struct xfrm_state * +__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) { - int i; - int j = 0; - - /* Rule 1: select IPsec transport except AH */ - for (i = 0; i < n; i++) { - if (src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto != IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; - - /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->props.mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->props.mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (j == n) - goto end; -#endif - - /* Rule 3: select IPsec transport AH */ - for (i = 0; i < n; i++) { - if (src[i] && - src[i]->props.mode == XFRM_MODE_TRANSPORT && - src[i]->id.proto == IPPROTO_AH) { - dst[j++] = src[i]; - src[i] = NULL; + unsigned h = __xfrm6_spi_hash(daddr, spi, proto); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { + if (x->props.family == AF_INET6 && + spi == x->id.spi && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; } } - if (j == n) - goto end; - - /* Rule 4: select IPsec tunnel */ - for (i = 0; i < n; i++) { - if (src[i] && - src[i]->props.mode == XFRM_MODE_TUNNEL) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (likely(j == n)) - goto end; - - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - - end: - return 0; + return NULL; } -static int -__xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) +static struct xfrm_state * +__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, + xfrm_address_t *daddr, xfrm_address_t *saddr, + int create) { - int i; - int j = 0; - - /* Rule 1: select IPsec transport */ - for 
(i = 0; i < n; i++) { - if (src[i]->mode == XFRM_MODE_TRANSPORT) { - dst[j++] = src[i]; - src[i] = NULL; - } + struct xfrm_state *x, *x0; + unsigned h = __xfrm6_dst_hash(daddr); + + x0 = NULL; + + list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { + if (x->props.family == AF_INET6 && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + mode == x->props.mode && + proto == x->id.proto && + ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && + reqid == x->props.reqid && + x->km.state == XFRM_STATE_ACQ && + !x->id.spi) { + x0 = x; + break; + } } - if (j == n) - goto end; - - /* Rule 2: select MIPv6 RO or inbound trigger */ -#ifdef CONFIG_IPV6_MIP6 - for (i = 0; i < n; i++) { - if (src[i] && - (src[i]->mode == XFRM_MODE_ROUTEOPTIMIZATION || - src[i]->mode == XFRM_MODE_IN_TRIGGER)) { - dst[j++] = src[i]; - src[i] = NULL; - } + if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { + ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, + (struct in6_addr *)daddr); + ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, + (struct in6_addr *)saddr); + x0->sel.prefixlen_d = 128; + x0->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, + (struct in6_addr *)saddr); + x0->km.state = XFRM_STATE_ACQ; + ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, + (struct in6_addr *)daddr); + x0->id.proto = proto; + x0->props.family = AF_INET6; + x0->props.mode = mode; + x0->props.reqid = reqid; + x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x0); + x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x0->timer); + xfrm_state_hold(x0); + list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); + wake_up(&km_waitq); } - if (j == n) - goto end; -#endif - - /* Rule 3: select IPsec tunnel */ - for (i = 0; i < n; i++) { - if (src[i] && - src[i]->mode == XFRM_MODE_TUNNEL) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - if (likely(j == n)) - goto end; 
- - /* Final rule */ - for (i = 0; i < n; i++) { - if (src[i]) { - dst[j++] = src[i]; - src[i] = NULL; - } - } - - end: - return 0; + if (x0) + xfrm_state_hold(x0); + return x0; } static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, - .tmpl_sort = __xfrm6_tmpl_sort, - .state_sort = __xfrm6_state_sort, + .state_lookup = __xfrm6_state_lookup, + .find_acq = __xfrm6_find_acq, }; void __init xfrm6_state_init(void) diff --git a/trunk/net/ipv6/xfrm6_tunnel.c b/trunk/net/ipv6/xfrm6_tunnel.c index 59685ee8f700..c8f9369c2a87 100644 --- a/trunk/net/ipv6/xfrm6_tunnel.c +++ b/trunk/net/ipv6/xfrm6_tunnel.c @@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (x->props.mode != XFRM_MODE_TUNNEL) + if (!x->props.mode) return -EINVAL; if (x->encap) diff --git a/trunk/net/key/af_key.c b/trunk/net/key/af_key.c index 83b443ddc72f..3a95b2ee4690 100644 --- a/trunk/net/key/af_key.c +++ b/trunk/net/key/af_key.c @@ -1731,8 +1731,7 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, - (void*)&reqid) != -EEXIST) + if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -1766,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode == XFRM_MODE_TUNNEL) { + if (t->mode) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1998,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode == XFRM_MODE_TUNNEL) + if (t->mode) req_size += 2*socklen; else size -= 2*socklen; @@ -2014,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if 
(t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode == XFRM_MODE_TUNNEL) { + if (t->mode) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -2269,8 +2268,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, - &sel, tmp.security, 1); + xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2332,7 +2330,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (dir >= XFRM_POLICY_MAX) return -EINVAL; - xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, + xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2380,7 +2378,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; - return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); + return xfrm_policy_walk(dump_sp, &data); } static int key_notify_policy_flush(struct km_event *c) @@ -2407,8 +2405,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; - xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); - c.data.type = XFRM_POLICY_TYPE_MAIN; + xfrm_policy_flush(); c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; @@ -2670,9 +2667,6 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { - if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) - return 0; - switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); @@ -2681,8 +2675,6 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int 
dir, struct km_e case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: - if (c->data.type != XFRM_POLICY_TYPE_MAIN) - break; return key_notify_policy_flush(c); default: printk("pfkey: Unknown policy event %d\n", c->event); @@ -2716,9 +2708,6 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct #endif int sockaddr_size; int size; - struct sadb_x_sec_ctx *sec_ctx; - struct xfrm_sec_ctx *xfrm_ctx; - int ctx_size = 0; sockaddr_size = pfkey_sockaddr_size(x->props.family); if (!sockaddr_size) @@ -2734,11 +2723,6 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) size += count_esp_combs(t); - if ((xfrm_ctx = x->security)) { - ctx_size = PFKEY_ALIGN8(xfrm_ctx->ctx_len); - size += sizeof(struct sadb_x_sec_ctx) + ctx_size; - } - skb = alloc_skb(size + 16, GFP_ATOMIC); if (skb == NULL) return -ENOMEM; @@ -2834,31 +2818,17 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct else if (x->id.proto == IPPROTO_ESP) dump_esp_combs(skb, t); - /* security context */ - if (xfrm_ctx) { - sec_ctx = (struct sadb_x_sec_ctx *) skb_put(skb, - sizeof(struct sadb_x_sec_ctx) + ctx_size); - sec_ctx->sadb_x_sec_len = - (sizeof(struct sadb_x_sec_ctx) + ctx_size) / sizeof(uint64_t); - sec_ctx->sadb_x_sec_exttype = SADB_X_EXT_SEC_CTX; - sec_ctx->sadb_x_ctx_doi = xfrm_ctx->ctx_doi; - sec_ctx->sadb_x_ctx_alg = xfrm_ctx->ctx_alg; - sec_ctx->sadb_x_ctx_len = xfrm_ctx->ctx_len; - memcpy(sec_ctx + 1, xfrm_ctx->ctx_str, - xfrm_ctx->ctx_len); - } - return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, +static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (sk->sk_family) { + switch 
(family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2899,7 +2869,7 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = sk->sk_family; + xp->family = family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2915,10 +2885,8 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) { - *dir = -EINVAL; + sec_ctx->sadb_x_sec_len) goto out; - } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2928,11 +2896,6 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if (*dir) goto out; } - else { - *dir = security_xfrm_sock_policy_alloc(xp, sk); - if (*dir) - goto out; - } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/trunk/net/netfilter/Kconfig b/trunk/net/netfilter/Kconfig index 0a28d2c5c44f..a9894ddfd72a 100644 --- a/trunk/net/netfilter/Kconfig +++ b/trunk/net/netfilter/Kconfig @@ -148,18 +148,6 @@ config NETFILTER_XT_TARGET_CONNMARK . The module will be called ipt_CONNMARK.o. If unsure, say `N'. -config NETFILTER_XT_TARGET_DSCP - tristate '"DSCP" target support' - depends on NETFILTER_XTABLES - depends on IP_NF_MANGLE || IP6_NF_MANGLE - help - This option adds a `DSCP' target, which allows you to manipulate - the IPv4/IPv6 header DSCP field (differentiated services codepoint). - - The DSCP field can have any value between 0x0 and 0x3f inclusive. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_TARGET_MARK tristate '"MARK" target support' depends on NETFILTER_XTABLES @@ -275,17 +263,6 @@ config NETFILTER_XT_MATCH_DCCP If you want to compile it as a module, say M here and read . If unsure, say `N'. 
-config NETFILTER_XT_MATCH_DSCP - tristate '"DSCP" match support' - depends on NETFILTER_XTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4/IPv6 header DSCP field (differentiated services codepoint). - - The DSCP field can have any value between 0x0 and 0x3f inclusive. - - To compile it as a module, choose M here. If unsure, say N. - config NETFILTER_XT_MATCH_ESP tristate '"ESP" match support' depends on NETFILTER_XTABLES diff --git a/trunk/net/netfilter/Makefile b/trunk/net/netfilter/Makefile index a74be492fd0a..6fa4b7580458 100644 --- a/trunk/net/netfilter/Makefile +++ b/trunk/net/netfilter/Makefile @@ -25,7 +25,6 @@ obj-$(CONFIG_NETFILTER_XTABLES) += x_tables.o xt_tcpudp.o # targets obj-$(CONFIG_NETFILTER_XT_TARGET_CLASSIFY) += xt_CLASSIFY.o obj-$(CONFIG_NETFILTER_XT_TARGET_CONNMARK) += xt_CONNMARK.o -obj-$(CONFIG_NETFILTER_XT_TARGET_DSCP) += xt_DSCP.o obj-$(CONFIG_NETFILTER_XT_TARGET_MARK) += xt_MARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_NOTRACK) += xt_NOTRACK.o @@ -38,7 +37,6 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNMARK) += xt_connmark.o obj-$(CONFIG_NETFILTER_XT_MATCH_CONNTRACK) += xt_conntrack.o obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o -obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o diff --git a/trunk/net/netfilter/core.c b/trunk/net/netfilter/core.c index d80b935b3a92..5d29d5e23624 100644 --- a/trunk/net/netfilter/core.c +++ b/trunk/net/netfilter/core.c @@ -182,7 +182,7 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb, ret = -EPERM; } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) { NFDEBUG("nf_hook: Verdict = QUEUE.\n"); - if (!nf_queue(*pskb, elem, pf, hook, indev, outdev, okfn, + if (!nf_queue(pskb, elem, 
pf, hook, indev, outdev, okfn, verdict >> NF_VERDICT_BITS)) goto next_hook; } @@ -222,28 +222,6 @@ int skb_make_writable(struct sk_buff **pskb, unsigned int writable_len) } EXPORT_SYMBOL(skb_make_writable); -u_int16_t nf_csum_update(u_int32_t oldval, u_int32_t newval, u_int32_t csum) -{ - u_int32_t diff[] = { oldval, newval }; - - return csum_fold(csum_partial((char *)diff, sizeof(diff), ~csum)); -} -EXPORT_SYMBOL(nf_csum_update); - -u_int16_t nf_proto_csum_update(struct sk_buff *skb, - u_int32_t oldval, u_int32_t newval, - u_int16_t csum, int pseudohdr) -{ - if (skb->ip_summed != CHECKSUM_PARTIAL) { - csum = nf_csum_update(oldval, newval, csum); - if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) - skb->csum = nf_csum_update(oldval, newval, skb->csum); - } else if (pseudohdr) - csum = ~nf_csum_update(oldval, newval, ~csum); - - return csum; -} -EXPORT_SYMBOL(nf_proto_csum_update); /* This does not belong here, but locally generated errors need it if connection tracking in use: without this, connection may not be in hash table, and hence diff --git a/trunk/net/netfilter/nf_conntrack_core.c b/trunk/net/netfilter/nf_conntrack_core.c index 093b3ddc513c..8f2261965a68 100644 --- a/trunk/net/netfilter/nf_conntrack_core.c +++ b/trunk/net/netfilter/nf_conntrack_core.c @@ -57,6 +57,7 @@ #include #include #include +#include #define NF_CONNTRACK_VERSION "0.5.0" @@ -73,17 +74,17 @@ atomic_t nf_conntrack_count = ATOMIC_INIT(0); void (*nf_conntrack_destroyed)(struct nf_conn *conntrack) = NULL; LIST_HEAD(nf_conntrack_expect_list); -struct nf_conntrack_protocol **nf_ct_protos[PF_MAX] __read_mostly; -struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX] __read_mostly; +struct nf_conntrack_protocol **nf_ct_protos[PF_MAX]; +struct nf_conntrack_l3proto *nf_ct_l3protos[PF_MAX]; static LIST_HEAD(helpers); -unsigned int nf_conntrack_htable_size __read_mostly = 0; -int nf_conntrack_max __read_mostly; -struct list_head *nf_conntrack_hash __read_mostly; -static kmem_cache_t 
*nf_conntrack_expect_cachep __read_mostly; +unsigned int nf_conntrack_htable_size = 0; +int nf_conntrack_max; +struct list_head *nf_conntrack_hash; +static kmem_cache_t *nf_conntrack_expect_cachep; struct nf_conn nf_conntrack_untracked; -unsigned int nf_ct_log_invalid __read_mostly; +unsigned int nf_ct_log_invalid; static LIST_HEAD(unconfirmed); -static int nf_conntrack_vmalloc __read_mostly; +static int nf_conntrack_vmalloc; static unsigned int nf_conntrack_next_id; static unsigned int nf_conntrack_expect_next_id; @@ -538,10 +539,15 @@ void nf_ct_remove_expectations(struct nf_conn *ct) static void clean_from_lists(struct nf_conn *ct) { + unsigned int ho, hr; + DEBUGP("clean_from_lists(%p)\n", ct); ASSERT_WRITE_LOCK(&nf_conntrack_lock); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list); + + ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + LIST_DELETE(&nf_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); + LIST_DELETE(&nf_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); /* Destroy all pending expectations */ nf_ct_remove_expectations(ct); @@ -611,6 +617,16 @@ static void death_by_timeout(unsigned long ul_conntrack) nf_ct_put(ct); } +static inline int +conntrack_tuple_cmp(const struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack) +{ + ASSERT_READ_LOCK(&nf_conntrack_lock); + return nf_ct_tuplehash_to_ctrack(i) != ignored_conntrack + && nf_ct_tuple_equal(tuple, &i->tuple); +} + struct nf_conntrack_tuple_hash * __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, const struct nf_conn *ignored_conntrack) @@ -620,8 +636,7 @@ __nf_conntrack_find(const struct nf_conntrack_tuple *tuple, ASSERT_READ_LOCK(&nf_conntrack_lock); list_for_each_entry(h, &nf_conntrack_hash[hash], list) { - if (nf_ct_tuplehash_to_ctrack(h) != ignored_conntrack && - 
nf_ct_tuple_equal(tuple, &h->tuple)) { + if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { NF_CT_STAT_INC(found); return h; } @@ -652,10 +667,10 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, unsigned int repl_hash) { ct->id = ++nf_conntrack_next_id; - list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list, - &nf_conntrack_hash[hash]); - list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list, - &nf_conntrack_hash[repl_hash]); + list_prepend(&nf_conntrack_hash[hash], + &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + list_prepend(&nf_conntrack_hash[repl_hash], + &ct->tuplehash[IP_CT_DIR_REPLY].list); } void nf_conntrack_hash_insert(struct nf_conn *ct) @@ -675,9 +690,7 @@ int __nf_conntrack_confirm(struct sk_buff **pskb) { unsigned int hash, repl_hash; - struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; - struct nf_conn_help *help; enum ip_conntrack_info ctinfo; ct = nf_ct_get(*pskb, &ctinfo); @@ -707,41 +720,41 @@ __nf_conntrack_confirm(struct sk_buff **pskb) /* See if there's one in the list already, including reverse: NAT could have grabbed it without realizing, since we're not in the hash. If there is, we lost race. 
*/ - list_for_each_entry(h, &nf_conntrack_hash[hash], list) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - &h->tuple)) - goto out; - list_for_each_entry(h, &nf_conntrack_hash[repl_hash], list) - if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, - &h->tuple)) - goto out; - - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); + if (!LIST_FIND(&nf_conntrack_hash[hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) + && !LIST_FIND(&nf_conntrack_hash[repl_hash], + conntrack_tuple_cmp, + struct nf_conntrack_tuple_hash *, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { + struct nf_conn_help *help; + /* Remove from unconfirmed list */ + list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - __nf_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - NF_CT_STAT_INC(insert); - write_unlock_bh(&nf_conntrack_lock); - help = nfct_help(ct); - if (help && help->helper) - nf_conntrack_event_cache(IPCT_HELPER, *pskb); + __nf_conntrack_hash_insert(ct, hash, repl_hash); + /* Timer relative to confirmation time, not original + setting time, otherwise we'd get timer wrap in + weird delay cases. 
*/ + ct->timeout.expires += jiffies; + add_timer(&ct->timeout); + atomic_inc(&ct->ct_general.use); + set_bit(IPS_CONFIRMED_BIT, &ct->status); + NF_CT_STAT_INC(insert); + write_unlock_bh(&nf_conntrack_lock); + help = nfct_help(ct); + if (help && help->helper) + nf_conntrack_event_cache(IPCT_HELPER, *pskb); #ifdef CONFIG_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - nf_conntrack_event_cache(IPCT_NATINFO, *pskb); + if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || + test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_NATINFO, *pskb); #endif - nf_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); - return NF_ACCEPT; + nf_conntrack_event_cache(master_ct(ct) ? + IPCT_RELATED : IPCT_NEW, *pskb); + return NF_ACCEPT; + } -out: NF_CT_STAT_INC(insert_failed); write_unlock_bh(&nf_conntrack_lock); return NF_DROP; @@ -764,21 +777,24 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, /* There's a small race here where we may free a just-assured connection. Too bad: we're in trouble anyway. 
*/ +static inline int unreplied(const struct nf_conntrack_tuple_hash *i) +{ + return !(test_bit(IPS_ASSURED_BIT, + &nf_ct_tuplehash_to_ctrack(i)->status)); +} + static int early_drop(struct list_head *chain) { /* Traverse backwards: gives us oldest, which is roughly LRU */ struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct = NULL, *tmp; + struct nf_conn *ct = NULL; int dropped = 0; read_lock_bh(&nf_conntrack_lock); - list_for_each_entry_reverse(h, chain, list) { - tmp = nf_ct_tuplehash_to_ctrack(h); - if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) { - ct = tmp; - atomic_inc(&ct->ct_general.use); - break; - } + h = LIST_FIND_B(chain, unreplied, struct nf_conntrack_tuple_hash *); + if (h) { + ct = nf_ct_tuplehash_to_ctrack(h); + atomic_inc(&ct->ct_general.use); } read_unlock_bh(&nf_conntrack_lock); @@ -794,16 +810,18 @@ static int early_drop(struct list_head *chain) return dropped; } +static inline int helper_cmp(const struct nf_conntrack_helper *i, + const struct nf_conntrack_tuple *rtuple) +{ + return nf_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); +} + static struct nf_conntrack_helper * __nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_helper *h; - - list_for_each_entry(h, &helpers, list) { - if (nf_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask)) - return h; - } - return NULL; + return LIST_FIND(&helpers, helper_cmp, + struct nf_conntrack_helper *, + tuple); } struct nf_conntrack_helper * @@ -848,15 +866,11 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, nf_conntrack_hash_rnd_initted = 1; } - /* We don't want any race condition at early drop stage */ - atomic_inc(&nf_conntrack_count); - if (nf_conntrack_max - && atomic_read(&nf_conntrack_count) > nf_conntrack_max) { + && atomic_read(&nf_conntrack_count) >= nf_conntrack_max) { unsigned int hash = hash_conntrack(orig); /* Try dropping from this hash chain. 
*/ if (!early_drop(&nf_conntrack_hash[hash])) { - atomic_dec(&nf_conntrack_count); if (net_ratelimit()) printk(KERN_WARNING "nf_conntrack: table full, dropping" @@ -907,12 +921,10 @@ __nf_conntrack_alloc(const struct nf_conntrack_tuple *orig, init_timer(&conntrack->timeout); conntrack->timeout.data = (unsigned long)conntrack; conntrack->timeout.function = death_by_timeout; - read_unlock_bh(&nf_ct_cache_lock); - return conntrack; + atomic_inc(&nf_conntrack_count); out: read_unlock_bh(&nf_ct_cache_lock); - atomic_dec(&nf_conntrack_count); return conntrack; } @@ -1311,7 +1323,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) return ret; } write_lock_bh(&nf_conntrack_lock); - list_add(&me->list, &helpers); + list_prepend(&helpers, me); write_unlock_bh(&nf_conntrack_lock); return 0; @@ -1330,8 +1342,8 @@ __nf_conntrack_helper_find_byname(const char *name) return NULL; } -static inline void unhelp(struct nf_conntrack_tuple_hash *i, - const struct nf_conntrack_helper *me) +static inline int unhelp(struct nf_conntrack_tuple_hash *i, + const struct nf_conntrack_helper *me) { struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(i); struct nf_conn_help *help = nfct_help(ct); @@ -1340,17 +1352,17 @@ static inline void unhelp(struct nf_conntrack_tuple_hash *i, nf_conntrack_event(IPCT_HELPER, ct); help->helper = NULL; } + return 0; } void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) { unsigned int i; - struct nf_conntrack_tuple_hash *h; struct nf_conntrack_expect *exp, *tmp; /* Need write lock here, to delete helper. */ write_lock_bh(&nf_conntrack_lock); - list_del(&me->list); + LIST_DELETE(&helpers, me); /* Get rid of expectations */ list_for_each_entry_safe(exp, tmp, &nf_conntrack_expect_list, list) { @@ -1362,12 +1374,10 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) } /* Get rid of expecteds, set helpers to NULL. 
*/ - list_for_each_entry(h, &unconfirmed, list) - unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { - list_for_each_entry(h, &nf_conntrack_hash[i], list) - unhelp(h, me); - } + LIST_FIND_W(&unconfirmed, unhelp, struct nf_conntrack_tuple_hash*, me); + for (i = 0; i < nf_conntrack_htable_size; i++) + LIST_FIND_W(&nf_conntrack_hash[i], unhelp, + struct nf_conntrack_tuple_hash *, me); write_unlock_bh(&nf_conntrack_lock); /* Someone could be still looking at the helper in a bh. */ @@ -1500,40 +1510,37 @@ do_iter(const struct nf_conntrack_tuple_hash *i, } /* Bring out ya dead! */ -static struct nf_conn * +static struct nf_conntrack_tuple_hash * get_next_corpse(int (*iter)(struct nf_conn *i, void *data), void *data, unsigned int *bucket) { - struct nf_conntrack_tuple_hash *h; - struct nf_conn *ct; + struct nf_conntrack_tuple_hash *h = NULL; write_lock_bh(&nf_conntrack_lock); for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { - list_for_each_entry(h, &nf_conntrack_hash[*bucket], list) { - ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; - } + h = LIST_FIND_W(&nf_conntrack_hash[*bucket], do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + break; } - list_for_each_entry(h, &unconfirmed, list) { - ct = nf_ct_tuplehash_to_ctrack(h); - if (iter(ct, data)) - goto found; - } - return NULL; -found: - atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); + if (!h) + h = LIST_FIND_W(&unconfirmed, do_iter, + struct nf_conntrack_tuple_hash *, iter, data); + if (h) + atomic_inc(&nf_ct_tuplehash_to_ctrack(h)->ct_general.use); write_unlock_bh(&nf_conntrack_lock); - return ct; + + return h; } void nf_ct_iterate_cleanup(int (*iter)(struct nf_conn *i, void *data), void *data) { - struct nf_conn *ct; + struct nf_conntrack_tuple_hash *h; unsigned int bucket = 0; - while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) { + while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { + struct nf_conn *ct = 
nf_ct_tuplehash_to_ctrack(h); /* Time to push up daises... */ if (del_timer(&ct->timeout)) death_by_timeout((unsigned long)ct); diff --git a/trunk/net/netfilter/nf_conntrack_ftp.c b/trunk/net/netfilter/nf_conntrack_ftp.c index 0c17a5bd112b..960972d225f9 100644 --- a/trunk/net/netfilter/nf_conntrack_ftp.c +++ b/trunk/net/netfilter/nf_conntrack_ftp.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include @@ -112,14 +111,101 @@ static struct ftp_search { }, }; +/* This code is based on inet_pton() in glibc-2.2.4 */ static int get_ipv6_addr(const char *src, size_t dlen, struct in6_addr *dst, u_int8_t term) { - const char *end; - int ret = in6_pton(src, min_t(size_t, dlen, 0xffff), (u8 *)dst, term, &end); - if (ret > 0) - return (int)(end - src); - return 0; + static const char xdigits[] = "0123456789abcdef"; + u_int8_t tmp[16], *tp, *endp, *colonp; + int ch, saw_xdigit; + u_int32_t val; + size_t clen = 0; + + tp = memset(tmp, '\0', sizeof(tmp)); + endp = tp + sizeof(tmp); + colonp = NULL; + + /* Leading :: requires some special handling. */ + if (*src == ':'){ + if (*++src != ':') { + DEBUGP("invalid \":\" at the head of addr\n"); + return 0; + } + clen++; + } + + saw_xdigit = 0; + val = 0; + while ((clen < dlen) && (*src != term)) { + const char *pch; + + ch = tolower(*src++); + clen++; + + pch = strchr(xdigits, ch); + if (pch != NULL) { + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + + saw_xdigit = 1; + continue; + } + if (ch != ':') { + DEBUGP("get_ipv6_addr: invalid char. 
\'%c\'\n", ch); + return 0; + } + + if (!saw_xdigit) { + if (colonp) { + DEBUGP("invalid location of \"::\".\n"); + return 0; + } + colonp = tp; + continue; + } else if (*src == term) { + DEBUGP("trancated IPv6 addr\n"); + return 0; + } + + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + + saw_xdigit = 0; + val = 0; + continue; + } + if (saw_xdigit) { + if (tp + 2 > endp) + return 0; + *tp++ = (u_int8_t) (val >> 8) & 0xff; + *tp++ = (u_int8_t) val & 0xff; + } + if (colonp != NULL) { + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + if (tp == endp) + return 0; + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp || (*src != term)) + return 0; + + memcpy(dst->s6_addr, tmp, sizeof(dst->s6_addr)); + return clen; } static int try_number(const char *data, size_t dlen, u_int32_t array[], diff --git a/trunk/net/netfilter/nf_conntrack_netlink.c b/trunk/net/netfilter/nf_conntrack_netlink.c index 1721f7c78c77..6527d4e048d8 100644 --- a/trunk/net/netfilter/nf_conntrack_netlink.c +++ b/trunk/net/netfilter/nf_conntrack_netlink.c @@ -339,7 +339,11 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, /* dump everything */ events = ~0UL; group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | IPCT_PROTOINFO)) { + } else if (events & (IPCT_STATUS | + IPCT_PROTOINFO | + IPCT_HELPER | + IPCT_HELPINFO | + IPCT_NATINFO)) { type = IPCTNL_MSG_CT_NEW; group = NFNLGRP_CONNTRACK_UPDATE; } else @@ -391,10 +395,6 @@ static int ctnetlink_conntrack_event(struct notifier_block *this, ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) goto nfattr_failure; - if (events & IPCT_MARK - && ctnetlink_dump_mark(skb, ct) < 0) - goto nfattr_failure; - nlh->nlmsg_len = skb->tail - b; nfnetlink_send(skb, 0, group, 0); return NOTIFY_DONE; @@ -455,11 
+455,6 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) cb->args[1] = (unsigned long)ct; goto out; } -#ifdef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) - memset(&ct->counters, 0, sizeof(ct->counters)); -#endif } if (cb->args[1]) { cb->args[1] = 0; @@ -475,6 +470,50 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } +#ifdef CONFIG_NF_CT_ACCT +static int +ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_conn *ct = NULL; + struct nf_conntrack_tuple_hash *h; + struct list_head *i; + u_int32_t *id = (u_int32_t *) &cb->args[1]; + struct nfgenmsg *nfmsg = NLMSG_DATA(cb->nlh); + u_int8_t l3proto = nfmsg->nfgen_family; + + DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, + cb->args[0], *id); + + write_lock_bh(&nf_conntrack_lock); + for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++, *id = 0) { + list_for_each_prev(i, &nf_conntrack_hash[cb->args[0]]) { + h = (struct nf_conntrack_tuple_hash *) i; + if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) + continue; + ct = nf_ct_tuplehash_to_ctrack(h); + if (l3proto && L3PROTO(ct) != l3proto) + continue; + if (ct->id <= *id) + continue; + if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + IPCTNL_MSG_CT_NEW, + 1, ct) < 0) + goto out; + *id = ct->id; + + memset(&ct->counters, 0, sizeof(ct->counters)); + } + } +out: + write_unlock_bh(&nf_conntrack_lock); + + DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); + + return skb->len; +} +#endif + static inline int ctnetlink_parse_tuple_ip(struct nfattr *attr, struct nf_conntrack_tuple *tuple) { @@ -749,14 +788,22 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_DUMP) { u32 rlen; -#ifndef CONFIG_NF_CT_ACCT - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == IPCTNL_MSG_CT_GET_CTRZERO) + if (NFNL_MSG_TYPE(nlh->nlmsg_type) == + IPCTNL_MSG_CT_GET_CTRZERO) { 
+#ifdef CONFIG_NF_CT_ACCT + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table_w, + ctnetlink_done)) != 0) + return -EINVAL; +#else return -ENOTSUPP; #endif - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) + } else { + if ((*errp = netlink_dump_start(ctnl, skb, nlh, + ctnetlink_dump_table, + ctnetlink_done)) != 0) return -EINVAL; + } rlen = NLMSG_ALIGN(nlh->nlmsg_len); if (rlen > skb->len) @@ -1227,9 +1274,6 @@ static int ctnetlink_expect_event(struct notifier_block *this, } else return NOTIFY_DONE; - if (!nfnetlink_has_listeners(NFNLGRP_CONNTRACK_EXP_NEW)) - return NOTIFY_DONE; - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); if (!skb) return NOTIFY_DONE; diff --git a/trunk/net/netfilter/nf_conntrack_proto_generic.c b/trunk/net/netfilter/nf_conntrack_proto_generic.c index 26408bb0955b..46bc27e2756d 100644 --- a/trunk/net/netfilter/nf_conntrack_proto_generic.c +++ b/trunk/net/netfilter/nf_conntrack_proto_generic.c @@ -17,7 +17,7 @@ #include #include -unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; +unsigned int nf_ct_generic_timeout = 600*HZ; static int generic_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, diff --git a/trunk/net/netfilter/nf_conntrack_proto_sctp.c b/trunk/net/netfilter/nf_conntrack_proto_sctp.c index af568777372b..9bd8a7877fd5 100644 --- a/trunk/net/netfilter/nf_conntrack_proto_sctp.c +++ b/trunk/net/netfilter/nf_conntrack_proto_sctp.c @@ -64,13 +64,13 @@ static const char *sctp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -static unsigned int nf_ct_sctp_timeout_closed __read_mostly = 10 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_wait __read_mostly = 3 SECS; -static unsigned int nf_ct_sctp_timeout_cookie_echoed __read_mostly = 3 SECS; -static unsigned int nf_ct_sctp_timeout_established __read_mostly = 5 DAYS; -static unsigned int nf_ct_sctp_timeout_shutdown_sent __read_mostly = 300 SECS / 1000; -static unsigned int 
nf_ct_sctp_timeout_shutdown_recd __read_mostly = 300 SECS / 1000; -static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent __read_mostly = 3 SECS; +static unsigned int nf_ct_sctp_timeout_closed = 10 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_wait = 3 SECS; +static unsigned int nf_ct_sctp_timeout_cookie_echoed = 3 SECS; +static unsigned int nf_ct_sctp_timeout_established = 5 DAYS; +static unsigned int nf_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; +static unsigned int nf_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; static unsigned int * sctp_timeouts[] = { NULL, /* SCTP_CONNTRACK_NONE */ diff --git a/trunk/net/netfilter/nf_conntrack_proto_tcp.c b/trunk/net/netfilter/nf_conntrack_proto_tcp.c index 238bbb5b72ef..af8adcba23a7 100644 --- a/trunk/net/netfilter/nf_conntrack_proto_tcp.c +++ b/trunk/net/netfilter/nf_conntrack_proto_tcp.c @@ -57,19 +57,19 @@ static DEFINE_RWLOCK(tcp_lock); /* "Be conservative in what you do, be liberal in what you accept from others." If it's non-zero, we mark only out of window RST segments as INVALID. */ -int nf_ct_tcp_be_liberal __read_mostly = 0; +int nf_ct_tcp_be_liberal = 0; /* When connection is picked up from the middle, how many packets are required to pass in each direction when we assume we are in sync - if any side uses window scaling, we lost the game. If it is set to zero, we disable picking up already established connections. */ -int nf_ct_tcp_loose __read_mostly = 3; +int nf_ct_tcp_loose = 3; /* Max number of the retransmitted packets without receiving an (acceptable) ACK from the destination. If this number is reached, a shorter timer will be started. */ -int nf_ct_tcp_max_retrans __read_mostly = 3; +int nf_ct_tcp_max_retrans = 3; /* FIXME: Examine ipfilter's timeouts and conntrack transitions more closely. They're more complex. 
--RR */ @@ -92,19 +92,19 @@ static const char *tcp_conntrack_names[] = { #define HOURS * 60 MINS #define DAYS * 24 HOURS -unsigned int nf_ct_tcp_timeout_syn_sent __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_syn_recv __read_mostly = 60 SECS; -unsigned int nf_ct_tcp_timeout_established __read_mostly = 5 DAYS; -unsigned int nf_ct_tcp_timeout_fin_wait __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_close_wait __read_mostly = 60 SECS; -unsigned int nf_ct_tcp_timeout_last_ack __read_mostly = 30 SECS; -unsigned int nf_ct_tcp_timeout_time_wait __read_mostly = 2 MINS; -unsigned int nf_ct_tcp_timeout_close __read_mostly = 10 SECS; +unsigned int nf_ct_tcp_timeout_syn_sent = 2 MINS; +unsigned int nf_ct_tcp_timeout_syn_recv = 60 SECS; +unsigned int nf_ct_tcp_timeout_established = 5 DAYS; +unsigned int nf_ct_tcp_timeout_fin_wait = 2 MINS; +unsigned int nf_ct_tcp_timeout_close_wait = 60 SECS; +unsigned int nf_ct_tcp_timeout_last_ack = 30 SECS; +unsigned int nf_ct_tcp_timeout_time_wait = 2 MINS; +unsigned int nf_ct_tcp_timeout_close = 10 SECS; /* RFC1122 says the R2 limit should be at least 100 seconds. Linux uses 15 packets as limit, which corresponds to ~13-30min depending on RTO. */ -unsigned int nf_ct_tcp_timeout_max_retrans __read_mostly = 5 MINS; +unsigned int nf_ct_tcp_timeout_max_retrans = 5 MINS; static unsigned int * tcp_timeouts[] = { NULL, /* TCP_CONNTRACK_NONE */ @@ -688,15 +688,13 @@ static int tcp_in_window(struct ip_ct_tcp *state, if (state->last_dir == dir && state->last_seq == seq && state->last_ack == ack - && state->last_end == end - && state->last_win == win) + && state->last_end == end) state->retrans++; else { state->last_dir = dir; state->last_seq = seq; state->last_ack = ack; state->last_end = end; - state->last_win = win; state->retrans = 0; } } @@ -825,7 +823,8 @@ static int tcp_error(struct sk_buff *skb, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the checksum is assumed to be correct. 
+ * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. */ /* FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && diff --git a/trunk/net/netfilter/nf_conntrack_proto_udp.c b/trunk/net/netfilter/nf_conntrack_proto_udp.c index d28981cf9af5..ae07ebe3ab37 100644 --- a/trunk/net/netfilter/nf_conntrack_proto_udp.c +++ b/trunk/net/netfilter/nf_conntrack_proto_udp.c @@ -27,8 +27,8 @@ #include #include -unsigned int nf_ct_udp_timeout __read_mostly = 30*HZ; -unsigned int nf_ct_udp_timeout_stream __read_mostly = 180*HZ; +unsigned int nf_ct_udp_timeout = 30*HZ; +unsigned int nf_ct_udp_timeout_stream = 180*HZ; static int udp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, @@ -131,7 +131,8 @@ static int udp_error(struct sk_buff *skb, unsigned int dataoff, /* Checksum invalid? Ignore. * We skip checking packets on the outgoing path - * because the checksum is assumed to be correct. + * because the semantic of CHECKSUM_HW is different there + * and moreover root might send raw packets. 
* FIXME: Source route IP option packets --RR */ if (nf_conntrack_checksum && ((pf == PF_INET && hooknum == NF_IP_PRE_ROUTING) || diff --git a/trunk/net/netfilter/nf_conntrack_standalone.c b/trunk/net/netfilter/nf_conntrack_standalone.c index 5954f6773810..4ef836699962 100644 --- a/trunk/net/netfilter/nf_conntrack_standalone.c +++ b/trunk/net/netfilter/nf_conntrack_standalone.c @@ -37,6 +37,7 @@ #include #include #include +#include #if 0 #define DEBUGP printk @@ -427,7 +428,7 @@ static struct file_operations ct_cpu_seq_fops = { /* Sysctl support */ -int nf_conntrack_checksum __read_mostly = 1; +int nf_conntrack_checksum = 1; #ifdef CONFIG_SYSCTL diff --git a/trunk/net/netfilter/nf_internals.h b/trunk/net/netfilter/nf_internals.h index a981971ce1d5..86e392bfe833 100644 --- a/trunk/net/netfilter/nf_internals.h +++ b/trunk/net/netfilter/nf_internals.h @@ -23,7 +23,7 @@ extern unsigned int nf_iterate(struct list_head *head, int hook_thresh); /* nf_queue.c */ -extern int nf_queue(struct sk_buff *skb, +extern int nf_queue(struct sk_buff **skb, struct list_head *elem, int pf, unsigned int hook, struct net_device *indev, diff --git a/trunk/net/netfilter/nf_queue.c b/trunk/net/netfilter/nf_queue.c index 4d8936ed581d..662a869593bf 100644 --- a/trunk/net/netfilter/nf_queue.c +++ b/trunk/net/netfilter/nf_queue.c @@ -74,13 +74,13 @@ EXPORT_SYMBOL_GPL(nf_unregister_queue_handlers); * Any packet that leaves via this function must come back * through nf_reinject(). 
*/ -static int __nf_queue(struct sk_buff *skb, - struct list_head *elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) +int nf_queue(struct sk_buff **skb, + struct list_head *elem, + int pf, unsigned int hook, + struct net_device *indev, + struct net_device *outdev, + int (*okfn)(struct sk_buff *), + unsigned int queuenum) { int status; struct nf_info *info; @@ -94,14 +94,14 @@ static int __nf_queue(struct sk_buff *skb, read_lock(&queue_handler_lock); if (!queue_handler[pf]) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } afinfo = nf_get_afinfo(pf); if (!afinfo) { read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -109,9 +109,9 @@ static int __nf_queue(struct sk_buff *skb, if (!info) { if (net_ratelimit()) printk(KERN_ERR "OOM queueing packet %p\n", - skb); + *skb); read_unlock(&queue_handler_lock); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -130,15 +130,15 @@ static int __nf_queue(struct sk_buff *skb, if (outdev) dev_hold(outdev); #ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - physindev = skb->nf_bridge->physindev; + if ((*skb)->nf_bridge) { + physindev = (*skb)->nf_bridge->physindev; if (physindev) dev_hold(physindev); - physoutdev = skb->nf_bridge->physoutdev; + physoutdev = (*skb)->nf_bridge->physoutdev; if (physoutdev) dev_hold(physoutdev); } #endif - afinfo->saveroute(skb, info); - status = queue_handler[pf]->outfn(skb, info, queuenum, + afinfo->saveroute(*skb, info); + status = queue_handler[pf]->outfn(*skb, info, queuenum, queue_handler[pf]->data); read_unlock(&queue_handler_lock); @@ -153,7 +153,7 @@ static int __nf_queue(struct sk_buff *skb, #endif module_put(info->elem->owner); kfree(info); - kfree_skb(skb); + kfree_skb(*skb); return 1; } @@ -161,46 +161,6 @@ static int __nf_queue(struct sk_buff *skb, return 1; } -int nf_queue(struct sk_buff *skb, - struct list_head 
*elem, - int pf, unsigned int hook, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sk_buff *), - unsigned int queuenum) -{ - struct sk_buff *segs; - - if (!skb_is_gso(skb)) - return __nf_queue(skb, elem, pf, hook, indev, outdev, okfn, - queuenum); - - switch (pf) { - case AF_INET: - skb->protocol = htons(ETH_P_IP); - break; - case AF_INET6: - skb->protocol = htons(ETH_P_IPV6); - break; - } - - segs = skb_gso_segment(skb, 0); - kfree_skb(skb); - if (unlikely(IS_ERR(segs))) - return 1; - - do { - struct sk_buff *nskb = segs->next; - - segs->next = NULL; - if (!__nf_queue(segs, elem, pf, hook, indev, outdev, okfn, - queuenum)) - kfree_skb(segs); - segs = nskb; - } while (segs); - return 1; -} - void nf_reinject(struct sk_buff *skb, struct nf_info *info, unsigned int verdict) { @@ -264,9 +224,9 @@ void nf_reinject(struct sk_buff *skb, struct nf_info *info, case NF_STOLEN: break; case NF_QUEUE: - if (!__nf_queue(skb, elem, info->pf, info->hook, - info->indev, info->outdev, info->okfn, - verdict >> NF_VERDICT_BITS)) + if (!nf_queue(&skb, elem, info->pf, info->hook, + info->indev, info->outdev, info->okfn, + verdict >> NF_VERDICT_BITS)) goto next_hook; break; default: diff --git a/trunk/net/netfilter/nfnetlink_queue.c b/trunk/net/netfilter/nfnetlink_queue.c index 8eb2473d83e1..49ef41e34c48 100644 --- a/trunk/net/netfilter/nfnetlink_queue.c +++ b/trunk/net/netfilter/nfnetlink_queue.c @@ -377,9 +377,9 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue, break; case NFQNL_COPY_PACKET: - if ((entskb->ip_summed == CHECKSUM_PARTIAL || - entskb->ip_summed == CHECKSUM_COMPLETE) && - (*errp = skb_checksum_help(entskb))) { + if (entskb->ip_summed == CHECKSUM_HW && + (*errp = skb_checksum_help(entskb, + outdev == NULL))) { spin_unlock_bh(&queue->lock); return NULL; } @@ -584,7 +584,7 @@ nfqnl_enqueue_packet(struct sk_buff *skb, struct nf_info *info, queue->queue_dropped++; status = -ENOSPC; if (net_ratelimit()) - printk(KERN_WARNING "nf_queue: 
full at %d entries, " + printk(KERN_WARNING "ip_queue: full at %d entries, " "dropping packets(s). Dropped: %d\n", queue->queue_total, queue->queue_dropped); goto err_out_free_nskb; @@ -635,7 +635,7 @@ nfqnl_mangle(void *data, int data_len, struct nfqnl_queue_entry *e) diff, GFP_ATOMIC); if (newskb == NULL) { - printk(KERN_WARNING "nf_queue: OOM " + printk(KERN_WARNING "ip_queue: OOM " "in mangle, dropping packet\n"); return -ENOMEM; } diff --git a/trunk/net/netfilter/x_tables.c b/trunk/net/netfilter/x_tables.c index 58522fc65d33..174e8f970095 100644 --- a/trunk/net/netfilter/x_tables.c +++ b/trunk/net/netfilter/x_tables.c @@ -81,41 +81,11 @@ xt_unregister_target(struct xt_target *target) int af = target->family; mutex_lock(&xt[af].mutex); - list_del(&target->list); + LIST_DELETE(&xt[af].target, target); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_target); -int -xt_register_targets(struct xt_target *target, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = xt_register_target(&target[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - xt_unregister_targets(target, i); - return err; -} -EXPORT_SYMBOL(xt_register_targets); - -void -xt_unregister_targets(struct xt_target *target, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; i++) - xt_unregister_target(&target[i]); -} -EXPORT_SYMBOL(xt_unregister_targets); - int xt_register_match(struct xt_match *match) { @@ -138,41 +108,11 @@ xt_unregister_match(struct xt_match *match) int af = match->family; mutex_lock(&xt[af].mutex); - list_del(&match->list); + LIST_DELETE(&xt[af].match, match); mutex_unlock(&xt[af].mutex); } EXPORT_SYMBOL(xt_unregister_match); -int -xt_register_matches(struct xt_match *match, unsigned int n) -{ - unsigned int i; - int err = 0; - - for (i = 0; i < n; i++) { - err = xt_register_match(&match[i]); - if (err) - goto err; - } - return err; - -err: - if (i > 0) - xt_unregister_matches(match, i); - return err; -} 
-EXPORT_SYMBOL(xt_register_matches); - -void -xt_unregister_matches(struct xt_match *match, unsigned int n) -{ - unsigned int i; - - for (i = 0; i < n; i++) - xt_unregister_match(&match[i]); -} -EXPORT_SYMBOL(xt_unregister_matches); - /* * These are weird, but module loading must not be done with mutex @@ -333,65 +273,52 @@ int xt_check_match(const struct xt_match *match, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_match); #ifdef CONFIG_COMPAT -int xt_compat_match_offset(struct xt_match *match) -{ - u_int16_t csize = match->compatsize ? : match->matchsize; - return XT_ALIGN(match->matchsize) - COMPAT_XT_ALIGN(csize); -} -EXPORT_SYMBOL_GPL(xt_compat_match_offset); - -void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, - int *size) -{ - struct xt_match *match = m->u.kernel.match; - struct compat_xt_entry_match *cm = (struct compat_xt_entry_match *)m; - int pad, off = xt_compat_match_offset(match); - u_int16_t msize = cm->u.user.match_size; - - m = *dstptr; - memcpy(m, cm, sizeof(*cm)); - if (match->compat_from_user) - match->compat_from_user(m->data, cm->data); - else - memcpy(m->data, cm->data, msize - sizeof(*cm)); - pad = XT_ALIGN(match->matchsize) - match->matchsize; - if (pad > 0) - memset(m->data + match->matchsize, 0, pad); - - msize += off; - m->u.user.match_size = msize; - - *size += off; - *dstptr += msize; -} -EXPORT_SYMBOL_GPL(xt_compat_match_from_user); - -int xt_compat_match_to_user(struct xt_entry_match *m, void __user **dstptr, - int *size) +int xt_compat_match(void *match, void **dstptr, int *size, int convert) { - struct xt_match *match = m->u.kernel.match; - struct compat_xt_entry_match __user *cm = *dstptr; - int off = xt_compat_match_offset(match); - u_int16_t msize = m->u.user.match_size - off; - - if (copy_to_user(cm, m, sizeof(*cm)) || - put_user(msize, &cm->u.user.match_size)) - return -EFAULT; + struct xt_match *m; + struct compat_xt_entry_match *pcompat_m; + struct xt_entry_match *pm; + u_int16_t msize; + int off, 
ret; - if (match->compat_to_user) { - if (match->compat_to_user((void __user *)cm->data, m->data)) - return -EFAULT; - } else { - if (copy_to_user(cm->data, m->data, msize - sizeof(*cm))) - return -EFAULT; + ret = 0; + m = ((struct xt_entry_match *)match)->u.kernel.match; + off = XT_ALIGN(m->matchsize) - COMPAT_XT_ALIGN(m->matchsize); + switch (convert) { + case COMPAT_TO_USER: + pm = (struct xt_entry_match *)match; + msize = pm->u.user.match_size; + if (copy_to_user(*dstptr, pm, msize)) { + ret = -EFAULT; + break; + } + msize -= off; + if (put_user(msize, (u_int16_t *)*dstptr)) + ret = -EFAULT; + *size -= off; + *dstptr += msize; + break; + case COMPAT_FROM_USER: + pcompat_m = (struct compat_xt_entry_match *)match; + pm = (struct xt_entry_match *)*dstptr; + msize = pcompat_m->u.user.match_size; + memcpy(pm, pcompat_m, msize); + msize += off; + pm->u.user.match_size = msize; + *size += off; + *dstptr += msize; + break; + case COMPAT_CALC_SIZE: + *size += off; + break; + default: + ret = -ENOPROTOOPT; + break; } - - *size -= off; - *dstptr += msize; - return 0; + return ret; } -EXPORT_SYMBOL_GPL(xt_compat_match_to_user); -#endif /* CONFIG_COMPAT */ +EXPORT_SYMBOL_GPL(xt_compat_match); +#endif int xt_check_target(const struct xt_target *target, unsigned short family, unsigned int size, const char *table, unsigned int hook_mask, @@ -423,64 +350,51 @@ int xt_check_target(const struct xt_target *target, unsigned short family, EXPORT_SYMBOL_GPL(xt_check_target); #ifdef CONFIG_COMPAT -int xt_compat_target_offset(struct xt_target *target) +int xt_compat_target(void *target, void **dstptr, int *size, int convert) { - u_int16_t csize = target->compatsize ? 
: target->targetsize; - return XT_ALIGN(target->targetsize) - COMPAT_XT_ALIGN(csize); -} -EXPORT_SYMBOL_GPL(xt_compat_target_offset); - -void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, - int *size) -{ - struct xt_target *target = t->u.kernel.target; - struct compat_xt_entry_target *ct = (struct compat_xt_entry_target *)t; - int pad, off = xt_compat_target_offset(target); - u_int16_t tsize = ct->u.user.target_size; - - t = *dstptr; - memcpy(t, ct, sizeof(*ct)); - if (target->compat_from_user) - target->compat_from_user(t->data, ct->data); - else - memcpy(t->data, ct->data, tsize - sizeof(*ct)); - pad = XT_ALIGN(target->targetsize) - target->targetsize; - if (pad > 0) - memset(t->data + target->targetsize, 0, pad); - - tsize += off; - t->u.user.target_size = tsize; - - *size += off; - *dstptr += tsize; -} -EXPORT_SYMBOL_GPL(xt_compat_target_from_user); - -int xt_compat_target_to_user(struct xt_entry_target *t, void __user **dstptr, - int *size) -{ - struct xt_target *target = t->u.kernel.target; - struct compat_xt_entry_target __user *ct = *dstptr; - int off = xt_compat_target_offset(target); - u_int16_t tsize = t->u.user.target_size - off; - - if (copy_to_user(ct, t, sizeof(*ct)) || - put_user(tsize, &ct->u.user.target_size)) - return -EFAULT; + struct xt_target *t; + struct compat_xt_entry_target *pcompat; + struct xt_entry_target *pt; + u_int16_t tsize; + int off, ret; - if (target->compat_to_user) { - if (target->compat_to_user((void __user *)ct->data, t->data)) - return -EFAULT; - } else { - if (copy_to_user(ct->data, t->data, tsize - sizeof(*ct))) - return -EFAULT; + ret = 0; + t = ((struct xt_entry_target *)target)->u.kernel.target; + off = XT_ALIGN(t->targetsize) - COMPAT_XT_ALIGN(t->targetsize); + switch (convert) { + case COMPAT_TO_USER: + pt = (struct xt_entry_target *)target; + tsize = pt->u.user.target_size; + if (copy_to_user(*dstptr, pt, tsize)) { + ret = -EFAULT; + break; + } + tsize -= off; + if (put_user(tsize, (u_int16_t 
*)*dstptr)) + ret = -EFAULT; + *size -= off; + *dstptr += tsize; + break; + case COMPAT_FROM_USER: + pcompat = (struct compat_xt_entry_target *)target; + pt = (struct xt_entry_target *)*dstptr; + tsize = pcompat->u.user.target_size; + memcpy(pt, pcompat, tsize); + tsize += off; + pt->u.user.target_size = tsize; + *size += off; + *dstptr += tsize; + break; + case COMPAT_CALC_SIZE: + *size += off; + break; + default: + ret = -ENOPROTOOPT; + break; } - - *size -= off; - *dstptr += tsize; - return 0; + return ret; } -EXPORT_SYMBOL_GPL(xt_compat_target_to_user); +EXPORT_SYMBOL_GPL(xt_compat_target); #endif struct xt_table_info *xt_alloc_table_info(unsigned int size) @@ -601,18 +515,15 @@ int xt_register_table(struct xt_table *table, { int ret; struct xt_table_info *private; - struct xt_table *t; ret = mutex_lock_interruptible(&xt[table->af].mutex); if (ret != 0) return ret; /* Don't autoload: we'd eat our tail... */ - list_for_each_entry(t, &xt[table->af].tables, list) { - if (strcmp(t->name, table->name) == 0) { - ret = -EEXIST; - goto unlock; - } + if (list_named_find(&xt[table->af].tables, table->name)) { + ret = -EEXIST; + goto unlock; } /* Simplifies replace_table code. 
*/ @@ -627,7 +538,7 @@ int xt_register_table(struct xt_table *table, /* save number of initial entries */ private->initial_entries = private->number; - list_add(&table->list, &xt[table->af].tables); + list_prepend(&xt[table->af].tables, table); ret = 0; unlock: @@ -642,7 +553,7 @@ void *xt_unregister_table(struct xt_table *table) mutex_lock(&xt[table->af].mutex); private = table->private; - list_del(&table->list); + LIST_DELETE(&xt[table->af].tables, table); mutex_unlock(&xt[table->af].mutex); return private; diff --git a/trunk/net/netfilter/xt_CLASSIFY.c b/trunk/net/netfilter/xt_CLASSIFY.c index 50de965bb104..e54e57730012 100644 --- a/trunk/net/netfilter/xt_CLASSIFY.c +++ b/trunk/net/netfilter/xt_CLASSIFY.c @@ -29,7 +29,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct xt_classify_target_info *clinfo = targinfo; @@ -39,41 +40,47 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_classify_target[] = { - { - .family = AF_INET, - .name = "CLASSIFY", - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | - (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .me = THIS_MODULE, - }, - { - .name = "CLASSIFY", - .family = AF_INET6, - .target = target, - .targetsize = sizeof(struct xt_classify_target_info), - .table = "mangle", - .hooks = (1 << NF_IP_LOCAL_OUT) | - (1 << NF_IP_FORWARD) | - (1 << NF_IP_POST_ROUTING), - .me = THIS_MODULE, - }, +static struct xt_target classify_reg = { + .name = "CLASSIFY", + .target = target, + .targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .family = AF_INET, + .me = THIS_MODULE, }; +static struct xt_target classify6_reg = { + .name = "CLASSIFY", + .target = target, + 
.targetsize = sizeof(struct xt_classify_target_info), + .table = "mangle", + .hooks = (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_FORWARD) | + (1 << NF_IP_POST_ROUTING), + .family = AF_INET6, + .me = THIS_MODULE, +}; + static int __init xt_classify_init(void) { - return xt_register_targets(xt_classify_target, - ARRAY_SIZE(xt_classify_target)); + int ret; + + ret = xt_register_target(&classify_reg); + if (ret) + return ret; + + ret = xt_register_target(&classify6_reg); + if (ret) + xt_unregister_target(&classify_reg); + + return ret; } static void __exit xt_classify_fini(void) { - xt_unregister_targets(xt_classify_target, - ARRAY_SIZE(xt_classify_target)); + xt_unregister_target(&classify_reg); + xt_unregister_target(&classify6_reg); } module_init(xt_classify_init); diff --git a/trunk/net/netfilter/xt_CONNMARK.c b/trunk/net/netfilter/xt_CONNMARK.c index c01524f817f0..60c375d36f01 100644 --- a/trunk/net/netfilter/xt_CONNMARK.c +++ b/trunk/net/netfilter/xt_CONNMARK.c @@ -38,7 +38,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct xt_connmark_target_info *markinfo = targinfo; u_int32_t diff; @@ -48,37 +49,24 @@ target(struct sk_buff **pskb, u_int32_t *ctmark = nf_ct_get_mark(*pskb, &ctinfo); if (ctmark) { - switch(markinfo->mode) { - case XT_CONNMARK_SET: - newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; - if (newmark != *ctmark) { - *ctmark = newmark; -#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) - ip_conntrack_event_cache(IPCT_MARK, *pskb); -#else - nf_conntrack_event_cache(IPCT_MARK, *pskb); -#endif - } - break; - case XT_CONNMARK_SAVE: - newmark = (*ctmark & ~markinfo->mask) | - ((*pskb)->nfmark & markinfo->mask); - if (*ctmark != newmark) { - *ctmark = newmark; -#if defined(CONFIG_IP_NF_CONNTRACK) || defined(CONFIG_IP_NF_CONNTRACK_MODULE) - ip_conntrack_event_cache(IPCT_MARK, *pskb); 
-#else - nf_conntrack_event_cache(IPCT_MARK, *pskb); -#endif - } - break; - case XT_CONNMARK_RESTORE: - nfmark = (*pskb)->nfmark; - diff = (*ctmark ^ nfmark) & markinfo->mask; - if (diff != 0) - (*pskb)->nfmark = nfmark ^ diff; - break; - } + switch(markinfo->mode) { + case XT_CONNMARK_SET: + newmark = (*ctmark & ~markinfo->mask) | markinfo->mark; + if (newmark != *ctmark) + *ctmark = newmark; + break; + case XT_CONNMARK_SAVE: + newmark = (*ctmark & ~markinfo->mask) | ((*pskb)->nfmark & markinfo->mask); + if (*ctmark != newmark) + *ctmark = newmark; + break; + case XT_CONNMARK_RESTORE: + nfmark = (*pskb)->nfmark; + diff = (*ctmark ^ nfmark) & markinfo->mask; + if (diff != 0) + (*pskb)->nfmark = nfmark ^ diff; + break; + } } return XT_CONTINUE; @@ -89,91 +77,65 @@ checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct xt_connmark_target_info *matchinfo = targinfo; if (matchinfo->mode == XT_CONNMARK_RESTORE) { - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "CONNMARK: restore can only be " - "called from \"mangle\" table, not \"%s\"\n", - tablename); - return 0; - } + if (strcmp(tablename, "mangle") != 0) { + printk(KERN_WARNING "CONNMARK: restore can only be called from \"mangle\" table, not \"%s\"\n", tablename); + return 0; + } } + if (matchinfo->mark > 0xffffffff || matchinfo->mask > 0xffffffff) { printk(KERN_WARNING "CONNMARK: Only supports 32bit mark\n"); return 0; } + return 1; } -#ifdef CONFIG_COMPAT -struct compat_xt_connmark_target_info { - compat_ulong_t mark, mask; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; +static struct xt_target connmark_reg = { + .name = "CONNMARK", + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE }; -static void compat_from_user(void *dst, void *src) -{ - struct compat_xt_connmark_target_info *cm = src; - 
struct xt_connmark_target_info m = { - .mark = cm->mark, - .mask = cm->mask, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int compat_to_user(void __user *dst, void *src) -{ - struct xt_connmark_target_info *m = src; - struct compat_xt_connmark_target_info cm = { - .mark = m->mark, - .mask = m->mask, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target xt_connmark_target[] = { - { - .name = "CONNMARK", - .family = AF_INET, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_target_info), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, -#endif - .me = THIS_MODULE - }, - { - .name = "CONNMARK", - .family = AF_INET6, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_connmark_target_info), - .me = THIS_MODULE - }, +static struct xt_target connmark6_reg = { + .name = "CONNMARK", + .target = target, + .targetsize = sizeof(struct xt_connmark_target_info), + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_connmark_init(void) { + int ret; + need_conntrack(); - return xt_register_targets(xt_connmark_target, - ARRAY_SIZE(xt_connmark_target)); + + ret = xt_register_target(&connmark_reg); + if (ret) + return ret; + + ret = xt_register_target(&connmark6_reg); + if (ret) + xt_unregister_target(&connmark_reg); + + return ret; } static void __exit xt_connmark_fini(void) { - xt_unregister_targets(xt_connmark_target, - ARRAY_SIZE(xt_connmark_target)); + xt_unregister_target(&connmark_reg); + xt_unregister_target(&connmark6_reg); } module_init(xt_connmark_init); diff --git a/trunk/net/netfilter/xt_CONNSECMARK.c b/trunk/net/netfilter/xt_CONNSECMARK.c index 467386266674..8c011e020769 100644 --- a/trunk/net/netfilter/xt_CONNSECMARK.c +++ 
b/trunk/net/netfilter/xt_CONNSECMARK.c @@ -66,7 +66,7 @@ static void secmark_restore(struct sk_buff *skb) static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { struct sk_buff *skb = *pskb; const struct xt_connsecmark_target_info *info = targinfo; @@ -89,7 +89,7 @@ static unsigned int target(struct sk_buff **pskb, const struct net_device *in, static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int hook_mask) + unsigned int targinfosize, unsigned int hook_mask) { struct xt_connsecmark_target_info *info = targinfo; @@ -106,38 +106,49 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target xt_connsecmark_target[] = { - { - .name = "CONNSECMARK", - .family = AF_INET, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "CONNSECMARK", - .family = AF_INET6, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_connsecmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, +static struct xt_target ipt_connsecmark_reg = { + .name = "CONNSECMARK", + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 0, +}; + +static struct xt_target ip6t_connsecmark_reg = { + .name = "CONNSECMARK", + .target = target, + .targetsize = sizeof(struct xt_connsecmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET6, + .revision = 0, }; static int __init xt_connsecmark_init(void) { + int err; + need_conntrack(); - return xt_register_targets(xt_connsecmark_target, - 
ARRAY_SIZE(xt_connsecmark_target)); + + err = xt_register_target(&ipt_connsecmark_reg); + if (err) + return err; + + err = xt_register_target(&ip6t_connsecmark_reg); + if (err) + xt_unregister_target(&ipt_connsecmark_reg); + + return err; } static void __exit xt_connsecmark_fini(void) { - xt_unregister_targets(xt_connsecmark_target, - ARRAY_SIZE(xt_connsecmark_target)); + xt_unregister_target(&ip6t_connsecmark_reg); + xt_unregister_target(&ipt_connsecmark_reg); } module_init(xt_connsecmark_init); diff --git a/trunk/net/netfilter/xt_DSCP.c b/trunk/net/netfilter/xt_DSCP.c deleted file mode 100644 index a7cc75aeb38d..000000000000 --- a/trunk/net/netfilter/xt_DSCP.c +++ /dev/null @@ -1,118 +0,0 @@ -/* x_tables module for setting the IPv4/IPv6 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. 
- * - * xt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("x_tables DSCP modification module"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_DSCP"); -MODULE_ALIAS("ip6t_DSCP"); - -static unsigned int target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv4_get_dsfield((*pskb)->nh.iph) >> XT_DSCP_SHIFT; - - if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - ipv4_change_dsfield((*pskb)->nh.iph, (__u8)(~XT_DSCP_MASK), - dinfo->dscp << XT_DSCP_SHIFT); - - } - return XT_CONTINUE; -} - -static unsigned int target6(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const struct xt_target *target, - const void *targinfo) -{ - const struct xt_DSCP_info *dinfo = targinfo; - u_int8_t dscp = ipv6_get_dsfield((*pskb)->nh.ipv6h) >> XT_DSCP_SHIFT; - - if (dscp != dinfo->dscp) { - if (!skb_make_writable(pskb, sizeof(struct ipv6hdr))) - return NF_DROP; - - ipv6_change_dsfield((*pskb)->nh.ipv6h, (__u8)(~XT_DSCP_MASK), - dinfo->dscp << XT_DSCP_SHIFT); - } - return XT_CONTINUE; -} - -static int checkentry(const char *tablename, - const void *e_void, - const struct xt_target *target, - void *targinfo, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct xt_DSCP_info *)targinfo)->dscp; - - if ((dscp > XT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - return 1; -} - -static struct xt_target xt_dscp_target[] = { - { - .name = "DSCP", - .family = AF_INET, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_DSCP_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = 
"DSCP", - .family = AF_INET6, - .checkentry = checkentry, - .target = target6, - .targetsize = sizeof(struct xt_DSCP_info), - .table = "mangle", - .me = THIS_MODULE, - }, -}; - -static int __init xt_dscp_target_init(void) -{ - return xt_register_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); -} - -static void __exit xt_dscp_target_fini(void) -{ - xt_unregister_targets(xt_dscp_target, ARRAY_SIZE(xt_dscp_target)); -} - -module_init(xt_dscp_target_init); -module_exit(xt_dscp_target_fini); diff --git a/trunk/net/netfilter/xt_MARK.c b/trunk/net/netfilter/xt_MARK.c index c6e860a7114f..ee9c34edc76c 100644 --- a/trunk/net/netfilter/xt_MARK.c +++ b/trunk/net/netfilter/xt_MARK.c @@ -27,7 +27,8 @@ target_v0(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct xt_mark_target_info *markinfo = targinfo; @@ -43,7 +44,8 @@ target_v1(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct xt_mark_target_info_v1 *markinfo = targinfo; int mark = 0; @@ -74,6 +76,7 @@ checkentry_v0(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info *markinfo = targinfo; @@ -90,6 +93,7 @@ checkentry_v1(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, + unsigned int targinfosize, unsigned int hook_mask) { struct xt_mark_target_info_v1 *markinfo = targinfo; @@ -108,81 +112,65 @@ checkentry_v1(const char *tablename, return 1; } -#ifdef CONFIG_COMPAT -struct compat_xt_mark_target_info_v1 { - compat_ulong_t mark; - u_int8_t mode; - u_int8_t __pad1; - u_int16_t __pad2; +static struct xt_target ipt_mark_reg_v0 = { + .name = "MARK", + .target = target_v0, + .targetsize = sizeof(struct 
xt_mark_target_info), + .table = "mangle", + .checkentry = checkentry_v0, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 0, }; -static void compat_from_user_v1(void *dst, void *src) -{ - struct compat_xt_mark_target_info_v1 *cm = src; - struct xt_mark_target_info_v1 m = { - .mark = cm->mark, - .mode = cm->mode, - }; - memcpy(dst, &m, sizeof(m)); -} +static struct xt_target ipt_mark_reg_v1 = { + .name = "MARK", + .target = target_v1, + .targetsize = sizeof(struct xt_mark_target_info_v1), + .table = "mangle", + .checkentry = checkentry_v1, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 1, +}; -static int compat_to_user_v1(void __user *dst, void *src) -{ - struct xt_mark_target_info_v1 *m = src; - struct compat_xt_mark_target_info_v1 cm = { - .mark = m->mark, - .mode = m->mode, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_target xt_mark_target[] = { - { - .name = "MARK", - .family = AF_INET, - .revision = 0, - .checkentry = checkentry_v0, - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET, - .revision = 1, - .checkentry = checkentry_v1, - .target = target_v1, - .targetsize = sizeof(struct xt_mark_target_info_v1), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_target_info_v1), - .compat_from_user = compat_from_user_v1, - .compat_to_user = compat_to_user_v1, -#endif - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "MARK", - .family = AF_INET6, - .revision = 0, - .checkentry = checkentry_v0, - .target = target_v0, - .targetsize = sizeof(struct xt_mark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, +static struct xt_target ip6t_mark_reg_v0 = { + .name = "MARK", + .target = target_v0, + .targetsize = sizeof(struct xt_mark_target_info), + .table = "mangle", + .checkentry = checkentry_v0, + .me = THIS_MODULE, + .family = AF_INET6, + 
.revision = 0, }; static int __init xt_mark_init(void) { - return xt_register_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); + int err; + + err = xt_register_target(&ipt_mark_reg_v0); + if (err) + return err; + + err = xt_register_target(&ipt_mark_reg_v1); + if (err) + xt_unregister_target(&ipt_mark_reg_v0); + + err = xt_register_target(&ip6t_mark_reg_v0); + if (err) { + xt_unregister_target(&ipt_mark_reg_v0); + xt_unregister_target(&ipt_mark_reg_v1); + } + + return err; } static void __exit xt_mark_fini(void) { - xt_unregister_targets(xt_mark_target, ARRAY_SIZE(xt_mark_target)); + xt_unregister_target(&ipt_mark_reg_v0); + xt_unregister_target(&ipt_mark_reg_v1); + xt_unregister_target(&ip6t_mark_reg_v0); } module_init(xt_mark_init); diff --git a/trunk/net/netfilter/xt_NFQUEUE.c b/trunk/net/netfilter/xt_NFQUEUE.c index db9b896e57c8..86ccceb61fdd 100644 --- a/trunk/net/netfilter/xt_NFQUEUE.c +++ b/trunk/net/netfilter/xt_NFQUEUE.c @@ -29,46 +29,65 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { const struct xt_NFQ_info *tinfo = targinfo; return NF_QUEUE_NR(tinfo->queuenum); } -static struct xt_target xt_nfqueue_target[] = { - { - .name = "NFQUEUE", - .family = AF_INET, - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .me = THIS_MODULE, - }, - { - .name = "NFQUEUE", - .family = AF_INET6, - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .me = THIS_MODULE, - }, - { - .name = "NFQUEUE", - .family = NF_ARP, - .target = target, - .targetsize = sizeof(struct xt_NFQ_info), - .me = THIS_MODULE, - }, +static struct xt_target ipt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_target ip6t_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + 
.family = AF_INET6, + .me = THIS_MODULE, +}; + +static struct xt_target arpt_NFQ_reg = { + .name = "NFQUEUE", + .target = target, + .targetsize = sizeof(struct xt_NFQ_info), + .family = NF_ARP, + .me = THIS_MODULE, }; static int __init xt_nfqueue_init(void) { - return xt_register_targets(xt_nfqueue_target, - ARRAY_SIZE(xt_nfqueue_target)); + int ret; + ret = xt_register_target(&ipt_NFQ_reg); + if (ret) + return ret; + ret = xt_register_target(&ip6t_NFQ_reg); + if (ret) + goto out_ip; + ret = xt_register_target(&arpt_NFQ_reg); + if (ret) + goto out_ip6; + + return ret; +out_ip6: + xt_unregister_target(&ip6t_NFQ_reg); +out_ip: + xt_unregister_target(&ipt_NFQ_reg); + + return ret; } static void __exit xt_nfqueue_fini(void) { - xt_register_targets(xt_nfqueue_target, ARRAY_SIZE(xt_nfqueue_target)); + xt_unregister_target(&arpt_NFQ_reg); + xt_unregister_target(&ip6t_NFQ_reg); + xt_unregister_target(&ipt_NFQ_reg); } module_init(xt_nfqueue_init); diff --git a/trunk/net/netfilter/xt_NOTRACK.c b/trunk/net/netfilter/xt_NOTRACK.c index 6d00dcaed238..98f4b5363ce8 100644 --- a/trunk/net/netfilter/xt_NOTRACK.c +++ b/trunk/net/netfilter/xt_NOTRACK.c @@ -16,7 +16,8 @@ target(struct sk_buff **pskb, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, + void *userinfo) { /* Previously seen (loopback)? Ignore. 
*/ if ((*pskb)->nfct != NULL) @@ -33,32 +34,43 @@ target(struct sk_buff **pskb, return XT_CONTINUE; } -static struct xt_target xt_notrack_target[] = { - { - .name = "NOTRACK", - .family = AF_INET, - .target = target, - .table = "raw", - .me = THIS_MODULE, - }, - { - .name = "NOTRACK", - .family = AF_INET6, - .target = target, - .table = "raw", - .me = THIS_MODULE, - }, +static struct xt_target notrack_reg = { + .name = "NOTRACK", + .target = target, + .targetsize = 0, + .table = "raw", + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_target notrack6_reg = { + .name = "NOTRACK", + .target = target, + .targetsize = 0, + .table = "raw", + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_notrack_init(void) { - return xt_register_targets(xt_notrack_target, - ARRAY_SIZE(xt_notrack_target)); + int ret; + + ret = xt_register_target(¬rack_reg); + if (ret) + return ret; + + ret = xt_register_target(¬rack6_reg); + if (ret) + xt_unregister_target(¬rack_reg); + + return ret; } static void __exit xt_notrack_fini(void) { - xt_unregister_targets(xt_notrack_target, ARRAY_SIZE(xt_notrack_target)); + xt_unregister_target(¬rack6_reg); + xt_unregister_target(¬rack_reg); } module_init(xt_notrack_init); diff --git a/trunk/net/netfilter/xt_SECMARK.c b/trunk/net/netfilter/xt_SECMARK.c index add752196290..de9537ad9a7c 100644 --- a/trunk/net/netfilter/xt_SECMARK.c +++ b/trunk/net/netfilter/xt_SECMARK.c @@ -31,7 +31,7 @@ static u8 mode; static unsigned int target(struct sk_buff **pskb, const struct net_device *in, const struct net_device *out, unsigned int hooknum, const struct xt_target *target, - const void *targinfo) + const void *targinfo, void *userinfo) { u32 secmark = 0; const struct xt_secmark_target_info *info = targinfo; @@ -85,7 +85,7 @@ static int checkentry_selinux(struct xt_secmark_target_info *info) static int checkentry(const char *tablename, const void *entry, const struct xt_target *target, void *targinfo, - unsigned int hook_mask) + unsigned 
int targinfosize, unsigned int hook_mask) { struct xt_secmark_target_info *info = targinfo; @@ -111,36 +111,47 @@ static int checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_target xt_secmark_target[] = { - { - .name = "SECMARK", - .family = AF_INET, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, - { - .name = "SECMARK", - .family = AF_INET6, - .checkentry = checkentry, - .target = target, - .targetsize = sizeof(struct xt_secmark_target_info), - .table = "mangle", - .me = THIS_MODULE, - }, +static struct xt_target ipt_secmark_reg = { + .name = "SECMARK", + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET, + .revision = 0, +}; + +static struct xt_target ip6t_secmark_reg = { + .name = "SECMARK", + .target = target, + .targetsize = sizeof(struct xt_secmark_target_info), + .table = "mangle", + .checkentry = checkentry, + .me = THIS_MODULE, + .family = AF_INET6, + .revision = 0, }; static int __init xt_secmark_init(void) { - return xt_register_targets(xt_secmark_target, - ARRAY_SIZE(xt_secmark_target)); + int err; + + err = xt_register_target(&ipt_secmark_reg); + if (err) + return err; + + err = xt_register_target(&ip6t_secmark_reg); + if (err) + xt_unregister_target(&ipt_secmark_reg); + + return err; } static void __exit xt_secmark_fini(void) { - xt_unregister_targets(xt_secmark_target, ARRAY_SIZE(xt_secmark_target)); + xt_unregister_target(&ip6t_secmark_reg); + xt_unregister_target(&ipt_secmark_reg); } module_init(xt_secmark_init); diff --git a/trunk/net/netfilter/xt_comment.c b/trunk/net/netfilter/xt_comment.c index 7db492d65220..197609cb06d7 100644 --- a/trunk/net/netfilter/xt_comment.c +++ b/trunk/net/netfilter/xt_comment.c @@ -29,32 +29,41 @@ match(const struct sk_buff *skb, return 1; } -static struct xt_match 
xt_comment_match[] = { - { - .name = "comment", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, - { - .name = "comment", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_comment_info), - .me = THIS_MODULE - }, +static struct xt_match comment_match = { + .name = "comment", + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .family = AF_INET, + .me = THIS_MODULE +}; + +static struct xt_match comment6_match = { + .name = "comment", + .match = match, + .matchsize = sizeof(struct xt_comment_info), + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_comment_init(void) { - return xt_register_matches(xt_comment_match, - ARRAY_SIZE(xt_comment_match)); + int ret; + + ret = xt_register_match(&comment_match); + if (ret) + return ret; + + ret = xt_register_match(&comment6_match); + if (ret) + xt_unregister_match(&comment_match); + + return ret; } static void __exit xt_comment_fini(void) { - xt_unregister_matches(xt_comment_match, ARRAY_SIZE(xt_comment_match)); + xt_unregister_match(&comment_match); + xt_unregister_match(&comment6_match); } module_init(xt_comment_init); diff --git a/trunk/net/netfilter/xt_connbytes.c b/trunk/net/netfilter/xt_connbytes.c index dcc497ea8183..1396fe2d07c1 100644 --- a/trunk/net/netfilter/xt_connbytes.c +++ b/trunk/net/netfilter/xt_connbytes.c @@ -125,6 +125,7 @@ static int check(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_connbytes_info *sinfo = matchinfo; @@ -142,35 +143,40 @@ static int check(const char *tablename, return 1; } -static struct xt_match xt_connbytes_match[] = { - { - .name = "connbytes", - .family = AF_INET, - .checkentry = check, - .match = match, - .matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, - { - .name = "connbytes", - .family = AF_INET6, - .checkentry = check, - .match = match, - 
.matchsize = sizeof(struct xt_connbytes_info), - .me = THIS_MODULE - }, +static struct xt_match connbytes_match = { + .name = "connbytes", + .match = match, + .checkentry = check, + .matchsize = sizeof(struct xt_connbytes_info), + .family = AF_INET, + .me = THIS_MODULE +}; +static struct xt_match connbytes6_match = { + .name = "connbytes", + .match = match, + .checkentry = check, + .matchsize = sizeof(struct xt_connbytes_info), + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_connbytes_init(void) { - return xt_register_matches(xt_connbytes_match, - ARRAY_SIZE(xt_connbytes_match)); + int ret; + ret = xt_register_match(&connbytes_match); + if (ret) + return ret; + + ret = xt_register_match(&connbytes6_match); + if (ret) + xt_unregister_match(&connbytes_match); + return ret; } static void __exit xt_connbytes_fini(void) { - xt_unregister_matches(xt_connbytes_match, - ARRAY_SIZE(xt_connbytes_match)); + xt_unregister_match(&connbytes_match); + xt_unregister_match(&connbytes6_match); } module_init(xt_connbytes_init); diff --git a/trunk/net/netfilter/xt_connmark.c b/trunk/net/netfilter/xt_connmark.c index 92a5726ef237..56324c8aff0a 100644 --- a/trunk/net/netfilter/xt_connmark.c +++ b/trunk/net/netfilter/xt_connmark.c @@ -55,6 +55,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { struct xt_connmark_info *cm = matchinfo; @@ -74,80 +75,53 @@ checkentry(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo) +destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -#ifdef CONFIG_COMPAT -struct compat_xt_connmark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; +static struct xt_match connmark_match = { + .name = "connmark", + .match 
= match, + .matchsize = sizeof(struct xt_connmark_info), + .checkentry = checkentry, + .destroy = destroy, + .family = AF_INET, + .me = THIS_MODULE }; -static void compat_from_user(void *dst, void *src) -{ - struct compat_xt_connmark_info *cm = src; - struct xt_connmark_info m = { - .mark = cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int compat_to_user(void __user *dst, void *src) -{ - struct xt_connmark_info *m = src; - struct compat_xt_connmark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match xt_connmark_match[] = { - { - .name = "connmark", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_connmark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_connmark_info), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, -#endif - .me = THIS_MODULE - }, - { - .name = "connmark", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_connmark_info), - .me = THIS_MODULE - }, +static struct xt_match connmark6_match = { + .name = "connmark", + .match = match, + .matchsize = sizeof(struct xt_connmark_info), + .checkentry = checkentry, + .destroy = destroy, + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_connmark_init(void) { + int ret; + need_conntrack(); - return xt_register_matches(xt_connmark_match, - ARRAY_SIZE(xt_connmark_match)); + + ret = xt_register_match(&connmark_match); + if (ret) + return ret; + + ret = xt_register_match(&connmark6_match); + if (ret) + xt_unregister_match(&connmark_match); + return ret; } static void __exit xt_connmark_fini(void) { - xt_register_matches(xt_connmark_match, ARRAY_SIZE(xt_connmark_match)); + xt_unregister_match(&connmark6_match); + 
xt_unregister_match(&connmark_match); } module_init(xt_connmark_init); diff --git a/trunk/net/netfilter/xt_conntrack.c b/trunk/net/netfilter/xt_conntrack.c index 0ea501a2fda5..145489a4c3f2 100644 --- a/trunk/net/netfilter/xt_conntrack.c +++ b/trunk/net/netfilter/xt_conntrack.c @@ -45,7 +45,7 @@ match(const struct sk_buff *skb, ct = ip_conntrack_get((struct sk_buff *)skb, &ctinfo); -#define FWINV(bool, invflg) ((bool) ^ !!(sinfo->invflags & invflg)) +#define FWINV(bool,invflg) ((bool) ^ !!(sinfo->invflags & invflg)) if (ct == &ip_conntrack_untracked) statebit = XT_CONNTRACK_STATE_UNTRACKED; @@ -54,72 +54,63 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if (sinfo->flags & XT_CONNTRACK_STATE) { + if(sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) + if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip != + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip) statebit |= XT_CONNTRACK_STATE_SNAT; - if (test_bit(IPS_DST_NAT_BIT, &ct->status)) + + if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip != + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip) statebit |= XT_CONNTRACK_STATE_DNAT; } - if (FWINV((statebit & sinfo->statemask) == 0, - XT_CONNTRACK_STATE)) + + if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) return 0; } - if (ct == NULL) { - if (sinfo->flags & ~XT_CONNTRACK_STATE) + if(sinfo->flags & XT_CONNTRACK_PROTO) { + if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) + return 0; + } + + if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) return 0; - return 1; } - if (sinfo->flags & XT_CONNTRACK_PROTO && - FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, - 
XT_CONNTRACK_PROTO)) - return 0; - - if (sinfo->flags & XT_CONNTRACK_ORIGSRC && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip & - sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, - XT_CONNTRACK_ORIGSRC)) - return 0; + if(sinfo->flags & XT_CONNTRACK_ORIGDST) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_ORIGDST && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip & - sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, - XT_CONNTRACK_ORIGDST)) - return 0; + if(sinfo->flags & XT_CONNTRACK_REPLSRC) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_REPLSRC && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip & - sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].src.ip, - XT_CONNTRACK_REPLSRC)) - return 0; + if(sinfo->flags & XT_CONNTRACK_REPLDST) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_REPLDST && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip & - sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, - XT_CONNTRACK_REPLDST)) - return 0; + if(sinfo->flags & XT_CONNTRACK_STATUS) { + if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_STATUS && - FWINV((ct->status & sinfo->statusmask) == 0, - XT_CONNTRACK_STATUS)) - return 0; + if(sinfo->flags & XT_CONNTRACK_EXPIRES) { + unsigned long expires; + + if(!ct) + return 0; - if 
(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = timer_pending(&ct->timeout) ? - (ct->timeout.expires - jiffies)/HZ : 0; + expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; - if (FWINV(!(expires >= sinfo->expires_min && - expires <= sinfo->expires_max), - XT_CONNTRACK_EXPIRES)) + if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) return 0; } + return 1; } @@ -150,72 +141,63 @@ match(const struct sk_buff *skb, else statebit = XT_CONNTRACK_STATE_INVALID; - if (sinfo->flags & XT_CONNTRACK_STATE) { + if(sinfo->flags & XT_CONNTRACK_STATE) { if (ct) { - if (test_bit(IPS_SRC_NAT_BIT, &ct->status)) + if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip != + ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip) statebit |= XT_CONNTRACK_STATE_SNAT; - if (test_bit(IPS_DST_NAT_BIT, &ct->status)) + + if(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip != + ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip) statebit |= XT_CONNTRACK_STATE_DNAT; } - if (FWINV((statebit & sinfo->statemask) == 0, - XT_CONNTRACK_STATE)) + + if (FWINV((statebit & sinfo->statemask) == 0, XT_CONNTRACK_STATE)) return 0; } - if (ct == NULL) { - if (sinfo->flags & ~XT_CONNTRACK_STATE) - return 0; - return 1; + if(sinfo->flags & XT_CONNTRACK_PROTO) { + if (!ct || FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, XT_CONNTRACK_PROTO)) + return 0; } - if (sinfo->flags & XT_CONNTRACK_PROTO && - FWINV(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.protonum, - XT_CONNTRACK_PROTO)) - return 0; - - if (sinfo->flags & XT_CONNTRACK_ORIGSRC && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, - XT_CONNTRACK_ORIGSRC)) - return 0; + if(sinfo->flags & XT_CONNTRACK_ORIGSRC) { + if (!ct || 
FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].src.ip, XT_CONNTRACK_ORIGSRC)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_ORIGDST && - FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != - sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, - XT_CONNTRACK_ORIGDST)) - return 0; + if(sinfo->flags & XT_CONNTRACK_ORIGDST) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_ORIGINAL].s_addr) != sinfo->tuple[IP_CT_DIR_ORIGINAL].dst.ip, XT_CONNTRACK_ORIGDST)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_REPLSRC && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip & - sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].src.ip, - XT_CONNTRACK_REPLSRC)) - return 0; + if(sinfo->flags & XT_CONNTRACK_REPLSRC) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip&sinfo->sipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].src.ip, XT_CONNTRACK_REPLSRC)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_REPLDST && - FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip & - sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != - sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, - XT_CONNTRACK_REPLDST)) - return 0; + if(sinfo->flags & XT_CONNTRACK_REPLDST) { + if (!ct || FWINV((ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip&sinfo->dipmsk[IP_CT_DIR_REPLY].s_addr) != sinfo->tuple[IP_CT_DIR_REPLY].dst.ip, XT_CONNTRACK_REPLDST)) + return 0; + } - if (sinfo->flags & XT_CONNTRACK_STATUS && - FWINV((ct->status & sinfo->statusmask) == 0, - XT_CONNTRACK_STATUS)) - return 0; + if(sinfo->flags & XT_CONNTRACK_STATUS) { + if (!ct || FWINV((ct->status & sinfo->statusmask) == 0, XT_CONNTRACK_STATUS)) + return 0; + } if(sinfo->flags & XT_CONNTRACK_EXPIRES) { - unsigned long expires = timer_pending(&ct->timeout) ? 
- (ct->timeout.expires - jiffies)/HZ : 0; + unsigned long expires; - if (FWINV(!(expires >= sinfo->expires_min && - expires <= sinfo->expires_max), - XT_CONNTRACK_EXPIRES)) + if(!ct) + return 0; + + expires = timer_pending(&ct->timeout) ? (ct->timeout.expires - jiffies)/HZ : 0; + + if (FWINV(!(expires >= sinfo->expires_min && expires <= sinfo->expires_max), XT_CONNTRACK_EXPIRES)) return 0; } + return 1; } @@ -226,6 +208,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -238,7 +221,8 @@ checkentry(const char *tablename, return 1; } -static void destroy(const struct xt_match *match, void *matchinfo) +static void +destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); @@ -257,8 +241,11 @@ static struct xt_match conntrack_match = { static int __init xt_conntrack_init(void) { + int ret; need_conntrack(); - return xt_register_match(&conntrack_match); + ret = xt_register_match(&conntrack_match); + + return ret; } static void __exit xt_conntrack_fini(void) diff --git a/trunk/net/netfilter/xt_dccp.c b/trunk/net/netfilter/xt_dccp.c index 3e6cf430e518..2e2f825dad4c 100644 --- a/trunk/net/netfilter/xt_dccp.c +++ b/trunk/net/netfilter/xt_dccp.c @@ -131,6 +131,7 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_dccp_info *info = matchinfo; @@ -140,26 +141,27 @@ checkentry(const char *tablename, && !(info->invflags & ~info->flags); } -static struct xt_match xt_dccp_match[] = { - { - .name = "dccp", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .me = 
THIS_MODULE, - }, - { - .name = "dccp", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_dccp_info), - .proto = IPPROTO_DCCP, - .me = THIS_MODULE, - }, +static struct xt_match dccp_match = +{ + .name = "dccp", + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, }; +static struct xt_match dccp6_match = +{ + .name = "dccp", + .match = match, + .matchsize = sizeof(struct xt_dccp_info), + .proto = IPPROTO_DCCP, + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, +}; + static int __init xt_dccp_init(void) { @@ -171,19 +173,27 @@ static int __init xt_dccp_init(void) dccp_optbuf = kmalloc(256 * 4, GFP_KERNEL); if (!dccp_optbuf) return -ENOMEM; - ret = xt_register_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); + ret = xt_register_match(&dccp_match); if (ret) goto out_kfree; + ret = xt_register_match(&dccp6_match); + if (ret) + goto out_unreg; + return ret; +out_unreg: + xt_unregister_match(&dccp_match); out_kfree: kfree(dccp_optbuf); + return ret; } static void __exit xt_dccp_fini(void) { - xt_unregister_matches(xt_dccp_match, ARRAY_SIZE(xt_dccp_match)); + xt_unregister_match(&dccp6_match); + xt_unregister_match(&dccp_match); kfree(dccp_optbuf); } diff --git a/trunk/net/netfilter/xt_dscp.c b/trunk/net/netfilter/xt_dscp.c deleted file mode 100644 index 26c7f4ad102a..000000000000 --- a/trunk/net/netfilter/xt_dscp.c +++ /dev/null @@ -1,103 +0,0 @@ -/* IP tables module for matching the value of the IPv4/IPv6 DSCP field - * - * xt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ - -#include -#include -#include -#include -#include - -#include -#include - -MODULE_AUTHOR("Harald Welte "); -MODULE_DESCRIPTION("x_tables DSCP matching module"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS("ipt_dscp"); -MODULE_ALIAS("ip6t_dscp"); - -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct xt_dscp_info *info = matchinfo; - u_int8_t dscp = ipv4_get_dsfield(skb->nh.iph) >> XT_DSCP_SHIFT; - - return (dscp == info->dscp) ^ !!info->invert; -} - -static int match6(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct xt_dscp_info *info = matchinfo; - u_int8_t dscp = ipv6_get_dsfield(skb->nh.ipv6h) >> XT_DSCP_SHIFT; - - return (dscp == info->dscp) ^ !!info->invert; -} - -static int checkentry(const char *tablename, - const void *info, - const struct xt_match *match, - void *matchinfo, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct xt_dscp_info *)matchinfo)->dscp; - - if (dscp > XT_DSCP_MAX) { - printk(KERN_ERR "xt_dscp: dscp %x out of range\n", dscp); - return 0; - } - - return 1; -} - -static struct xt_match xt_dscp_match[] = { - { - .name = "dscp", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_dscp_info), - .me = THIS_MODULE, - }, - { - .name = "dscp", - .family = AF_INET6, - .checkentry = checkentry, - .match = match6, - .matchsize = sizeof(struct xt_dscp_info), - .me = THIS_MODULE, - }, -}; - -static int __init xt_dscp_match_init(void) -{ - return xt_register_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); -} - -static void __exit xt_dscp_match_fini(void) -{ - xt_unregister_matches(xt_dscp_match, ARRAY_SIZE(xt_dscp_match)); -} - 
-module_init(xt_dscp_match_init); -module_exit(xt_dscp_match_fini); diff --git a/trunk/net/netfilter/xt_esp.c b/trunk/net/netfilter/xt_esp.c index 7c95f149d942..9dad6281e0c1 100644 --- a/trunk/net/netfilter/xt_esp.c +++ b/trunk/net/netfilter/xt_esp.c @@ -79,6 +79,7 @@ checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, void *matchinfo, + unsigned int matchinfosize, unsigned int hook_mask) { const struct xt_esp *espinfo = matchinfo; @@ -91,35 +92,44 @@ checkentry(const char *tablename, return 1; } -static struct xt_match xt_esp_match[] = { - { - .name = "esp", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_esp), - .proto = IPPROTO_ESP, - .me = THIS_MODULE, - }, - { - .name = "esp", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_esp), - .proto = IPPROTO_ESP, - .me = THIS_MODULE, - }, +static struct xt_match esp_match = { + .name = "esp", + .family = AF_INET, + .proto = IPPROTO_ESP, + .match = &match, + .matchsize = sizeof(struct xt_esp), + .checkentry = &checkentry, + .me = THIS_MODULE, +}; + +static struct xt_match esp6_match = { + .name = "esp", + .family = AF_INET6, + .proto = IPPROTO_ESP, + .match = &match, + .matchsize = sizeof(struct xt_esp), + .checkentry = &checkentry, + .me = THIS_MODULE, }; static int __init xt_esp_init(void) { - return xt_register_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); + int ret; + ret = xt_register_match(&esp_match); + if (ret) + return ret; + + ret = xt_register_match(&esp6_match); + if (ret) + xt_unregister_match(&esp_match); + + return ret; } static void __exit xt_esp_cleanup(void) { - xt_unregister_matches(xt_esp_match, ARRAY_SIZE(xt_esp_match)); + xt_unregister_match(&esp_match); + xt_unregister_match(&esp6_match); } module_init(xt_esp_init); diff --git a/trunk/net/netfilter/xt_helper.c b/trunk/net/netfilter/xt_helper.c index 5d7818b73e3a..799c2a43e3b9 100644 --- 
a/trunk/net/netfilter/xt_helper.c +++ b/trunk/net/netfilter/xt_helper.c @@ -139,6 +139,7 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { struct xt_helper_info *info = matchinfo; @@ -155,44 +156,52 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo) +destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -static struct xt_match xt_helper_match[] = { - { - .name = "helper", - .family = AF_INET, - .checkentry = check, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, - { - .name = "helper", - .family = AF_INET6, - .checkentry = check, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_helper_info), - .me = THIS_MODULE, - }, +static struct xt_match helper_match = { + .name = "helper", + .match = match, + .matchsize = sizeof(struct xt_helper_info), + .checkentry = check, + .destroy = destroy, + .family = AF_INET, + .me = THIS_MODULE, +}; +static struct xt_match helper6_match = { + .name = "helper", + .match = match, + .matchsize = sizeof(struct xt_helper_info), + .checkentry = check, + .destroy = destroy, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_helper_init(void) { + int ret; need_conntrack(); - return xt_register_matches(xt_helper_match, - ARRAY_SIZE(xt_helper_match)); + + ret = xt_register_match(&helper_match); + if (ret < 0) + return ret; + + ret = xt_register_match(&helper6_match); + if (ret < 0) + xt_unregister_match(&helper_match); + + return ret; } static void __exit xt_helper_fini(void) { - xt_unregister_matches(xt_helper_match, ARRAY_SIZE(xt_helper_match)); + xt_unregister_match(&helper_match); + xt_unregister_match(&helper6_match); } 
module_init(xt_helper_init); diff --git a/trunk/net/netfilter/xt_length.c b/trunk/net/netfilter/xt_length.c index 67fd30d9f303..109132c9a146 100644 --- a/trunk/net/netfilter/xt_length.c +++ b/trunk/net/netfilter/xt_length.c @@ -52,32 +52,39 @@ match6(const struct sk_buff *skb, return (pktlen >= info->min && pktlen <= info->max) ^ info->invert; } -static struct xt_match xt_length_match[] = { - { - .name = "length", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_length_info), - .me = THIS_MODULE, - }, - { - .name = "length", - .family = AF_INET6, - .match = match6, - .matchsize = sizeof(struct xt_length_info), - .me = THIS_MODULE, - }, +static struct xt_match length_match = { + .name = "length", + .match = match, + .matchsize = sizeof(struct xt_length_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match length6_match = { + .name = "length", + .match = match6, + .matchsize = sizeof(struct xt_length_info), + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_length_init(void) { - return xt_register_matches(xt_length_match, - ARRAY_SIZE(xt_length_match)); + int ret; + ret = xt_register_match(&length_match); + if (ret) + return ret; + ret = xt_register_match(&length6_match); + if (ret) + xt_unregister_match(&length_match); + + return ret; } static void __exit xt_length_fini(void) { - xt_unregister_matches(xt_length_match, ARRAY_SIZE(xt_length_match)); + xt_unregister_match(&length_match); + xt_unregister_match(&length6_match); } module_init(xt_length_init); diff --git a/trunk/net/netfilter/xt_limit.c b/trunk/net/netfilter/xt_limit.c index fda7b7dec27d..ce7fdb7e4e07 100644 --- a/trunk/net/netfilter/xt_limit.c +++ b/trunk/net/netfilter/xt_limit.c @@ -110,6 +110,7 @@ ipt_limit_checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { struct xt_rateinfo *r = matchinfo; @@ -122,95 +123,55 @@ ipt_limit_checkentry(const char 
*tablename, return 0; } + /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * + 128. */ + r->prev = jiffies; + r->credit = user2credits(r->avg * r->burst); /* Credits full. */ + r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ + r->cost = user2credits(r->avg); + /* For SMP, we only want to use one set of counters. */ r->master = r; - if (r->cost == 0) { - /* User avg in seconds * XT_LIMIT_SCALE: convert to jiffies * - 128. */ - r->prev = jiffies; - r->credit = user2credits(r->avg * r->burst); /* Credits full. */ - r->credit_cap = user2credits(r->avg * r->burst); /* Credits full. */ - r->cost = user2credits(r->avg); - } + return 1; } -#ifdef CONFIG_COMPAT -struct compat_xt_rateinfo { - u_int32_t avg; - u_int32_t burst; - - compat_ulong_t prev; - u_int32_t credit; - u_int32_t credit_cap, cost; - - u_int32_t master; +static struct xt_match ipt_limit_reg = { + .name = "limit", + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), + .checkentry = ipt_limit_checkentry, + .family = AF_INET, + .me = THIS_MODULE, }; - -/* To keep the full "prev" timestamp, the upper 32 bits are stored in the - * master pointer, which does not need to be preserved. */ -static void compat_from_user(void *dst, void *src) -{ - struct compat_xt_rateinfo *cm = src; - struct xt_rateinfo m = { - .avg = cm->avg, - .burst = cm->burst, - .prev = cm->prev | (unsigned long)cm->master << 32, - .credit = cm->credit, - .credit_cap = cm->credit_cap, - .cost = cm->cost, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int compat_to_user(void __user *dst, void *src) -{ - struct xt_rateinfo *m = src; - struct compat_xt_rateinfo cm = { - .avg = m->avg, - .burst = m->burst, - .prev = m->prev, - .credit = m->credit, - .credit_cap = m->credit_cap, - .cost = m->cost, - .master = m->prev >> 32, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? 
-EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match xt_limit_match[] = { - { - .name = "limit", - .family = AF_INET, - .checkentry = ipt_limit_checkentry, - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_rateinfo), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, -#endif - .me = THIS_MODULE, - }, - { - .name = "limit", - .family = AF_INET6, - .checkentry = ipt_limit_checkentry, - .match = ipt_limit_match, - .matchsize = sizeof(struct xt_rateinfo), - .me = THIS_MODULE, - }, +static struct xt_match limit6_reg = { + .name = "limit", + .match = ipt_limit_match, + .matchsize = sizeof(struct xt_rateinfo), + .checkentry = ipt_limit_checkentry, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_limit_init(void) { - return xt_register_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); + int ret; + + ret = xt_register_match(&ipt_limit_reg); + if (ret) + return ret; + + ret = xt_register_match(&limit6_reg); + if (ret) + xt_unregister_match(&ipt_limit_reg); + + return ret; } static void __exit xt_limit_fini(void) { - xt_unregister_matches(xt_limit_match, ARRAY_SIZE(xt_limit_match)); + xt_unregister_match(&ipt_limit_reg); + xt_unregister_match(&limit6_reg); } module_init(xt_limit_init); diff --git a/trunk/net/netfilter/xt_mac.c b/trunk/net/netfilter/xt_mac.c index 425fc21e31f5..356290ffe386 100644 --- a/trunk/net/netfilter/xt_mac.c +++ b/trunk/net/netfilter/xt_mac.c @@ -43,37 +43,43 @@ match(const struct sk_buff *skb, ^ info->invert)); } -static struct xt_match xt_mac_match[] = { - { - .name = "mac", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | - (1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .me = THIS_MODULE, - }, - { - .name = "mac", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_mac_info), - .hooks = (1 << NF_IP_PRE_ROUTING) | - 
(1 << NF_IP_LOCAL_IN) | - (1 << NF_IP_FORWARD), - .me = THIS_MODULE, - }, +static struct xt_match mac_match = { + .name = "mac", + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .family = AF_INET, + .me = THIS_MODULE, +}; +static struct xt_match mac6_match = { + .name = "mac", + .match = match, + .matchsize = sizeof(struct xt_mac_info), + .hooks = (1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_IN) | + (1 << NF_IP_FORWARD), + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_mac_init(void) { - return xt_register_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); + int ret; + ret = xt_register_match(&mac_match); + if (ret) + return ret; + + ret = xt_register_match(&mac6_match); + if (ret) + xt_unregister_match(&mac_match); + + return ret; } static void __exit xt_mac_fini(void) { - xt_unregister_matches(xt_mac_match, ARRAY_SIZE(xt_mac_match)); + xt_unregister_match(&mac_match); + xt_unregister_match(&mac6_match); } module_init(xt_mac_init); diff --git a/trunk/net/netfilter/xt_mark.c b/trunk/net/netfilter/xt_mark.c index 934dddfbcd23..876bc5797738 100644 --- a/trunk/net/netfilter/xt_mark.c +++ b/trunk/net/netfilter/xt_mark.c @@ -39,6 +39,7 @@ checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_mark_info *minfo = matchinfo; @@ -50,69 +51,42 @@ checkentry(const char *tablename, return 1; } -#ifdef CONFIG_COMPAT -struct compat_xt_mark_info { - compat_ulong_t mark, mask; - u_int8_t invert; - u_int8_t __pad1; - u_int16_t __pad2; +static struct xt_match mark_match = { + .name = "mark", + .match = match, + .matchsize = sizeof(struct xt_mark_info), + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, }; -static void compat_from_user(void *dst, void *src) -{ - struct compat_xt_mark_info *cm = src; - struct xt_mark_info m = { - .mark = 
cm->mark, - .mask = cm->mask, - .invert = cm->invert, - }; - memcpy(dst, &m, sizeof(m)); -} - -static int compat_to_user(void __user *dst, void *src) -{ - struct xt_mark_info *m = src; - struct compat_xt_mark_info cm = { - .mark = m->mark, - .mask = m->mask, - .invert = m->invert, - }; - return copy_to_user(dst, &cm, sizeof(cm)) ? -EFAULT : 0; -} -#endif /* CONFIG_COMPAT */ - -static struct xt_match xt_mark_match[] = { - { - .name = "mark", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_mark_info), -#ifdef CONFIG_COMPAT - .compatsize = sizeof(struct compat_xt_mark_info), - .compat_from_user = compat_from_user, - .compat_to_user = compat_to_user, -#endif - .me = THIS_MODULE, - }, - { - .name = "mark", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_mark_info), - .me = THIS_MODULE, - }, +static struct xt_match mark6_match = { + .name = "mark", + .match = match, + .matchsize = sizeof(struct xt_mark_info), + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_mark_init(void) { - return xt_register_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); + int ret; + ret = xt_register_match(&mark_match); + if (ret) + return ret; + + ret = xt_register_match(&mark6_match); + if (ret) + xt_unregister_match(&mark_match); + + return ret; } static void __exit xt_mark_fini(void) { - xt_unregister_matches(xt_mark_match, ARRAY_SIZE(xt_mark_match)); + xt_unregister_match(&mark_match); + xt_unregister_match(&mark6_match); } module_init(xt_mark_init); diff --git a/trunk/net/netfilter/xt_multiport.c b/trunk/net/netfilter/xt_multiport.c index d3aefd380930..1ff0a25396e7 100644 --- a/trunk/net/netfilter/xt_multiport.c +++ b/trunk/net/netfilter/xt_multiport.c @@ -176,6 +176,7 @@ checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip 
= info; @@ -190,6 +191,7 @@ checkentry_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ipt_ip *ip = info; @@ -204,6 +206,7 @@ checkentry6(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -218,6 +221,7 @@ checkentry6_v1(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct ip6t_ip6 *ip = info; @@ -227,55 +231,84 @@ checkentry6_v1(const char *tablename, multiinfo->count); } -static struct xt_match xt_multiport_match[] = { - { - .name = "multiport", - .family = AF_INET, - .revision = 0, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_multiport), - .me = THIS_MODULE, - }, - { - .name = "multiport", - .family = AF_INET, - .revision = 1, - .checkentry = checkentry_v1, - .match = match_v1, - .matchsize = sizeof(struct xt_multiport_v1), - .me = THIS_MODULE, - }, - { - .name = "multiport", - .family = AF_INET6, - .revision = 0, - .checkentry = checkentry6, - .match = match, - .matchsize = sizeof(struct xt_multiport), - .me = THIS_MODULE, - }, - { - .name = "multiport", - .family = AF_INET6, - .revision = 1, - .checkentry = checkentry6_v1, - .match = match_v1, - .matchsize = sizeof(struct xt_multiport_v1), - .me = THIS_MODULE, - }, +static struct xt_match multiport_match = { + .name = "multiport", + .revision = 0, + .matchsize = sizeof(struct xt_multiport), + .match = &match, + .checkentry = &checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match multiport_match_v1 = { + .name = "multiport", + .revision = 1, + .matchsize = sizeof(struct xt_multiport_v1), + .match = &match_v1, + .checkentry = &checkentry_v1, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match 
multiport6_match = { + .name = "multiport", + .revision = 0, + .matchsize = sizeof(struct xt_multiport), + .match = &match, + .checkentry = &checkentry6, + .family = AF_INET6, + .me = THIS_MODULE, +}; + +static struct xt_match multiport6_match_v1 = { + .name = "multiport", + .revision = 1, + .matchsize = sizeof(struct xt_multiport_v1), + .match = &match_v1, + .checkentry = &checkentry6_v1, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_multiport_init(void) { - return xt_register_matches(xt_multiport_match, - ARRAY_SIZE(xt_multiport_match)); + int ret; + + ret = xt_register_match(&multiport_match); + if (ret) + goto out; + + ret = xt_register_match(&multiport_match_v1); + if (ret) + goto out_unreg_multi_v0; + + ret = xt_register_match(&multiport6_match); + if (ret) + goto out_unreg_multi_v1; + + ret = xt_register_match(&multiport6_match_v1); + if (ret) + goto out_unreg_multi6_v0; + + return ret; + +out_unreg_multi6_v0: + xt_unregister_match(&multiport6_match); +out_unreg_multi_v1: + xt_unregister_match(&multiport_match_v1); +out_unreg_multi_v0: + xt_unregister_match(&multiport_match); +out: + return ret; } static void __exit xt_multiport_fini(void) { - xt_unregister_matches(xt_multiport_match, - ARRAY_SIZE(xt_multiport_match)); + xt_unregister_match(&multiport_match); + xt_unregister_match(&multiport_match_v1); + xt_unregister_match(&multiport6_match); + xt_unregister_match(&multiport6_match_v1); } module_init(xt_multiport_init); diff --git a/trunk/net/netfilter/xt_physdev.c b/trunk/net/netfilter/xt_physdev.c index fd8f954cded5..63a965467465 100644 --- a/trunk/net/netfilter/xt_physdev.c +++ b/trunk/net/netfilter/xt_physdev.c @@ -106,6 +106,7 @@ checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_physdev_info *info = matchinfo; @@ -131,34 +132,43 @@ checkentry(const char *tablename, return 1; } -static struct xt_match xt_physdev_match[] 
= { - { - .name = "physdev", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, - { - .name = "physdev", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_physdev_info), - .me = THIS_MODULE, - }, +static struct xt_match physdev_match = { + .name = "physdev", + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match physdev6_match = { + .name = "physdev", + .match = match, + .matchsize = sizeof(struct xt_physdev_info), + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_physdev_init(void) { - return xt_register_matches(xt_physdev_match, - ARRAY_SIZE(xt_physdev_match)); + int ret; + + ret = xt_register_match(&physdev_match); + if (ret < 0) + return ret; + + ret = xt_register_match(&physdev6_match); + if (ret < 0) + xt_unregister_match(&physdev_match); + + return ret; } static void __exit xt_physdev_fini(void) { - xt_unregister_matches(xt_physdev_match, ARRAY_SIZE(xt_physdev_match)); + xt_unregister_match(&physdev_match); + xt_unregister_match(&physdev6_match); } module_init(xt_physdev_init); diff --git a/trunk/net/netfilter/xt_pkttype.c b/trunk/net/netfilter/xt_pkttype.c index 16e7b0804287..d2f5320a80bf 100644 --- a/trunk/net/netfilter/xt_pkttype.c +++ b/trunk/net/netfilter/xt_pkttype.c @@ -43,32 +43,40 @@ static int match(const struct sk_buff *skb, return (type == info->pkttype) ^ info->invert; } -static struct xt_match xt_pkttype_match[] = { - { - .name = "pkttype", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, - { - .name = "pkttype", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_pkttype_info), - .me = THIS_MODULE, - }, +static struct xt_match pkttype_match = { + .name = "pkttype", + .match 
= match, + .matchsize = sizeof(struct xt_pkttype_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match pkttype6_match = { + .name = "pkttype", + .match = match, + .matchsize = sizeof(struct xt_pkttype_info), + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_pkttype_init(void) { - return xt_register_matches(xt_pkttype_match, - ARRAY_SIZE(xt_pkttype_match)); + int ret; + ret = xt_register_match(&pkttype_match); + if (ret) + return ret; + + ret = xt_register_match(&pkttype6_match); + if (ret) + xt_unregister_match(&pkttype_match); + + return ret; } static void __exit xt_pkttype_fini(void) { - xt_unregister_matches(xt_pkttype_match, ARRAY_SIZE(xt_pkttype_match)); + xt_unregister_match(&pkttype_match); + xt_unregister_match(&pkttype6_match); } module_init(xt_pkttype_init); diff --git a/trunk/net/netfilter/xt_policy.c b/trunk/net/netfilter/xt_policy.c index 46bde2b1e1e0..ba1ca03abad3 100644 --- a/trunk/net/netfilter/xt_policy.c +++ b/trunk/net/netfilter/xt_policy.c @@ -135,7 +135,8 @@ static int match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *ip_void, const struct xt_match *match, - void *matchinfo, unsigned int hook_mask) + void *matchinfo, unsigned int matchsize, + unsigned int hook_mask) { struct xt_policy_info *info = matchinfo; @@ -164,34 +165,43 @@ static int checkentry(const char *tablename, const void *ip_void, return 1; } -static struct xt_match xt_policy_match[] = { - { - .name = "policy", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .me = THIS_MODULE, - }, - { - .name = "policy", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_policy_info), - .me = THIS_MODULE, - }, +static struct xt_match policy_match = { + .name = "policy", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .checkentry = checkentry, + .family = AF_INET, 
+ .me = THIS_MODULE, +}; + +static struct xt_match policy6_match = { + .name = "policy", + .family = AF_INET6, + .match = match, + .matchsize = sizeof(struct xt_policy_info), + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init init(void) { - return xt_register_matches(xt_policy_match, - ARRAY_SIZE(xt_policy_match)); + int ret; + + ret = xt_register_match(&policy_match); + if (ret) + return ret; + ret = xt_register_match(&policy6_match); + if (ret) + xt_unregister_match(&policy_match); + return ret; } static void __exit fini(void) { - xt_unregister_matches(xt_policy_match, ARRAY_SIZE(xt_policy_match)); + xt_unregister_match(&policy6_match); + xt_unregister_match(&policy_match); } module_init(init); diff --git a/trunk/net/netfilter/xt_quota.c b/trunk/net/netfilter/xt_quota.c index b75fa2c70e66..be8d3c26b568 100644 --- a/trunk/net/netfilter/xt_quota.c +++ b/trunk/net/netfilter/xt_quota.c @@ -41,7 +41,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) + unsigned int matchsize, unsigned int hook_mask) { struct xt_quota_info *q = (struct xt_quota_info *)matchinfo; @@ -52,33 +52,46 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match xt_quota_match[] = { - { - .name = "quota", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, - { - .name = "quota", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_quota_info), - .me = THIS_MODULE - }, +static struct xt_match quota_match = { + .name = "quota", + .family = AF_INET, + .match = match, + .matchsize = sizeof(struct xt_quota_info), + .checkentry = checkentry, + .me = THIS_MODULE +}; + +static struct xt_match quota_match6 = { + .name = "quota", + .family = AF_INET6, + .match = match, + .matchsize 
= sizeof(struct xt_quota_info), + .checkentry = checkentry, + .me = THIS_MODULE }; static int __init xt_quota_init(void) { - return xt_register_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); + int ret; + + ret = xt_register_match(&quota_match); + if (ret) + goto err1; + ret = xt_register_match(&quota_match6); + if (ret) + goto err2; + return ret; + +err2: + xt_unregister_match(&quota_match); +err1: + return ret; } static void __exit xt_quota_fini(void) { - xt_unregister_matches(xt_quota_match, ARRAY_SIZE(xt_quota_match)); + xt_unregister_match(&quota_match6); + xt_unregister_match(&quota_match); } module_init(xt_quota_init); diff --git a/trunk/net/netfilter/xt_sctp.c b/trunk/net/netfilter/xt_sctp.c index 7956acaaa24b..843383e01d41 100644 --- a/trunk/net/netfilter/xt_sctp.c +++ b/trunk/net/netfilter/xt_sctp.c @@ -163,6 +163,7 @@ checkentry(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_sctp_info *info = matchinfo; @@ -177,35 +178,44 @@ checkentry(const char *tablename, | SCTP_CHUNK_MATCH_ONLY))); } -static struct xt_match xt_sctp_match[] = { - { - .name = "sctp", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .me = THIS_MODULE - }, - { - .name = "sctp", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_sctp_info), - .proto = IPPROTO_SCTP, - .me = THIS_MODULE - }, +static struct xt_match sctp_match = { + .name = "sctp", + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE +}; + +static struct xt_match sctp6_match = { + .name = "sctp", + .match = match, + .matchsize = sizeof(struct xt_sctp_info), + .proto = IPPROTO_SCTP, + .checkentry = checkentry, + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_sctp_init(void) { -
return xt_register_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); + int ret; + ret = xt_register_match(&sctp_match); + if (ret) + return ret; + + ret = xt_register_match(&sctp6_match); + if (ret) + xt_unregister_match(&sctp_match); + + return ret; } static void __exit xt_sctp_fini(void) { - xt_unregister_matches(xt_sctp_match, ARRAY_SIZE(xt_sctp_match)); + xt_unregister_match(&sctp6_match); + xt_unregister_match(&sctp_match); } module_init(xt_sctp_init); diff --git a/trunk/net/netfilter/xt_state.c b/trunk/net/netfilter/xt_state.c index d9010b16a1f9..f9e304dc4504 100644 --- a/trunk/net/netfilter/xt_state.c +++ b/trunk/net/netfilter/xt_state.c @@ -48,6 +48,7 @@ static int check(const char *tablename, const void *inf, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) @@ -61,43 +62,54 @@ static int check(const char *tablename, } static void -destroy(const struct xt_match *match, void *matchinfo) +destroy(const struct xt_match *match, void *matchinfo, unsigned int matchsize) { #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) nf_ct_l3proto_module_put(match->family); #endif } -static struct xt_match xt_state_match[] = { - { - .name = "state", - .family = AF_INET, - .checkentry = check, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .me = THIS_MODULE, - }, - { - .name = "state", - .family = AF_INET6, - .checkentry = check, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_state_info), - .me = THIS_MODULE, - }, +static struct xt_match state_match = { + .name = "state", + .match = match, + .checkentry = check, + .destroy = destroy, + .matchsize = sizeof(struct xt_state_info), + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match state6_match = { + .name = "state", + .match = match, + .checkentry = check, + .destroy = destroy, + .matchsize = 
sizeof(struct xt_state_info), + .family = AF_INET6, + .me = THIS_MODULE, }; static int __init xt_state_init(void) { + int ret; + need_conntrack(); - return xt_register_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); + + ret = xt_register_match(&state_match); + if (ret < 0) + return ret; + + ret = xt_register_match(&state6_match); + if (ret < 0) + xt_unregister_match(&state_match); + + return ret; } static void __exit xt_state_fini(void) { - xt_unregister_matches(xt_state_match, ARRAY_SIZE(xt_state_match)); + xt_unregister_match(&state_match); + xt_unregister_match(&state6_match); } module_init(xt_state_init); diff --git a/trunk/net/netfilter/xt_statistic.c b/trunk/net/netfilter/xt_statistic.c index 091a9f89f5d5..de1037f58596 100644 --- a/trunk/net/netfilter/xt_statistic.c +++ b/trunk/net/netfilter/xt_statistic.c @@ -55,7 +55,7 @@ match(const struct sk_buff *skb, static int checkentry(const char *tablename, const void *entry, const struct xt_match *match, void *matchinfo, - unsigned int hook_mask) + unsigned int matchsize, unsigned int hook_mask) { struct xt_statistic_info *info = (struct xt_statistic_info *)matchinfo; @@ -66,35 +66,46 @@ checkentry(const char *tablename, const void *entry, return 1; } -static struct xt_match xt_statistic_match[] = { - { - .name = "statistic", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, - { - .name = "statistic", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .matchsize = sizeof(struct xt_statistic_info), - .me = THIS_MODULE, - }, +static struct xt_match statistic_match = { + .name = "statistic", + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .checkentry = checkentry, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match statistic_match6 = { + .name = "statistic", + .match = match, + .matchsize = sizeof(struct xt_statistic_info), + .checkentry = checkentry, + .family = 
AF_INET6, + .me = THIS_MODULE, }; static int __init xt_statistic_init(void) { - return xt_register_matches(xt_statistic_match, - ARRAY_SIZE(xt_statistic_match)); + int ret; + + ret = xt_register_match(&statistic_match); + if (ret) + goto err1; + + ret = xt_register_match(&statistic_match6); + if (ret) + goto err2; + return ret; +err2: + xt_unregister_match(&statistic_match); +err1: + return ret; } static void __exit xt_statistic_fini(void) { - xt_unregister_matches(xt_statistic_match, - ARRAY_SIZE(xt_statistic_match)); + xt_unregister_match(&statistic_match6); + xt_unregister_match(&statistic_match); } module_init(xt_statistic_init); diff --git a/trunk/net/netfilter/xt_string.c b/trunk/net/netfilter/xt_string.c index 4453252400aa..275330fcdaaa 100644 --- a/trunk/net/netfilter/xt_string.c +++ b/trunk/net/netfilter/xt_string.c @@ -46,6 +46,7 @@ static int checkentry(const char *tablename, const void *ip, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { struct xt_string_info *conf = matchinfo; @@ -68,40 +69,49 @@ static int checkentry(const char *tablename, return 1; } -static void destroy(const struct xt_match *match, void *matchinfo) +static void destroy(const struct xt_match *match, void *matchinfo, + unsigned int matchsize) { textsearch_destroy(STRING_TEXT_PRIV(matchinfo)->config); } -static struct xt_match xt_string_match[] = { - { - .name = "string", - .family = AF_INET, - .checkentry = checkentry, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, - { - .name = "string", - .family = AF_INET6, - .checkentry = checkentry, - .match = match, - .destroy = destroy, - .matchsize = sizeof(struct xt_string_info), - .me = THIS_MODULE - }, +static struct xt_match string_match = { + .name = "string", + .match = match, + .matchsize = sizeof(struct xt_string_info), + .checkentry = checkentry, + .destroy = destroy, + .family = AF_INET, + .me = THIS_MODULE +}; +static 
struct xt_match string6_match = { + .name = "string", + .match = match, + .matchsize = sizeof(struct xt_string_info), + .checkentry = checkentry, + .destroy = destroy, + .family = AF_INET6, + .me = THIS_MODULE }; static int __init xt_string_init(void) { - return xt_register_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); + int ret; + + ret = xt_register_match(&string_match); + if (ret) + return ret; + ret = xt_register_match(&string6_match); + if (ret) + xt_unregister_match(&string_match); + + return ret; } static void __exit xt_string_fini(void) { - xt_unregister_matches(xt_string_match, ARRAY_SIZE(xt_string_match)); + xt_unregister_match(&string_match); + xt_unregister_match(&string6_match); } module_init(xt_string_init); diff --git a/trunk/net/netfilter/xt_tcpmss.c b/trunk/net/netfilter/xt_tcpmss.c index a3682fe2f192..cf7d335cadcd 100644 --- a/trunk/net/netfilter/xt_tcpmss.c +++ b/trunk/net/netfilter/xt_tcpmss.c @@ -18,22 +18,21 @@ #include #include +#define TH_SYN 0x02 + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Marc Boucher "); MODULE_DESCRIPTION("iptables TCP MSS match module"); MODULE_ALIAS("ipt_tcpmss"); -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct xt_match *match, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +/* Returns 1 if the mss option is set and matched by the range, 0 otherwise */ +static inline int +mssoption_match(u_int16_t min, u_int16_t max, + const struct sk_buff *skb, + unsigned int protoff, + int invert, + int *hotdrop) { - const struct xt_tcpmss_match_info *info = matchinfo; struct tcphdr _tcph, *th; /* tcp.doff is only 4 bits, ie. 
max 15 * 4 bytes */ u8 _opt[15 * 4 - sizeof(_tcph)], *op; @@ -65,50 +64,72 @@ match(const struct sk_buff *skb, mssval = (op[i+2] << 8) | op[i+3]; - return (mssval >= info->mss_min && - mssval <= info->mss_max) ^ info->invert; + return (mssval >= min && mssval <= max) ^ invert; } - if (op[i] < 2) - i++; - else - i += op[i+1] ? : 1; + if (op[i] < 2) i++; + else i += op[i+1]?:1; } out: - return info->invert; + return invert; -dropit: + dropit: *hotdrop = 1; return 0; } -static struct xt_match xt_tcpmss_match[] = { - { - .name = "tcpmss", - .family = AF_INET, - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .me = THIS_MODULE, - }, - { - .name = "tcpmss", - .family = AF_INET6, - .match = match, - .matchsize = sizeof(struct xt_tcpmss_match_info), - .proto = IPPROTO_TCP, - .me = THIS_MODULE, - }, +static int +match(const struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + const struct xt_match *match, + const void *matchinfo, + int offset, + unsigned int protoff, + int *hotdrop) +{ + const struct xt_tcpmss_match_info *info = matchinfo; + + return mssoption_match(info->mss_min, info->mss_max, skb, protoff, + info->invert, hotdrop); +} + +static struct xt_match tcpmss_match = { + .name = "tcpmss", + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .family = AF_INET, + .me = THIS_MODULE, +}; + +static struct xt_match tcpmss6_match = { + .name = "tcpmss", + .match = match, + .matchsize = sizeof(struct xt_tcpmss_match_info), + .proto = IPPROTO_TCP, + .family = AF_INET6, + .me = THIS_MODULE, }; + static int __init xt_tcpmss_init(void) { - return xt_register_matches(xt_tcpmss_match, - ARRAY_SIZE(xt_tcpmss_match)); + int ret; + ret = xt_register_match(&tcpmss_match); + if (ret) + return ret; + + ret = xt_register_match(&tcpmss6_match); + if (ret) + xt_unregister_match(&tcpmss_match); + + return ret; } static void __exit xt_tcpmss_fini(void) { - 
xt_unregister_matches(xt_tcpmss_match, ARRAY_SIZE(xt_tcpmss_match)); + xt_unregister_match(&tcpmss6_match); + xt_unregister_match(&tcpmss_match); } module_init(xt_tcpmss_init); diff --git a/trunk/net/netfilter/xt_tcpudp.c b/trunk/net/netfilter/xt_tcpudp.c index e76a68e0bc66..a9a63aa68936 100644 --- a/trunk/net/netfilter/xt_tcpudp.c +++ b/trunk/net/netfilter/xt_tcpudp.c @@ -141,6 +141,7 @@ tcp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *tcpinfo = matchinfo; @@ -189,6 +190,7 @@ udp_checkentry(const char *tablename, const void *info, const struct xt_match *match, void *matchinfo, + unsigned int matchsize, unsigned int hook_mask) { const struct xt_tcp *udpinfo = matchinfo; @@ -197,54 +199,81 @@ udp_checkentry(const char *tablename, return !(udpinfo->invflags & ~XT_UDP_INV_MASK); } -static struct xt_match xt_tcpudp_match[] = { - { - .name = "tcp", - .family = AF_INET, - .checkentry = tcp_checkentry, - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .me = THIS_MODULE, - }, - { - .name = "tcp", - .family = AF_INET6, - .checkentry = tcp_checkentry, - .match = tcp_match, - .matchsize = sizeof(struct xt_tcp), - .proto = IPPROTO_TCP, - .me = THIS_MODULE, - }, - { - .name = "udp", - .family = AF_INET, - .checkentry = udp_checkentry, - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .me = THIS_MODULE, - }, - { - .name = "udp", - .family = AF_INET6, - .checkentry = udp_checkentry, - .match = udp_match, - .matchsize = sizeof(struct xt_udp), - .proto = IPPROTO_UDP, - .me = THIS_MODULE, - }, +static struct xt_match tcp_matchstruct = { + .name = "tcp", + .match = tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .family = AF_INET, + .checkentry = tcp_checkentry, + .me = THIS_MODULE, +}; + +static struct xt_match tcp6_matchstruct = { + .name = "tcp", + .match = 
tcp_match, + .matchsize = sizeof(struct xt_tcp), + .proto = IPPROTO_TCP, + .family = AF_INET6, + .checkentry = tcp_checkentry, + .me = THIS_MODULE, +}; + +static struct xt_match udp_matchstruct = { + .name = "udp", + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .family = AF_INET, + .checkentry = udp_checkentry, + .me = THIS_MODULE, +}; +static struct xt_match udp6_matchstruct = { + .name = "udp", + .match = udp_match, + .matchsize = sizeof(struct xt_udp), + .proto = IPPROTO_UDP, + .family = AF_INET6, + .checkentry = udp_checkentry, + .me = THIS_MODULE, }; static int __init xt_tcpudp_init(void) { - return xt_register_matches(xt_tcpudp_match, - ARRAY_SIZE(xt_tcpudp_match)); + int ret; + ret = xt_register_match(&tcp_matchstruct); + if (ret) + return ret; + + ret = xt_register_match(&tcp6_matchstruct); + if (ret) + goto out_unreg_tcp; + + ret = xt_register_match(&udp_matchstruct); + if (ret) + goto out_unreg_tcp6; + + ret = xt_register_match(&udp6_matchstruct); + if (ret) + goto out_unreg_udp; + + return ret; + +out_unreg_udp: + xt_unregister_match(&udp_matchstruct); +out_unreg_tcp6: + xt_unregister_match(&tcp6_matchstruct); +out_unreg_tcp: + xt_unregister_match(&tcp_matchstruct); + return ret; } static void __exit xt_tcpudp_fini(void) { - xt_unregister_matches(xt_tcpudp_match, ARRAY_SIZE(xt_tcpudp_match)); + xt_unregister_match(&udp6_matchstruct); + xt_unregister_match(&udp_matchstruct); + xt_unregister_match(&tcp6_matchstruct); + xt_unregister_match(&tcp_matchstruct); } module_init(xt_tcpudp_init); diff --git a/trunk/net/netlabel/Kconfig b/trunk/net/netlabel/Kconfig deleted file mode 100644 index fe23cb7f1e87..000000000000 --- a/trunk/net/netlabel/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -# -# NetLabel configuration -# - -config NETLABEL - bool "NetLabel subsystem support" - depends on NET && SECURITY - default n - ---help--- - NetLabel provides support for explicit network packet labeling - protocols such as CIPSO and RIPSO. 
For more information see - Documentation/netlabel. - - If you are unsure, say N. diff --git a/trunk/net/netlabel/Makefile b/trunk/net/netlabel/Makefile deleted file mode 100644 index 8af18c0a47d9..000000000000 --- a/trunk/net/netlabel/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -# -# Makefile for the NetLabel subsystem. -# -# Feb 9, 2006, Paul Moore -# - -# base objects -obj-y := netlabel_user.o netlabel_kapi.o netlabel_domainhash.o - -# management objects -obj-y += netlabel_mgmt.o - -# protocol modules -obj-y += netlabel_unlabeled.o -obj-y += netlabel_cipso_v4.o - diff --git a/trunk/net/netlabel/netlabel_cipso_v4.c b/trunk/net/netlabel/netlabel_cipso_v4.c deleted file mode 100644 index a4f40adc447b..000000000000 --- a/trunk/net/netlabel/netlabel_cipso_v4.c +++ /dev/null @@ -1,542 +0,0 @@ -/* - * NetLabel CIPSO/IPv4 Support - * - * This file defines the CIPSO/IPv4 functions for the NetLabel system. The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "netlabel_user.h" -#include "netlabel_cipso_v4.h" - -/* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_cipsov4_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_CIPSOV4_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = 0, -}; - - -/* - * Helper Functions - */ - -/** - * netlbl_cipsov4_doi_free - Frees a CIPSO V4 DOI definition - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to the DOI definition can be released - * safely. - * - */ -static void netlbl_cipsov4_doi_free(struct rcu_head *entry) -{ - struct cipso_v4_doi *ptr; - - ptr = container_of(entry, struct cipso_v4_doi, rcu); - switch (ptr->type) { - case CIPSO_V4_MAP_STD: - kfree(ptr->map.std->lvl.cipso); - kfree(ptr->map.std->lvl.local); - kfree(ptr->map.std->cat.cipso); - kfree(ptr->map.std->cat.local); - break; - } - kfree(ptr); -} - - -/* - * NetLabel Command Handlers - */ - -/** - * netlbl_cipsov4_add_std - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer - * - * Description: - * Create a new CIPSO_V4_MAP_STD DOI definition based on the given ADD message - * and add it to the CIPSO V4 engine. Return zero on success and non-zero on - * error. 
- * - */ -static int netlbl_cipsov4_add_std(u32 doi, struct nlattr *msg, size_t msg_size) -{ - int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; - u32 num_lvls; - u32 num_cats; - struct cipso_v4_doi *doi_def = NULL; - u32 iter; - u32 tmp_val_a; - u32 tmp_val_b; - - if (msg_len < NETLBL_LEN_U32) - goto add_std_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_std_failure; - - doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->map.std = kzalloc(sizeof(*doi_def->map.std), GFP_KERNEL); - if (doi_def->map.std == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->type = CIPSO_V4_MAP_STD; - - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_std_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_std_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; - - if (msg_len < 6 * NETLBL_LEN_U32) - goto add_std_failure; - - num_lvls = netlbl_getinc_u32(&msg, &msg_len); - if (num_lvls == 0) - goto add_std_failure; - doi_def->map.std->lvl.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->lvl.local_size > CIPSO_V4_MAX_LOC_LVLS) - goto add_std_failure; - doi_def->map.std->lvl.local = kcalloc(doi_def->map.std->lvl.local_size, - sizeof(u32), - GFP_KERNEL); - if (doi_def->map.std->lvl.local == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->map.std->lvl.cipso_size = netlbl_getinc_u8(&msg, &msg_len); - if (doi_def->map.std->lvl.cipso_size > CIPSO_V4_MAX_REM_LVLS) - goto add_std_failure; - doi_def->map.std->lvl.cipso = kcalloc(doi_def->map.std->lvl.cipso_size, - sizeof(u32), - GFP_KERNEL); - if (doi_def->map.std->lvl.cipso == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - 
} - - num_cats = netlbl_getinc_u32(&msg, &msg_len); - doi_def->map.std->cat.local_size = netlbl_getinc_u32(&msg, &msg_len); - if (doi_def->map.std->cat.local_size > CIPSO_V4_MAX_LOC_CATS) - goto add_std_failure; - doi_def->map.std->cat.local = kcalloc(doi_def->map.std->cat.local_size, - sizeof(u32), - GFP_KERNEL); - if (doi_def->map.std->cat.local == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - doi_def->map.std->cat.cipso_size = netlbl_getinc_u16(&msg, &msg_len); - if (doi_def->map.std->cat.cipso_size > CIPSO_V4_MAX_REM_CATS) - goto add_std_failure; - doi_def->map.std->cat.cipso = kcalloc(doi_def->map.std->cat.cipso_size, - sizeof(u32), - GFP_KERNEL); - if (doi_def->map.std->cat.cipso == NULL) { - ret_val = -ENOMEM; - goto add_std_failure; - } - - if (msg_len < - num_lvls * (NETLBL_LEN_U32 + NETLBL_LEN_U8) + - num_cats * (NETLBL_LEN_U32 + NETLBL_LEN_U16)) - goto add_std_failure; - - for (iter = 0; iter < doi_def->map.std->lvl.cipso_size; iter++) - doi_def->map.std->lvl.cipso[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->lvl.local_size; iter++) - doi_def->map.std->lvl.local[iter] = CIPSO_V4_INV_LVL; - for (iter = 0; iter < doi_def->map.std->cat.cipso_size; iter++) - doi_def->map.std->cat.cipso[iter] = CIPSO_V4_INV_CAT; - for (iter = 0; iter < doi_def->map.std->cat.local_size; iter++) - doi_def->map.std->cat.local[iter] = CIPSO_V4_INV_CAT; - - for (iter = 0; iter < num_lvls; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u8(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->lvl.local_size || - tmp_val_b >= doi_def->map.std->lvl.cipso_size) - goto add_std_failure; - - doi_def->map.std->lvl.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->lvl.local[tmp_val_a] = tmp_val_b; - } - - for (iter = 0; iter < num_cats; iter++) { - tmp_val_a = netlbl_getinc_u32(&msg, &msg_len); - tmp_val_b = netlbl_getinc_u16(&msg, &msg_len); - - if (tmp_val_a >= doi_def->map.std->cat.local_size || - tmp_val_b >= 
doi_def->map.std->cat.cipso_size) - goto add_std_failure; - - doi_def->map.std->cat.cipso[tmp_val_b] = tmp_val_a; - doi_def->map.std->cat.local[tmp_val_a] = tmp_val_b; - } - - doi_def->doi = doi; - ret_val = cipso_v4_doi_add(doi_def); - if (ret_val != 0) - goto add_std_failure; - return 0; - -add_std_failure: - if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); - return ret_val; -} - -/** - * netlbl_cipsov4_add_pass - Adds a CIPSO V4 DOI definition - * @doi: the DOI value - * @msg: the ADD message data - * @msg_size: the size of the ADD message buffer - * - * Description: - * Create a new CIPSO_V4_MAP_PASS DOI definition based on the given ADD message - * and add it to the CIPSO V4 engine. Return zero on success and non-zero on - * error. - * - */ -static int netlbl_cipsov4_add_pass(u32 doi, - struct nlattr *msg, - size_t msg_size) -{ - int ret_val = -EINVAL; - int msg_len = msg_size; - u32 num_tags; - struct cipso_v4_doi *doi_def = NULL; - u32 iter; - - if (msg_len < NETLBL_LEN_U32) - goto add_pass_failure; - num_tags = netlbl_getinc_u32(&msg, &msg_len); - if (num_tags == 0 || num_tags > CIPSO_V4_TAG_MAXCNT) - goto add_pass_failure; - - doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); - if (doi_def == NULL) { - ret_val = -ENOMEM; - goto add_pass_failure; - } - doi_def->type = CIPSO_V4_MAP_PASS; - - for (iter = 0; iter < num_tags; iter++) { - if (msg_len < NETLBL_LEN_U8) - goto add_pass_failure; - doi_def->tags[iter] = netlbl_getinc_u8(&msg, &msg_len); - switch (doi_def->tags[iter]) { - case CIPSO_V4_TAG_RBITMAP: - break; - default: - goto add_pass_failure; - } - } - if (iter < CIPSO_V4_TAG_MAXCNT) - doi_def->tags[iter] = CIPSO_V4_TAG_INVALID; - - doi_def->doi = doi; - ret_val = cipso_v4_doi_add(doi_def); - if (ret_val != 0) - goto add_pass_failure; - return 0; - -add_pass_failure: - if (doi_def) - netlbl_cipsov4_doi_free(&doi_def->rcu); - return ret_val; -} - -/** - * netlbl_cipsov4_add - Handle an ADD message - * @skb: the NETLINK buffer - * @info: the 
Generic NETLINK info block - * - * Description: - * Create a new DOI definition based on the given ADD message and add it to the - * CIPSO V4 engine. Returns zero on success, negative values on failure. - * - */ -static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) - -{ - int ret_val = -EINVAL; - u32 doi; - u32 map_type; - int msg_len = netlbl_netlink_payload_len(skb); - struct nlattr *msg = netlbl_netlink_payload_data(skb); - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto add_return; - - if (msg_len < 2 * NETLBL_LEN_U32) - goto add_return; - - doi = netlbl_getinc_u32(&msg, &msg_len); - map_type = netlbl_getinc_u32(&msg, &msg_len); - switch (map_type) { - case CIPSO_V4_MAP_STD: - ret_val = netlbl_cipsov4_add_std(doi, msg, msg_len); - break; - case CIPSO_V4_MAP_PASS: - ret_val = netlbl_cipsov4_add_pass(doi, msg, msg_len); - break; - } - -add_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_cipsov4_list - Handle a LIST message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated LIST message and respond accordingly. Returns - * zero on success and negative values on error. 
- * - */ -static int netlbl_cipsov4_list(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -EINVAL; - u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - struct sk_buff *ans_skb; - - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) - goto list_failure; - - doi = nla_get_u32(msg); - ans_skb = cipso_v4_doi_dump(doi, NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) { - ret_val = -ENOMEM; - goto list_failure; - } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LIST); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto list_failure; - - return 0; - -list_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_cipsov4_listall - Handle a LISTALL message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated LISTALL message and respond accordingly. Returns - * zero on success and negative values on error. - * - */ -static int netlbl_cipsov4_listall(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -EINVAL; - struct sk_buff *ans_skb; - - ans_skb = cipso_v4_doi_dump_all(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) { - ret_val = -ENOMEM; - goto listall_failure; - } - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_LISTALL); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto listall_failure; - - return 0; - -listall_failure: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_cipsov4_remove - Handle a REMOVE message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated REMOVE message and respond accordingly. 
Returns - * zero on success, negative values on failure. - * - */ -static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val; - u32 doi; - struct nlattr *msg = netlbl_netlink_payload_data(skb); - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; - - if (netlbl_netlink_payload_len(skb) != NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto remove_return; - } - - doi = nla_get_u32(msg); - ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); - -remove_return: - netlbl_netlink_send_ack(info, - netlbl_cipsov4_gnl_family.id, - NLBL_CIPSOV4_C_ACK, - -ret_val); - return ret_val; -} - -/* - * NetLabel Generic NETLINK Command Definitions - */ - -static struct genl_ops netlbl_cipsov4_genl_c_add = { - .cmd = NLBL_CIPSOV4_C_ADD, - .flags = 0, - .doit = netlbl_cipsov4_add, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_cipsov4_genl_c_remove = { - .cmd = NLBL_CIPSOV4_C_REMOVE, - .flags = 0, - .doit = netlbl_cipsov4_remove, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_cipsov4_genl_c_list = { - .cmd = NLBL_CIPSOV4_C_LIST, - .flags = 0, - .doit = netlbl_cipsov4_list, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_cipsov4_genl_c_listall = { - .cmd = NLBL_CIPSOV4_C_LISTALL, - .flags = 0, - .doit = netlbl_cipsov4_listall, - .dumpit = NULL, -}; - -/* - * NetLabel Generic NETLINK Protocol Functions - */ - -/** - * netlbl_cipsov4_genl_init - Register the CIPSOv4 NetLabel component - * - * Description: - * Register the CIPSOv4 packet NetLabel component with the Generic NETLINK - * mechanism. Returns zero on success, negative values on failure. 
- * - */ -int netlbl_cipsov4_genl_init(void) -{ - int ret_val; - - ret_val = genl_register_family(&netlbl_cipsov4_gnl_family); - if (ret_val != 0) - return ret_val; - - ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, - &netlbl_cipsov4_genl_c_add); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, - &netlbl_cipsov4_genl_c_remove); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, - &netlbl_cipsov4_genl_c_list); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_cipsov4_gnl_family, - &netlbl_cipsov4_genl_c_listall); - if (ret_val != 0) - return ret_val; - - return 0; -} diff --git a/trunk/net/netlabel/netlabel_cipso_v4.h b/trunk/net/netlabel/netlabel_cipso_v4.h deleted file mode 100644 index 4c6ff4b93004..000000000000 --- a/trunk/net/netlabel/netlabel_cipso_v4.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * NetLabel CIPSO/IPv4 Support - * - * This file defines the CIPSO/IPv4 functions for the NetLabel system. The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_CIPSO_V4 -#define _NETLABEL_CIPSO_V4 - -#include - -/* - * The following NetLabel payloads are supported by the CIPSO subsystem, all - * of which are preceeded by the nlmsghdr struct. - * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. - * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values - * - * o ADD: - * Sent by an application to add a new DOI mapping table, after completion - * of the task the kernel should ACK this message. - * - * +---------------+--------------------+---------------------+ - * | DOI (32 bits) | map type (32 bits) | tag count (32 bits) | ... - * +---------------+--------------------+---------------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * +-------------- ---- --- -- - - * | mapping data - * +-------------- ---- --- -- - - * - * DOI: the DOI value - * map type: the mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * tag count: the number of tags, must be greater than zero - * tag: the CIPSO tag for the DOI, tags listed first are given - * higher priorirty when sending packets - * mapping data: specific to the map type (see below) - * - * CIPSO_V4_MAP_STD - * - * +------------------+-----------------------+----------------------+ - * | levels (32 bits) | max l level (32 bits) | max r level (8 bits) | ... 
- * +------------------+-----------------------+----------------------+ - * - * +----------------------+---------------------+---------------------+ - * | categories (32 bits) | max l cat (32 bits) | max r cat (16 bits) | ... - * +----------------------+---------------------+---------------------+ - * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated - * +--------------------------+-------------------------+ - * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ - * - * levels: the number of level mappings - * max l level: the highest local level - * max r level: the highest remote/CIPSO level - * categories: the number of category mappings - * max l cat: the highest local category - * max r cat: the highest remote/CIPSO category - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping - * - * CIPSO_V4_MAP_PASS - * - * No mapping data is needed for this map type. - * - * o REMOVE: - * Sent by an application to remove a specific DOI mapping table from the - * CIPSO V4 system. The kernel should ACK this message. - * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ - * - * DOI: the DOI value - * - * o LIST: - * Sent by an application to list the details of a DOI definition. The - * kernel should send an ACK on error or a response as indicated below. The - * application generated message format is shown below. - * - * +---------------+ - * | DOI (32 bits) | - * +---------------+ - * - * DOI: the DOI value - * - * The valid response message format depends on the type of the DOI mapping, - * the known formats are shown below. 
- * - * +--------------------+ - * | map type (32 bits) | ... - * +--------------------+ - * - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) - * - * (map type == CIPSO_V4_MAP_STD) - * - * +----------------+------------------+----------------------+ - * | tags (32 bits) | levels (32 bits) | categories (32 bits) | ... - * +----------------+------------------+----------------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * +--------------------------+-------------------------+ - * | local level #X (32 bits) | CIPSO level #X (8 bits) | ... repeated - * +--------------------------+-------------------------+ - * - * +-----------------------------+-----------------------------+ - * | local category #X (32 bits) | CIPSO category #X (16 bits) | ... repeated - * +-----------------------------+-----------------------------+ - * - * tags: the number of CIPSO tag types - * levels: the number of level mappings - * categories: the number of category mappings - * tag: the tag number, tags listed first are given higher - * priority when sending packets - * local level: the local part of a level mapping - * CIPSO level: the remote/CIPSO part of a level mapping - * local category: the local part of a category mapping - * CIPSO category: the remote/CIPSO part of a category mapping - * - * (map type == CIPSO_V4_MAP_PASS) - * - * +----------------+ - * | tags (32 bits) | ... - * +----------------+ - * - * +-----------------+ - * | tag #X (8 bits) | ... repeated - * +-----------------+ - * - * tags: the number of CIPSO tag types - * tag: the tag number, tags listed first are given higher - * priority when sending packets - * - * o LISTALL: - * This message is sent by an application to list the valid DOIs on the - * system. There is no payload and the kernel should respond with an ACK - * or the following message. 
- * - * +---------------------+------------------+-----------------------+ - * | DOI count (32 bits) | DOI #X (32 bits) | map type #X (32 bits) | - * +---------------------+------------------+-----------------------+ - * - * +-----------------------+ - * | map type #X (32 bits) | ... - * +-----------------------+ - * - * DOI count: the number of DOIs - * DOI: the DOI value - * map type: the DOI mapping table type (defined in the cipso_ipv4.h - * header as CIPSO_V4_MAP_*) - * - */ - -/* NetLabel CIPSOv4 commands */ -enum { - NLBL_CIPSOV4_C_UNSPEC, - NLBL_CIPSOV4_C_ACK, - NLBL_CIPSOV4_C_ADD, - NLBL_CIPSOV4_C_REMOVE, - NLBL_CIPSOV4_C_LIST, - NLBL_CIPSOV4_C_LISTALL, - __NLBL_CIPSOV4_C_MAX, -}; -#define NLBL_CIPSOV4_C_MAX (__NLBL_CIPSOV4_C_MAX - 1) - -/* NetLabel protocol functions */ -int netlbl_cipsov4_genl_init(void); - -#endif diff --git a/trunk/net/netlabel/netlabel_domainhash.c b/trunk/net/netlabel/netlabel_domainhash.c deleted file mode 100644 index 0489a1378101..000000000000 --- a/trunk/net/netlabel/netlabel_domainhash.c +++ /dev/null @@ -1,513 +0,0 @@ -/* - * NetLabel Domain Hash Table - * - * This file manages the domain hash table that NetLabel uses to determine - * which network labeling protocol to use for a given domain. The NetLabel - * system manages static and dynamic label mappings for network protocols such - * as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "netlabel_mgmt.h" -#include "netlabel_domainhash.h" - -struct netlbl_domhsh_tbl { - struct list_head *tbl; - u32 size; -}; - -/* Domain hash table */ -/* XXX - updates should be so rare that having one spinlock for the entire - * hash table should be okay */ -static DEFINE_SPINLOCK(netlbl_domhsh_lock); -static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; - -/* Default domain mapping */ -static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); -static struct netlbl_dom_map *netlbl_domhsh_def = NULL; - -/* - * Domain Hash Table Helper Functions - */ - -/** - * netlbl_domhsh_free_entry - Frees a domain hash table entry - * @entry: the entry's RCU field - * - * Description: - * This function is designed to be used as a callback to the call_rcu() - * function so that the memory allocated to a hash table entry can be released - * safely. - * - */ -static void netlbl_domhsh_free_entry(struct rcu_head *entry) -{ - struct netlbl_dom_map *ptr; - - ptr = container_of(entry, struct netlbl_dom_map, rcu); - kfree(ptr->domain); - kfree(ptr); -} - -/** - * netlbl_domhsh_hash - Hashing function for the domain hash table - * @domain: the domain name to hash - * - * Description: - * This is the hashing function for the domain hash table, it returns the - * correct bucket number for the domain. The caller is responsibile for - * calling the rcu_read_[un]lock() functions. 
- * - */ -static u32 netlbl_domhsh_hash(const char *key) -{ - u32 iter; - u32 val; - u32 len; - - /* This is taken (with slight modification) from - * security/selinux/ss/symtab.c:symhash() */ - - for (iter = 0, val = 0, len = strlen(key); iter < len; iter++) - val = (val << 4 | (val >> (8 * sizeof(u32) - 4))) ^ key[iter]; - return val & (rcu_dereference(netlbl_domhsh)->size - 1); -} - -/** - * netlbl_domhsh_search - Search for a domain entry - * @domain: the domain - * @def: return default if no match is found - * - * Description: - * Searches the domain hash table and returns a pointer to the hash table - * entry if found, otherwise NULL is returned. If @def is non-zero and a - * match is not found in the domain hash table the default mapping is returned - * if it exists. The caller is responsibile for the rcu hash table locks - * (i.e. the caller much call rcu_read_[un]lock()). - * - */ -static struct netlbl_dom_map *netlbl_domhsh_search(const char *domain, u32 def) -{ - u32 bkt; - struct netlbl_dom_map *iter; - - if (domain != NULL) { - bkt = netlbl_domhsh_hash(domain); - list_for_each_entry_rcu(iter, &netlbl_domhsh->tbl[bkt], list) - if (iter->valid && strcmp(iter->domain, domain) == 0) - return iter; - } - - if (def != 0) { - iter = rcu_dereference(netlbl_domhsh_def); - if (iter != NULL && iter->valid) - return iter; - } - - return NULL; -} - -/* - * Domain Hash Table Functions - */ - -/** - * netlbl_domhsh_init - Init for the domain hash - * @size: the number of bits to use for the hash buckets - * - * Description: - * Initializes the domain hash table, should be called only by - * netlbl_user_init() during initialization. Returns zero on success, non-zero - * values on error. 
- * - */ -int netlbl_domhsh_init(u32 size) -{ - u32 iter; - struct netlbl_domhsh_tbl *hsh_tbl; - - if (size == 0) - return -EINVAL; - - hsh_tbl = kmalloc(sizeof(*hsh_tbl), GFP_KERNEL); - if (hsh_tbl == NULL) - return -ENOMEM; - hsh_tbl->size = 1 << size; - hsh_tbl->tbl = kcalloc(hsh_tbl->size, - sizeof(struct list_head), - GFP_KERNEL); - if (hsh_tbl->tbl == NULL) { - kfree(hsh_tbl); - return -ENOMEM; - } - for (iter = 0; iter < hsh_tbl->size; iter++) - INIT_LIST_HEAD(&hsh_tbl->tbl[iter]); - - rcu_read_lock(); - spin_lock(&netlbl_domhsh_lock); - rcu_assign_pointer(netlbl_domhsh, hsh_tbl); - spin_unlock(&netlbl_domhsh_lock); - rcu_read_unlock(); - - return 0; -} - -/** - * netlbl_domhsh_add - Adds a entry to the domain hash table - * @entry: the entry to add - * - * Description: - * Adds a new entry to the domain hash table and handles any updates to the - * lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. - * - */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry) -{ - int ret_val; - u32 bkt; - - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = 0; - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_add(entry->type_def.cipsov4, - entry->domain); - break; - default: - return -EINVAL; - } - if (ret_val != 0) - return ret_val; - - entry->valid = 1; - INIT_RCU_HEAD(&entry->rcu); - - ret_val = 0; - rcu_read_lock(); - if (entry->domain != NULL) { - bkt = netlbl_domhsh_hash(entry->domain); - spin_lock(&netlbl_domhsh_lock); - if (netlbl_domhsh_search(entry->domain, 0) == NULL) - list_add_tail_rcu(&entry->list, - &netlbl_domhsh->tbl[bkt]); - else - ret_val = -EEXIST; - spin_unlock(&netlbl_domhsh_lock); - } else if (entry->domain == NULL) { - INIT_LIST_HEAD(&entry->list); - spin_lock(&netlbl_domhsh_def_lock); - if (rcu_dereference(netlbl_domhsh_def) == NULL) - rcu_assign_pointer(netlbl_domhsh_def, entry); - else - ret_val = -EEXIST; - spin_unlock(&netlbl_domhsh_def_lock); - } else - ret_val = 
-EINVAL; - rcu_read_unlock(); - - if (ret_val != 0) { - switch (entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - if (cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain) != 0) - BUG(); - break; - } - } - - return ret_val; -} - -/** - * netlbl_domhsh_add_default - Adds the default entry to the domain hash table - * @entry: the entry to add - * - * Description: - * Adds a new default entry to the domain hash table and handles any updates - * to the lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. - * - */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) -{ - return netlbl_domhsh_add(entry); -} - -/** - * netlbl_domhsh_remove - Removes an entry from the domain hash table - * @domain: the domain to remove - * - * Description: - * Removes an entry from the domain hash table and handles any updates to the - * lower level protocol handler (i.e. CIPSO). Returns zero on success, - * negative on failure. - * - */ -int netlbl_domhsh_remove(const char *domain) -{ - int ret_val = -ENOENT; - struct netlbl_dom_map *entry; - - rcu_read_lock(); - if (domain != NULL) - entry = netlbl_domhsh_search(domain, 0); - else - entry = netlbl_domhsh_search(domain, 1); - if (entry == NULL) - goto remove_return; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_doi_domhsh_remove(entry->type_def.cipsov4, - entry->domain); - if (ret_val != 0) - goto remove_return; - break; - } - ret_val = 0; - if (entry != rcu_dereference(netlbl_domhsh_def)) { - spin_lock(&netlbl_domhsh_lock); - if (entry->valid) { - entry->valid = 0; - list_del_rcu(&entry->list); - } else - ret_val = -ENOENT; - spin_unlock(&netlbl_domhsh_lock); - } else { - spin_lock(&netlbl_domhsh_def_lock); - if (entry->valid) { - entry->valid = 0; - rcu_assign_pointer(netlbl_domhsh_def, NULL); - } else - ret_val = -ENOENT; - spin_unlock(&netlbl_domhsh_def_lock); - } - if (ret_val == 0) - 
call_rcu(&entry->rcu, netlbl_domhsh_free_entry); - -remove_return: - rcu_read_unlock(); - return ret_val; -} - -/** - * netlbl_domhsh_remove_default - Removes the default entry from the table - * - * Description: - * Removes/resets the default entry for the domain hash table and handles any - * updates to the lower level protocol handler (i.e. CIPSO). Returns zero on - * success, non-zero on failure. - * - */ -int netlbl_domhsh_remove_default(void) -{ - return netlbl_domhsh_remove(NULL); -} - -/** - * netlbl_domhsh_getentry - Get an entry from the domain hash table - * @domain: the domain name to search for - * - * Description: - * Look through the domain hash table searching for an entry to match @domain, - * return a pointer to a copy of the entry or NULL. The caller is responsibile - * for ensuring that rcu_read_[un]lock() is called. - * - */ -struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) -{ - return netlbl_domhsh_search(domain, 1); -} - -/** - * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff - * - * Description: - * Dump the domain hash table into a buffer suitable for returning to an - * application in response to a NetLabel management DOMAIN message. This - * function may fail if another process is growing the hash table at the same - * time. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a - * pointer to a sk_buff on success, NULL on error. 
- * - */ -struct sk_buff *netlbl_domhsh_dump(size_t headroom) -{ - struct sk_buff *skb = NULL; - ssize_t buf_len; - u32 bkt_iter; - u32 dom_cnt = 0; - struct netlbl_domhsh_tbl *hsh_tbl; - struct netlbl_dom_map *list_iter; - ssize_t tmp_len; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - buf_len += NETLBL_LEN_U32 + - nla_total_size(strlen(list_iter->domain) + 1); - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - dom_cnt++; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_failure; - - if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32; - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - tmp_len = nla_total_size(strlen(list_iter->domain) + - 1); - if (buf_len < NETLBL_LEN_U32 + tmp_len) - goto dump_failure; - if (nla_put_string(skb, - NLA_STRING, - list_iter->domain) != 0) - goto dump_failure; - if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32 + tmp_len; - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->type) != 0) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->doi) != 0) - goto dump_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; - } - } - rcu_read_unlock(); - - return skb; - -dump_failure: - rcu_read_unlock(); - kfree_skb(skb); - return NULL; -} - -/** - * netlbl_domhsh_dump_default - Dump the default domain mapping 
into a sk_buff - * - * Description: - * Dump the default domain mapping into a buffer suitable for returning to an - * application in response to a NetLabel management DEFDOMAIN message. This - * function may fail if another process is changing the default domain mapping - * at the same time. The returned sk_buff has room at the front of the - * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message - * format. Returns a pointer to a sk_buff on success, NULL on error. - * - */ -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom) -{ - struct sk_buff *skb; - ssize_t buf_len; - struct netlbl_dom_map *entry; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - entry = rcu_dereference(netlbl_domhsh_def); - if (entry != NULL) - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_default_failure; - - if (entry != rcu_dereference(netlbl_domhsh_def)) - goto dump_default_failure; - if (entry != NULL) { - if (nla_put_u32(skb, NLA_U32, entry->type) != 0) - goto dump_default_failure; - buf_len -= NETLBL_LEN_U32; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->type) != 0) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->doi) != 0) - goto dump_default_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; - } - } else - nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE); - rcu_read_unlock(); - - return skb; - -dump_default_failure: - rcu_read_unlock(); - kfree_skb(skb); - return NULL; -} diff --git a/trunk/net/netlabel/netlabel_domainhash.h b/trunk/net/netlabel/netlabel_domainhash.h deleted file mode 100644 index 99a2287de246..000000000000 --- 
a/trunk/net/netlabel/netlabel_domainhash.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * NetLabel Domain Hash Table - * - * This file manages the domain hash table that NetLabel uses to determine - * which network labeling protocol to use for a given domain. The NetLabel - * system manages static and dynamic label mappings for network protocols such - * as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_DOMAINHASH_H -#define _NETLABEL_DOMAINHASH_H - -#include -#include -#include - -/* Domain hash table size */ -/* XXX - currently this number is an uneducated guess */ -#define NETLBL_DOMHSH_BITSIZE 7 - -/* Domain mapping definition struct */ -struct netlbl_dom_map { - char *domain; - u32 type; - union { - struct cipso_v4_doi *cipsov4; - } type_def; - - u32 valid; - struct list_head list; - struct rcu_head rcu; -}; - -/* init function */ -int netlbl_domhsh_init(u32 size); - -/* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); -int netlbl_domhsh_remove_default(void); -struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct sk_buff 
*netlbl_domhsh_dump(size_t headroom); -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); - -#endif diff --git a/trunk/net/netlabel/netlabel_kapi.c b/trunk/net/netlabel/netlabel_kapi.c deleted file mode 100644 index 0fd8aaafe23f..000000000000 --- a/trunk/net/netlabel/netlabel_kapi.c +++ /dev/null @@ -1,231 +0,0 @@ -/* - * NetLabel Kernel API - * - * This file defines the kernel API for the NetLabel system. The NetLabel - * system manages static and dynamic label mappings for network protocols such - * as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include - -#include "netlabel_domainhash.h" -#include "netlabel_unlabeled.h" -#include "netlabel_user.h" - -/* - * LSM Functions - */ - -/** - * netlbl_socket_setattr - Label a socket using the correct protocol - * @sock: the socket to label - * @secattr: the security attributes - * - * Description: - * Attach the correct label to the given socket using the security attributes - * specified in @secattr. 
This function requires exclusive access to - * @sock->sk, which means it either needs to be in the process of being - * created or locked via lock_sock(sock->sk). Returns zero on success, - * negative values on failure. - * - */ -int netlbl_socket_setattr(const struct socket *sock, - const struct netlbl_lsm_secattr *secattr) -{ - int ret_val = -ENOENT; - struct netlbl_dom_map *dom_entry; - - rcu_read_lock(); - dom_entry = netlbl_domhsh_getentry(secattr->domain); - if (dom_entry == NULL) - goto socket_setattr_return; - switch (dom_entry->type) { - case NETLBL_NLTYPE_CIPSOV4: - ret_val = cipso_v4_socket_setattr(sock, - dom_entry->type_def.cipsov4, - secattr); - break; - case NETLBL_NLTYPE_UNLABELED: - ret_val = 0; - break; - default: - ret_val = -ENOENT; - } - -socket_setattr_return: - rcu_read_unlock(); - return ret_val; -} - -/** - * netlbl_socket_getattr - Determine the security attributes of a socket - * @sock: the socket - * @secattr: the security attributes - * - * Description: - * Examines the given socket to see any NetLabel style labeling has been - * applied to the socket, if so it parses the socket label and returns the - * security attributes in @secattr. Returns zero on success, negative values - * on failure. - * - */ -int netlbl_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val; - - ret_val = cipso_v4_socket_getattr(sock, secattr); - if (ret_val == 0) - return 0; - - return netlbl_unlabel_getattr(secattr); -} - -/** - * netlbl_skbuff_getattr - Determine the security attributes of a packet - * @skb: the packet - * @secattr: the security attributes - * - * Description: - * Examines the given packet to see if a recognized form of packet labeling - * is present, if so it parses the packet label and returns the security - * attributes in @secattr. Returns zero on success, negative values on - * failure. 
- * - */ -int netlbl_skbuff_getattr(const struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr) -{ - int ret_val; - - ret_val = cipso_v4_skbuff_getattr(skb, secattr); - if (ret_val == 0) - return 0; - - return netlbl_unlabel_getattr(secattr); -} - -/** - * netlbl_skbuff_err - Handle a LSM error on a sk_buff - * @skb: the packet - * @error: the error code - * - * Description: - * Deal with a LSM problem when handling the packet in @skb, typically this is - * a permission denied problem (-EACCES). The correct action is determined - * according to the packet's labeling protocol. - * - */ -void netlbl_skbuff_err(struct sk_buff *skb, int error) -{ - if (CIPSO_V4_OPTEXIST(skb)) - cipso_v4_error(skb, error, 0); -} - -/** - * netlbl_cache_invalidate - Invalidate all of the NetLabel protocol caches - * - * Description: - * For all of the NetLabel protocols that support some form of label mapping - * cache, invalidate the cache. Returns zero on success, negative values on - * error. - * - */ -void netlbl_cache_invalidate(void) -{ - cipso_v4_cache_invalidate(); -} - -/** - * netlbl_cache_add - Add an entry to a NetLabel protocol cache - * @skb: the packet - * @secattr: the packet's security attributes - * - * Description: - * Add the LSM security attributes for the given packet to the underlying - * NetLabel protocol's label mapping cache. Returns zero on success, negative - * values on error. - * - */ -int netlbl_cache_add(const struct sk_buff *skb, - const struct netlbl_lsm_secattr *secattr) -{ - if (secattr->cache.data == NULL) - return -ENOMSG; - - if (CIPSO_V4_OPTEXIST(skb)) - return cipso_v4_cache_add(skb, secattr); - - return -ENOMSG; -} - -/* - * Setup Functions - */ - -/** - * netlbl_init - Initialize NetLabel - * - * Description: - * Perform the required NetLabel initialization before first use. 
- * - */ -static int __init netlbl_init(void) -{ - int ret_val; - - printk(KERN_INFO "NetLabel: Initializing\n"); - printk(KERN_INFO "NetLabel: domain hash size = %u\n", - (1 << NETLBL_DOMHSH_BITSIZE)); - printk(KERN_INFO "NetLabel: protocols =" - " UNLABELED" - " CIPSOv4" - "\n"); - - ret_val = netlbl_domhsh_init(NETLBL_DOMHSH_BITSIZE); - if (ret_val != 0) - goto init_failure; - - ret_val = netlbl_netlink_init(); - if (ret_val != 0) - goto init_failure; - - ret_val = netlbl_unlabel_defconf(); - if (ret_val != 0) - goto init_failure; - printk(KERN_INFO "NetLabel: unlabeled traffic allowed by default\n"); - - return 0; - -init_failure: - panic("NetLabel: failed to initialize properly (%d)\n", ret_val); -} - -subsys_initcall(netlbl_init); diff --git a/trunk/net/netlabel/netlabel_mgmt.c b/trunk/net/netlabel/netlabel_mgmt.c deleted file mode 100644 index 85bc11a1fc46..000000000000 --- a/trunk/net/netlabel/netlabel_mgmt.c +++ /dev/null @@ -1,624 +0,0 @@ -/* - * NetLabel Management Support - * - * This file defines the management functions for the NetLabel system. The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "netlabel_domainhash.h" -#include "netlabel_user.h" -#include "netlabel_mgmt.h" - -/* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_mgmt_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_MGMT_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = 0, -}; - - -/* - * NetLabel Command Handlers - */ - -/** - * netlbl_mgmt_add - Handle an ADD message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated ADD message and add the domains from the message - * to the hash table. See netlabel.h for a description of the message format. - * Returns zero on success, negative values on failure. 
- * - */ -static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; - struct netlbl_dom_map *entry = NULL; - u32 iter; - u32 tmp_val; - int tmp_size; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto add_failure; - - if (msg_len < NETLBL_LEN_U32) - goto add_failure; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); - - for (iter = 0; iter < count && msg_len > 0; iter++, entry = NULL) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto add_failure; - } - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } - tmp_size = nla_len(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len) { - ret_val = -EINVAL; - goto add_failure; - } - entry->domain = kmalloc(tmp_size, GFP_KERNEL); - if (entry->domain == NULL) { - ret_val = -ENOMEM; - goto add_failure; - } - nla_strlcpy(entry->domain, msg_ptr, tmp_size); - entry->domain[tmp_size - 1] = '\0'; - msg_ptr = nla_next(msg_ptr, &msg_len); - - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - entry->type = tmp_val; - switch (tmp_val) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto add_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock() here - * while we hold the result but since the entry - * will always be deleted when the CIPSO DOI - * is deleted we aren't going to keep the lock. 
*/ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - ret_val = -EINVAL; - goto add_failure; - } - ret_val = netlbl_domhsh_add(entry); - rcu_read_unlock(); - break; - default: - ret_val = -EINVAL; - } - if (ret_val != 0) - goto add_failure; - } - - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); - return 0; - -add_failure: - if (entry) - kfree(entry->domain); - kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_remove - Handle a REMOVE message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated REMOVE message and remove the specified domain - * mappings. Returns zero on success, negative values on failure. - * - */ -static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - u32 count; - u32 iter; - int tmp_size; - unsigned char *domain; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto remove_return; - - if (msg_len < NETLBL_LEN_U32) - goto remove_return; - count = netlbl_getinc_u32(&msg_ptr, &msg_len); - - for (iter = 0; iter < count && msg_len > 0; iter++) { - if (msg_len <= 0) { - ret_val = -EINVAL; - goto remove_return; - } - tmp_size = nla_len(msg_ptr); - domain = nla_data(msg_ptr); - if (tmp_size <= 0 || tmp_size > msg_len || - domain[tmp_size - 1] != '\0') { - ret_val = -EINVAL; - goto remove_return; - } - ret_val = netlbl_domhsh_remove(domain); - if (ret_val != 0) - goto remove_return; - msg_ptr = nla_next(msg_ptr, &msg_len); - } - - ret_val = 0; - -remove_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return 
ret_val; -} - -/** - * netlbl_mgmt_list - Handle a LIST message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated LIST message and dumps the domain hash table in a - * form suitable for use in a kernel generated LIST message. Returns zero on - * success, negative values on failure. - * - */ -static int netlbl_mgmt_list(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -ENOMEM; - struct sk_buff *ans_skb; - - ans_skb = netlbl_domhsh_dump(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) - goto list_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LIST); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto list_failure; - - return 0; - -list_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_adddef - Handle an ADDDEF message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated ADDDEF message and respond accordingly. Returns - * zero on success, negative values on failure. 
- * - */ -static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -EINVAL; - struct nlattr *msg_ptr = netlbl_netlink_payload_data(skb); - int msg_len = netlbl_netlink_payload_len(skb); - struct netlbl_dom_map *entry = NULL; - u32 tmp_val; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto adddef_failure; - - if (msg_len < NETLBL_LEN_U32) - goto adddef_failure; - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) { - ret_val = -ENOMEM; - goto adddef_failure; - } - - entry->type = tmp_val; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry); - break; - case NETLBL_NLTYPE_CIPSOV4: - if (msg_len < NETLBL_LEN_U32) { - ret_val = -EINVAL; - goto adddef_failure; - } - tmp_val = netlbl_getinc_u32(&msg_ptr, &msg_len); - /* We should be holding a rcu_read_lock here while we - * hold the result but since the entry will always be - * deleted when the CIPSO DOI is deleted we are going - * to skip the lock. */ - rcu_read_lock(); - entry->type_def.cipsov4 = cipso_v4_doi_getdef(tmp_val); - if (entry->type_def.cipsov4 == NULL) { - rcu_read_unlock(); - ret_val = -EINVAL; - goto adddef_failure; - } - ret_val = netlbl_domhsh_add_default(entry); - rcu_read_unlock(); - break; - default: - ret_val = -EINVAL; - } - if (ret_val != 0) - goto adddef_failure; - - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - NETLBL_E_OK); - return 0; - -adddef_failure: - kfree(entry); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_removedef - Handle a REMOVEDEF message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated REMOVEDEF message and remove the default domain - * mapping. Returns zero on success, negative values on failure. 
- * - */ -static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - goto removedef_return; - - ret_val = netlbl_domhsh_remove_default(); - -removedef_return: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_listdef - Handle a LISTDEF message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated LISTDEF message and dumps the default domain - * mapping in a form suitable for use in a kernel generated LISTDEF message. - * Returns zero on success, negative values on failure. - * - */ -static int netlbl_mgmt_listdef(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -ENOMEM; - struct sk_buff *ans_skb; - - ans_skb = netlbl_domhsh_dump_default(NLMSG_SPACE(GENL_HDRLEN)); - if (ans_skb == NULL) - goto listdef_failure; - netlbl_netlink_hdr_push(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_LISTDEF); - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto listdef_failure; - - return 0; - -listdef_failure: - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_modules - Handle a MODULES message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated MODULES message and respond accordingly. 
- * - */ -static int netlbl_mgmt_modules(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -ENOMEM; - size_t data_size; - u32 mod_count; - struct sk_buff *ans_skb = NULL; - - /* unlabeled + cipsov4 */ - mod_count = 2; - - data_size = GENL_HDRLEN + NETLBL_LEN_U32 + mod_count * NETLBL_LEN_U32; - ans_skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (ans_skb == NULL) - goto modules_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_MODULES) == NULL) - goto modules_failure; - - ret_val = nla_put_u32(ans_skb, NLA_U32, mod_count); - if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_UNLABELED); - if (ret_val != 0) - goto modules_failure; - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_NLTYPE_CIPSOV4); - if (ret_val != 0) - goto modules_failure; - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto modules_failure; - - return 0; - -modules_failure: - kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - -/** - * netlbl_mgmt_version - Handle a VERSION message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated VERSION message and respond accordingly. Returns - * zero on success, negative values on failure. 
- * - */ -static int netlbl_mgmt_version(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -ENOMEM; - struct sk_buff *ans_skb = NULL; - - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); - if (ans_skb == NULL) - goto version_failure; - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_VERSION) == NULL) - goto version_failure; - - ret_val = nla_put_u32(ans_skb, NLA_U32, NETLBL_PROTO_VERSION); - if (ret_val != 0) - goto version_failure; - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto version_failure; - - return 0; - -version_failure: - kfree_skb(ans_skb); - netlbl_netlink_send_ack(info, - netlbl_mgmt_gnl_family.id, - NLBL_MGMT_C_ACK, - -ret_val); - return ret_val; -} - - -/* - * NetLabel Generic NETLINK Command Definitions - */ - -static struct genl_ops netlbl_mgmt_genl_c_add = { - .cmd = NLBL_MGMT_C_ADD, - .flags = 0, - .doit = netlbl_mgmt_add, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_remove = { - .cmd = NLBL_MGMT_C_REMOVE, - .flags = 0, - .doit = netlbl_mgmt_remove, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_list = { - .cmd = NLBL_MGMT_C_LIST, - .flags = 0, - .doit = netlbl_mgmt_list, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_adddef = { - .cmd = NLBL_MGMT_C_ADDDEF, - .flags = 0, - .doit = netlbl_mgmt_adddef, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_removedef = { - .cmd = NLBL_MGMT_C_REMOVEDEF, - .flags = 0, - .doit = netlbl_mgmt_removedef, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_listdef = { - .cmd = NLBL_MGMT_C_LISTDEF, - .flags = 0, - .doit = netlbl_mgmt_listdef, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_modules = { - .cmd = NLBL_MGMT_C_MODULES, - .flags = 0, - .doit = netlbl_mgmt_modules, - .dumpit = NULL, -}; - -static struct genl_ops netlbl_mgmt_genl_c_version = { - .cmd = 
NLBL_MGMT_C_VERSION, - .flags = 0, - .doit = netlbl_mgmt_version, - .dumpit = NULL, -}; - -/* - * NetLabel Generic NETLINK Protocol Functions - */ - -/** - * netlbl_mgmt_genl_init - Register the NetLabel management component - * - * Description: - * Register the NetLabel management component with the Generic NETLINK - * mechanism. Returns zero on success, negative values on failure. - * - */ -int netlbl_mgmt_genl_init(void) -{ - int ret_val; - - ret_val = genl_register_family(&netlbl_mgmt_gnl_family); - if (ret_val != 0) - return ret_val; - - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_add); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_remove); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_list); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_adddef); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_removedef); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_listdef); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_modules); - if (ret_val != 0) - return ret_val; - ret_val = genl_register_ops(&netlbl_mgmt_gnl_family, - &netlbl_mgmt_genl_c_version); - if (ret_val != 0) - return ret_val; - - return 0; -} diff --git a/trunk/net/netlabel/netlabel_mgmt.h b/trunk/net/netlabel/netlabel_mgmt.h deleted file mode 100644 index fd6c6acbfa08..000000000000 --- a/trunk/net/netlabel/netlabel_mgmt.h +++ /dev/null @@ -1,246 +0,0 @@ -/* - * NetLabel Management Support - * - * This file defines the management functions for the NetLabel system. 
The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_MGMT_H -#define _NETLABEL_MGMT_H - -#include - -/* - * The following NetLabel payloads are supported by the management interface, - * all of which are preceeded by the nlmsghdr struct. - * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. - * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values - * - * o ADD: - * Sent by an application to add a domain mapping to the NetLabel system. - * The kernel should respond with an ACK. - * - * +-------------------+ - * | domains (32 bits) | ... 
- * +-------------------+ - * - * domains: the number of domains in the message - * - * +--------------------------+-------------------------+ - * | domain string (variable) | protocol type (32 bits) | ... - * +--------------------------+-------------------------+ - * - * +-------------- ---- --- -- - - * | mapping data ... repeated - * +-------------- ---- --- -- - - * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) - * - * NETLBL_NLTYPE_UNLABELED - * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value - * - * o REMOVE: - * Sent by an application to remove a domain mapping from the NetLabel - * system. The kernel should ACK this message. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message - * - * +--------------------------+ - * | domain string (variable) | ... - * +--------------------------+ - * - * domain string: the domain string, NULL terminated - * - * o LIST: - * This message can be sent either from an application or by the kernel in - * response to an application generated LIST message. When sent by an - * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. - * - * +-------------------+ - * | domains (32 bits) | ... - * +-------------------+ - * - * domains: the number of domains in the message - * - * +--------------------------+ - * | domain string (variable) | ... - * +--------------------------+ - * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... 
repeated - * +-------------------------+-------------- ---- --- -- - - * - * domain string: the domain string, NULL terminated - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) - * - * NETLBL_NLTYPE_UNLABELED - * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ - * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value - * - * o ADDDEF: - * Sent by an application to set the default domain mapping for the NetLabel - * system. The kernel should respond with an ACK. - * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... repeated - * +-------------------------+-------------- ---- --- -- - - * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) - * - * NETLBL_NLTYPE_UNLABELED - * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +---------------+ - * | doi (32 bits) | - * +---------------+ - * - * doi: the CIPSO DOI value - * - * o REMOVEDEF: - * Sent by an application to remove the default domain mapping from the - * NetLabel system, there is no payload. The kernel should ACK this message. - * - * o LISTDEF: - * This message can be sent either from an application or by the kernel in - * response to an application generated LISTDEF message. When sent by an - * application there is no payload. The kernel should respond to a - * LISTDEF message either with a LISTDEF message on success or an ACK message - * on failure. - * - * +-------------------------+-------------- ---- --- -- - - * | protocol type (32 bits) | mapping data ... 
repeated - * +-------------------------+-------------- ---- --- -- - - * - * protocol type: the protocol type (defined by NETLBL_NLTYPE_*) - * mapping data: specific to the map type (see below) - * - * NETLBL_NLTYPE_UNLABELED - * - * No mapping data for this protocol type. - * - * NETLBL_NLTYPE_CIPSOV4 - * - * +----------------+---------------+ - * | type (32 bits) | doi (32 bits) | - * +----------------+---------------+ - * - * type: the CIPSO mapping table type (defined in the cipso_ipv4.h header - * as CIPSO_V4_MAP_*) - * doi: the CIPSO DOI value - * - * o MODULES: - * Sent by an application to request a list of configured NetLabel modules - * in the kernel. When sent by an application there is no payload. - * - * +-------------------+ - * | modules (32 bits) | ... - * +-------------------+ - * - * modules: the number of modules in the message, if this is an application - * generated message and the value is zero then return a list of - * the configured modules - * - * +------------------+ - * | module (32 bits) | ... repeated - * +------------------+ - * - * module: the module number as defined by NETLBL_NLTYPE_* - * - * o VERSION: - * Sent by an application to request the NetLabel version string. When sent - * by an application there is no payload. This message type is also used by - * the kernel to respond to an VERSION request. 
- * - * +-------------------+ - * | version (32 bits) | - * +-------------------+ - * - * version: the protocol version number - * - */ - -/* NetLabel Management commands */ -enum { - NLBL_MGMT_C_UNSPEC, - NLBL_MGMT_C_ACK, - NLBL_MGMT_C_ADD, - NLBL_MGMT_C_REMOVE, - NLBL_MGMT_C_LIST, - NLBL_MGMT_C_ADDDEF, - NLBL_MGMT_C_REMOVEDEF, - NLBL_MGMT_C_LISTDEF, - NLBL_MGMT_C_MODULES, - NLBL_MGMT_C_VERSION, - __NLBL_MGMT_C_MAX, -}; -#define NLBL_MGMT_C_MAX (__NLBL_MGMT_C_MAX - 1) - -/* NetLabel protocol functions */ -int netlbl_mgmt_genl_init(void); - -#endif diff --git a/trunk/net/netlabel/netlabel_unlabeled.c b/trunk/net/netlabel/netlabel_unlabeled.c deleted file mode 100644 index 785f4960e0d3..000000000000 --- a/trunk/net/netlabel/netlabel_unlabeled.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * NetLabel Unlabeled Support - * - * This file defines functions for dealing with unlabeled packets for the - * NetLabel system. The NetLabel system manages static and dynamic label - * mappings for network protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include "netlabel_user.h" -#include "netlabel_domainhash.h" -#include "netlabel_unlabeled.h" - -/* Accept unlabeled packets flag */ -static atomic_t netlabel_unlabel_accept_flg = ATOMIC_INIT(0); - -/* NetLabel Generic NETLINK CIPSOv4 family */ -static struct genl_family netlbl_unlabel_gnl_family = { - .id = GENL_ID_GENERATE, - .hdrsize = 0, - .name = NETLBL_NLTYPE_UNLABELED_NAME, - .version = NETLBL_PROTO_VERSION, - .maxattr = 0, -}; - - -/* - * NetLabel Command Handlers - */ - -/** - * netlbl_unlabel_accept - Handle an ACCEPT message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated ACCEPT message and set the accept flag accordingly. - * Returns zero on success, negative values on failure. 
- * - */ -static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val; - struct nlattr *data = netlbl_netlink_payload_data(skb); - u32 value; - - ret_val = netlbl_netlink_cap_check(skb, CAP_NET_ADMIN); - if (ret_val != 0) - return ret_val; - - if (netlbl_netlink_payload_len(skb) == NETLBL_LEN_U32) { - value = nla_get_u32(data); - if (value == 1 || value == 0) { - atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - NETLBL_E_OK); - return 0; - } - } - - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - EINVAL); - return -EINVAL; -} - -/** - * netlbl_unlabel_list - Handle a LIST message - * @skb: the NETLINK buffer - * @info: the Generic NETLINK info block - * - * Description: - * Process a user generated LIST message and respond with the current status. - * Returns zero on success, negative values on failure. - * - */ -static int netlbl_unlabel_list(struct sk_buff *skb, struct genl_info *info) -{ - int ret_val = -ENOMEM; - struct sk_buff *ans_skb; - - ans_skb = netlbl_netlink_alloc_skb(0, - GENL_HDRLEN + NETLBL_LEN_U32, - GFP_KERNEL); - if (ans_skb == NULL) - goto list_failure; - - if (netlbl_netlink_hdr_put(ans_skb, - info->snd_pid, - 0, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_LIST) == NULL) - goto list_failure; - - ret_val = nla_put_u32(ans_skb, - NLA_U32, - atomic_read(&netlabel_unlabel_accept_flg)); - if (ret_val != 0) - goto list_failure; - - ret_val = netlbl_netlink_snd(ans_skb, info->snd_pid); - if (ret_val != 0) - goto list_failure; - - return 0; - -list_failure: - netlbl_netlink_send_ack(info, - netlbl_unlabel_gnl_family.id, - NLBL_UNLABEL_C_ACK, - -ret_val); - return ret_val; -} - - -/* - * NetLabel Generic NETLINK Command Definitions - */ - -static struct genl_ops netlbl_unlabel_genl_c_accept = { - .cmd = NLBL_UNLABEL_C_ACCEPT, - .flags = 0, - .doit = netlbl_unlabel_accept, - .dumpit = NULL, 
-}; - -static struct genl_ops netlbl_unlabel_genl_c_list = { - .cmd = NLBL_UNLABEL_C_LIST, - .flags = 0, - .doit = netlbl_unlabel_list, - .dumpit = NULL, -}; - - -/* - * NetLabel Generic NETLINK Protocol Functions - */ - -/** - * netlbl_unlabel_genl_init - Register the Unlabeled NetLabel component - * - * Description: - * Register the unlabeled packet NetLabel component with the Generic NETLINK - * mechanism. Returns zero on success, negative values on failure. - * - */ -int netlbl_unlabel_genl_init(void) -{ - int ret_val; - - ret_val = genl_register_family(&netlbl_unlabel_gnl_family); - if (ret_val != 0) - return ret_val; - - ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, - &netlbl_unlabel_genl_c_accept); - if (ret_val != 0) - return ret_val; - - ret_val = genl_register_ops(&netlbl_unlabel_gnl_family, - &netlbl_unlabel_genl_c_list); - if (ret_val != 0) - return ret_val; - - return 0; -} - -/* - * NetLabel KAPI Hooks - */ - -/** - * netlbl_unlabel_getattr - Get the security attributes for an unlabled packet - * @secattr: the security attributes - * - * Description: - * Determine the security attributes, if any, for an unlabled packet and return - * them in @secattr. Returns zero on success and negative values on failure. - * - */ -int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr) -{ - if (atomic_read(&netlabel_unlabel_accept_flg) == 1) { - memset(secattr, 0, sizeof(*secattr)); - return 0; - } - - return -ENOMSG; -} - -/** - * netlbl_unlabel_defconf - Set the default config to allow unlabeled packets - * - * Description: - * Set the default NetLabel configuration to allow incoming unlabeled packets - * and to send unlabeled network traffic by default. 
- * - */ -int netlbl_unlabel_defconf(void) -{ - int ret_val; - struct netlbl_dom_map *entry; - - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (entry == NULL) - return -ENOMEM; - entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry); - if (ret_val != 0) - return ret_val; - - atomic_set(&netlabel_unlabel_accept_flg, 1); - - return 0; -} diff --git a/trunk/net/netlabel/netlabel_unlabeled.h b/trunk/net/netlabel/netlabel_unlabeled.h deleted file mode 100644 index f300e54e14b6..000000000000 --- a/trunk/net/netlabel/netlabel_unlabeled.h +++ /dev/null @@ -1,98 +0,0 @@ -/* - * NetLabel Unlabeled Support - * - * This file defines functions for dealing with unlabeled packets for the - * NetLabel system. The NetLabel system manages static and dynamic label - * mappings for network protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_UNLABELED_H -#define _NETLABEL_UNLABELED_H - -#include - -/* - * The following NetLabel payloads are supported by the Unlabeled subsystem. - * - * o ACK: - * Sent by the kernel in response to an applications message, applications - * should never send this message. 
- * - * +----------------------+-----------------------+ - * | seq number (32 bits) | return code (32 bits) | - * +----------------------+-----------------------+ - * - * seq number: the sequence number of the original message, taken from the - * nlmsghdr structure - * return code: return value, based on errno values - * - * o ACCEPT - * This message is sent from an application to specify if the kernel should - * allow unlabled packets to pass if they do not match any of the static - * mappings defined in the unlabeled module. - * - * +-----------------+ - * | allow (32 bits) | - * +-----------------+ - * - * allow: if true (1) then allow the packets to pass, if false (0) then - * reject the packets - * - * o LIST - * This message can be sent either from an application or by the kernel in - * response to an application generated LIST message. When sent by an - * application there is no payload. The kernel should respond to a LIST - * message either with a LIST message on success or an ACK message on - * failure. 
- * - * +-----------------------+ - * | accept flag (32 bits) | - * +-----------------------+ - * - * accept flag: if true (1) then unlabeled packets are allowed to pass, - * if false (0) then unlabeled packets are rejected - * - */ - -/* NetLabel Unlabeled commands */ -enum { - NLBL_UNLABEL_C_UNSPEC, - NLBL_UNLABEL_C_ACK, - NLBL_UNLABEL_C_ACCEPT, - NLBL_UNLABEL_C_LIST, - __NLBL_UNLABEL_C_MAX, -}; -#define NLBL_UNLABEL_C_MAX (__NLBL_UNLABEL_C_MAX - 1) - -/* NetLabel protocol functions */ -int netlbl_unlabel_genl_init(void); - -/* Process Unlabeled incoming network packets */ -int netlbl_unlabel_getattr(struct netlbl_lsm_secattr *secattr); - -/* Set the default configuration to allow Unlabeled packets */ -int netlbl_unlabel_defconf(void); - -#endif diff --git a/trunk/net/netlabel/netlabel_user.c b/trunk/net/netlabel/netlabel_user.c deleted file mode 100644 index 73cbe66e42ff..000000000000 --- a/trunk/net/netlabel/netlabel_user.c +++ /dev/null @@ -1,158 +0,0 @@ -/* - * NetLabel NETLINK Interface - * - * This file defines the NETLINK interface for the NetLabel system. The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. - * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "netlabel_mgmt.h" -#include "netlabel_unlabeled.h" -#include "netlabel_cipso_v4.h" -#include "netlabel_user.h" - -/* - * NetLabel NETLINK Setup Functions - */ - -/** - * netlbl_netlink_init - Initialize the NETLINK communication channel - * - * Description: - * Call out to the NetLabel components so they can register their families and - * commands with the Generic NETLINK mechanism. Returns zero on success and - * non-zero on failure. - * - */ -int netlbl_netlink_init(void) -{ - int ret_val; - - ret_val = netlbl_mgmt_genl_init(); - if (ret_val != 0) - return ret_val; - - ret_val = netlbl_cipsov4_genl_init(); - if (ret_val != 0) - return ret_val; - - ret_val = netlbl_unlabel_genl_init(); - if (ret_val != 0) - return ret_val; - - return 0; -} - -/* - * NetLabel Common Protocol Functions - */ - -/** - * netlbl_netlink_send_ack - Send an ACK message - * @info: the generic NETLINK information - * @genl_family: the generic NETLINK family ID value - * @ack_cmd: the generic NETLINK family ACK command value - * @ret_code: return code to use - * - * Description: - * This function sends an ACK message to the sender of the NETLINK message - * specified by @info. 
- * - */ -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code) -{ - size_t data_size; - struct sk_buff *skb; - - data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; - skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (skb == NULL) - return; - - if (netlbl_netlink_hdr_put(skb, - info->snd_pid, - 0, - genl_family, - ack_cmd) == NULL) - goto send_ack_failure; - - if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) - goto send_ack_failure; - if (nla_put_u32(skb, NLA_U32, ret_code) != 0) - goto send_ack_failure; - - netlbl_netlink_snd(skb, info->snd_pid); - return; - -send_ack_failure: - kfree_skb(skb); -} - -/* - * NETLINK I/O Functions - */ - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: destination PID - * - * Description: - * Sends a unicast NetLabel message over the NETLINK socket. - * - */ -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) -{ - return genlmsg_unicast(skb, pid); -} - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: sending PID - * @group: multicast group id - * - * Description: - * Sends a multicast NetLabel message over the NETLINK socket to all members - * of @group except @pid. - * - */ -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) -{ - return genlmsg_multicast(skb, pid, group, GFP_KERNEL); -} diff --git a/trunk/net/netlabel/netlabel_user.h b/trunk/net/netlabel/netlabel_user.h deleted file mode 100644 index 385a6c7488c6..000000000000 --- a/trunk/net/netlabel/netlabel_user.h +++ /dev/null @@ -1,215 +0,0 @@ -/* - * NetLabel NETLINK Interface - * - * This file defines the NETLINK interface for the NetLabel system. The - * NetLabel system manages static and dynamic label mappings for network - * protocols such as CIPSO and RIPSO. 
- * - * Author: Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _NETLABEL_USER_H -#define _NETLABEL_USER_H - -#include -#include -#include -#include -#include -#include - -/* NetLabel NETLINK helper functions */ - -/** - * netlbl_netlink_cap_check - Check the NETLINK msg capabilities - * @skb: the NETLINK buffer - * @req_cap: the required capability - * - * Description: - * Check the NETLINK buffer's capabilities against the required capabilities. - * Returns zero on success, negative values on failure. - * - */ -static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, - kernel_cap_t req_cap) -{ - if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) - return 0; - return -EPERM; -} - -/** - * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u8 value pointed to by @nla and advance it to the next attribute. 
- * - */ -static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) -{ - u8 val = nla_get_u8(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u16 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) -{ - u16 val = nla_get_u16(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u32 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) -{ - u32 val = nla_get_u32(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. Returns a pointer to the start of the payload buffer - * on success or NULL on failure. - * - */ -static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - u8 cmd) -{ - return genlmsg_put(skb, - pid, - seq, - type, - 0, - 0, - cmd, - NETLBL_PROTO_VERSION); -} - -/** - * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. 
- * - */ -static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - u8 cmd) - -{ - struct nlmsghdr *nlh; - struct genlmsghdr *hdr; - - nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); - nlh->nlmsg_type = type; - nlh->nlmsg_len = skb->len; - nlh->nlmsg_flags = 0; - nlh->nlmsg_pid = pid; - nlh->nlmsg_seq = seq; - - hdr = nlmsg_data(nlh); - hdr->cmd = cmd; - hdr->version = NETLBL_PROTO_VERSION; - hdr->reserved = 0; -} - -/** - * netlbl_netlink_payload_len - Return the length of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns the length of the NetLabel payload. - * - */ -static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) -{ - return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; -} - -/** - * netlbl_netlink_payload_data - Returns a pointer to the start of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns a pointer to the start of the NetLabel payload. 
- * - */ -static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) -{ - return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + - GENL_HDRLEN; -} - -/* NetLabel common protocol functions */ - -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code); - -/* NetLabel NETLINK I/O functions */ - -int netlbl_netlink_init(void); -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); - -#endif diff --git a/trunk/net/netlink/af_netlink.c b/trunk/net/netlink/af_netlink.c index d56e0d21f919..8b85036ba8e3 100644 --- a/trunk/net/netlink/af_netlink.c +++ b/trunk/net/netlink/af_netlink.c @@ -1147,7 +1147,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (len > sk->sk_sndbuf - 32) goto out; err = -ENOBUFS; - skb = nlmsg_new(len, GFP_KERNEL); + skb = alloc_skb(len, GFP_KERNEL); if (skb==NULL) goto out; @@ -1341,18 +1341,19 @@ static int netlink_dump(struct sock *sk) struct netlink_callback *cb; struct sk_buff *skb; struct nlmsghdr *nlh; - int len, err = -ENOBUFS; + int len; skb = sock_rmalloc(sk, NLMSG_GOODSIZE, 0, GFP_KERNEL); if (!skb) - goto errout; + return -ENOBUFS; spin_lock(&nlk->cb_lock); cb = nlk->cb; if (cb == NULL) { - err = -EINVAL; - goto errout_skb; + spin_unlock(&nlk->cb_lock); + kfree_skb(skb); + return -EINVAL; } len = cb->dump(skb, cb); @@ -1364,12 +1365,8 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - if (!nlh) - goto errout_skb; - - memcpy(nlmsg_data(nlh), &len, sizeof(len)); - + nlh = NLMSG_NEW_ANSWER(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); + memcpy(NLMSG_DATA(nlh), &len, sizeof(len)); skb_queue_tail(&sk->sk_receive_queue, skb); sk->sk_data_ready(sk, skb->len); @@ -1381,11 +1378,8 @@ static int netlink_dump(struct sock *sk) netlink_destroy_callback(cb); return 0; -errout_skb: - 
spin_unlock(&nlk->cb_lock); - kfree_skb(skb); -errout: - return err; +nlmsg_failure: + return -ENOBUFS; } int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, @@ -1437,11 +1431,11 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) int size; if (err == 0) - size = nlmsg_total_size(sizeof(*errmsg)); + size = NLMSG_SPACE(sizeof(struct nlmsgerr)); else - size = nlmsg_total_size(sizeof(*errmsg) + nlmsg_len(nlh)); + size = NLMSG_SPACE(4 + NLMSG_ALIGN(nlh->nlmsg_len)); - skb = nlmsg_new(size, GFP_KERNEL); + skb = alloc_skb(size, GFP_KERNEL); if (!skb) { struct sock *sk; @@ -1457,15 +1451,16 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, NLMSG_ERROR, sizeof(struct nlmsgerr), 0); - errmsg = nlmsg_data(rep); + errmsg = NLMSG_DATA(rep); errmsg->error = err; - memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); + memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(struct nlmsghdr)); netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); } static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)) { + unsigned int total_len; struct nlmsghdr *nlh; int err; @@ -1475,6 +1470,8 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, if (nlh->nlmsg_len < NLMSG_HDRLEN || skb->len < nlh->nlmsg_len) return 0; + total_len = min(NLMSG_ALIGN(nlh->nlmsg_len), skb->len); + if (cb(skb, nlh, &err) < 0) { /* Not an error, but we have to interrupt processing * here. 
Note: that in this case we do not pull @@ -1486,7 +1483,7 @@ static int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, } else if (nlh->nlmsg_flags & NLM_F_ACK) netlink_ack(skb, nlh, 0); - netlink_queue_skip(nlh, skb); + skb_pull(skb, total_len); } return 0; @@ -1549,38 +1546,6 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) skb_pull(skb, msglen); } -/** - * nlmsg_notify - send a notification netlink message - * @sk: netlink socket to use - * @skb: notification message - * @pid: destination netlink pid for reports or 0 - * @group: destination multicast group or 0 - * @report: 1 to report back, 0 to disable - * @flags: allocation flags - */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, - unsigned int group, int report, gfp_t flags) -{ - int err = 0; - - if (group) { - int exclude_pid = 0; - - if (report) { - atomic_inc(&skb->users); - exclude_pid = pid; - } - - /* errors reported via destination sk->sk_err */ - nlmsg_multicast(sk, skb, exclude_pid, group, flags); - } - - if (report) - err = nlmsg_unicast(sk, skb, pid); - - return err; -} - #ifdef CONFIG_PROC_FS struct nl_seq_iter { int link; @@ -1762,6 +1727,8 @@ static struct net_proto_family netlink_family_ops = { .owner = THIS_MODULE, /* for consistency 8) */ }; +extern void netlink_skb_parms_too_large(void); + static int __init netlink_proto_init(void) { struct sk_buff *dummy_skb; @@ -1773,7 +1740,8 @@ static int __init netlink_proto_init(void) if (err != 0) goto out; - BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)); + if (sizeof(struct netlink_skb_parms) > sizeof(dummy_skb->cb)) + netlink_skb_parms_too_large(); nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL); if (!nl_table) @@ -1831,4 +1799,4 @@ EXPORT_SYMBOL(netlink_set_err); EXPORT_SYMBOL(netlink_set_nonroot); EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); -EXPORT_SYMBOL(nlmsg_notify); + diff --git a/trunk/net/netlink/attr.c 
b/trunk/net/netlink/attr.c index 004139557e09..dddbd15135a8 100644 --- a/trunk/net/netlink/attr.c +++ b/trunk/net/netlink/attr.c @@ -20,6 +20,7 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), + [NLA_STRING] = 1, [NLA_NESTED] = NLA_HDRLEN, }; @@ -27,7 +28,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, struct nla_policy *policy) { struct nla_policy *pt; - int minlen = 0, attrlen = nla_len(nla); + int minlen = 0; if (nla->nla_type <= 0 || nla->nla_type > maxtype) return 0; @@ -36,46 +37,16 @@ static int validate_nla(struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - switch (pt->type) { - case NLA_FLAG: - if (attrlen > 0) - return -ERANGE; - break; + if (pt->minlen) + minlen = pt->minlen; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; - case NLA_NUL_STRING: - if (pt->len) - minlen = min_t(int, attrlen, pt->len + 1); - else - minlen = attrlen; + if (pt->type == NLA_FLAG && nla_len(nla) > 0) + return -ERANGE; - if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) - return -EINVAL; - /* fall through */ - - case NLA_STRING: - if (attrlen < 1) - return -ERANGE; - - if (pt->len) { - char *buf = nla_data(nla); - - if (buf[attrlen - 1] == '\0') - attrlen--; - - if (attrlen > pt->len) - return -ERANGE; - } - break; - - default: - if (pt->len) - minlen = pt->len; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; - - if (attrlen < minlen) - return -ERANGE; - } + if (nla_len(nla) < minlen) + return -ERANGE; return 0; } @@ -283,26 +254,6 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return nla; } -/** - * __nla_reserve_nohdr - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @attrlen: length of attribute payload - * - * Reserves room for attribute payload without a header. 
- * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the payload. - */ -void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - void *start; - - start = skb_put(skb, NLA_ALIGN(attrlen)); - memset(start, 0, NLA_ALIGN(attrlen)); - - return start; -} - /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on @@ -323,24 +274,6 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return __nla_reserve(skb, attrtype, attrlen); } -/** - * nla_reserve - reserve room for attribute without header - * @skb: socket buffer to reserve room on - * @len: length of attribute payload - * - * Reserves room for attribute payload without a header. - * - * Returns NULL if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return NULL; - - return __nla_reserve_nohdr(skb, attrlen); -} - /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -360,22 +293,6 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, memcpy(nla_data(nla), data, attrlen); } -/** - * __nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * The caller is responsible to ensure that the skb provides enough - * tailroom for the attribute payload. 
- */ -void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - void *start; - - start = __nla_reserve_nohdr(skb, attrlen); - memcpy(start, data, attrlen); -} /** * nla_put - Add a netlink attribute to a socket buffer @@ -396,36 +313,15 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) return 0; } -/** - * nla_put_nohdr - Add a netlink attribute without header - * @skb: socket buffer to add attribute to - * @attrlen: length of attribute payload - * @data: head of attribute payload - * - * Returns -1 if the tailroom of the skb is insufficient to store - * the attribute payload. - */ -int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) -{ - if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) - return -1; - - __nla_put_nohdr(skb, attrlen, data); - return 0; -} EXPORT_SYMBOL(nla_validate); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); EXPORT_SYMBOL(__nla_reserve); -EXPORT_SYMBOL(__nla_reserve_nohdr); EXPORT_SYMBOL(nla_reserve); -EXPORT_SYMBOL(nla_reserve_nohdr); EXPORT_SYMBOL(__nla_put); -EXPORT_SYMBOL(__nla_put_nohdr); EXPORT_SYMBOL(nla_put); -EXPORT_SYMBOL(nla_put_nohdr); EXPORT_SYMBOL(nla_memcpy); EXPORT_SYMBOL(nla_memcmp); EXPORT_SYMBOL(nla_strcmp); diff --git a/trunk/net/netlink/genetlink.c b/trunk/net/netlink/genetlink.c index 49bc2db7982b..a298f77cc3e3 100644 --- a/trunk/net/netlink/genetlink.c +++ b/trunk/net/netlink/genetlink.c @@ -387,10 +387,7 @@ static void genl_rcv(struct sock *sk, int len) static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { - struct nlattr *nla_ops; - struct genl_ops *ops; void *hdr; - int idx = 1; hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, family->version); @@ -399,37 +396,6 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); 
- NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); - NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); - NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); - - nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); - if (nla_ops == NULL) - goto nla_put_failure; - - list_for_each_entry(ops, &family->ops_list, ops_list) { - struct nlattr *nest; - - nest = nla_nest_start(skb, idx++); - if (nest == NULL) - goto nla_put_failure; - - NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); - NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); - - if (ops->policy) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); - - if (ops->doit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); - - if (ops->dumpit) - NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); - - nla_nest_end(skb, nest); - } - - nla_nest_end(skb, nla_ops); return genlmsg_end(skb, hdr); @@ -445,9 +411,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; - if (chains_to_skip != 0) - genl_lock(); - for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -465,9 +428,6 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: - if (chains_to_skip != 0) - genl_unlock(); - cb->args[0] = i; cb->args[1] = n; @@ -480,7 +440,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + skb = nlmsg_new(NLMSG_GOODSIZE); if (skb == NULL) return ERR_PTR(-ENOBUFS); @@ -495,8 +455,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, static struct nla_policy ctrl_policy[CTRL_ATTR_MAX+1] __read_mostly = { [CTRL_ATTR_FAMILY_ID] = { .type = NLA_U16 }, - [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_NUL_STRING, - .len = GENL_NAMSIZ - 1 }, + [CTRL_ATTR_FAMILY_NAME] = { .type = NLA_STRING }, }; static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) @@ -511,9 +470,12 @@ static int 
ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) } if (info->attrs[CTRL_ATTR_FAMILY_NAME]) { - char *name; + char name[GENL_NAMSIZ]; + + if (nla_strlcpy(name, info->attrs[CTRL_ATTR_FAMILY_NAME], + GENL_NAMSIZ) >= GENL_NAMSIZ) + goto errout; - name = nla_data(info->attrs[CTRL_ATTR_FAMILY_NAME]); res = genl_family_find_byname(name); } @@ -548,7 +510,7 @@ static int genl_ctrl_event(int event, void *data) if (IS_ERR(msg)) return PTR_ERR(msg); - genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); + genlmsg_multicast(msg, 0, GENL_ID_CTRL); break; } diff --git a/trunk/net/packet/af_packet.c b/trunk/net/packet/af_packet.c index f4ccb90e6739..4172a5235916 100644 --- a/trunk/net/packet/af_packet.c +++ b/trunk/net/packet/af_packet.c @@ -427,24 +427,21 @@ static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock, } #endif -static inline int run_filter(struct sk_buff *skb, struct sock *sk, - unsigned *snaplen) +static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) { struct sk_filter *filter; - int err = 0; - rcu_read_lock_bh(); - filter = rcu_dereference(sk->sk_filter); - if (filter != NULL) { - err = sk_run_filter(skb, filter->insns, filter->len); - if (!err) - err = -EPERM; - else if (*snaplen > err) - *snaplen = err; - } - rcu_read_unlock_bh(); + bh_lock_sock(sk); + filter = sk->sk_filter; + /* + * Our caller already checked that filter != NULL but we need to + * verify that under bh_lock_sock() to be safe + */ + if (likely(filter != NULL)) + res = sk_run_filter(skb, filter->insns, filter->len); + bh_unlock_sock(sk); - return err; + return res; } /* @@ -494,8 +491,13 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) - goto drop_n_restore; + if (sk->sk_filter) { + unsigned res = run_filter(skb, sk, snaplen); + if (res == 0) + goto drop_n_restore; + if (snaplen > res) + snaplen = res; + } if (atomic_read(&sk->sk_rmem_alloc) + 
skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -584,15 +586,20 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe else if (skb->pkt_type == PACKET_OUTGOING) { /* Special case: outgoing packets have ll header at head */ skb_pull(skb, skb->nh.raw - skb->data); - if (skb->ip_summed == CHECKSUM_PARTIAL) + if (skb->ip_summed == CHECKSUM_HW) status |= TP_STATUS_CSUMNOTREADY; } } snaplen = skb->len; - if (run_filter(skb, sk, &snaplen) < 0) - goto drop_n_restore; + if (sk->sk_filter) { + unsigned res = run_filter(skb, sk, snaplen); + if (res == 0) + goto drop_n_restore; + if (snaplen > res) + snaplen = res; + } if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; diff --git a/trunk/net/sched/act_api.c b/trunk/net/sched/act_api.c index 835070e9169c..a2587b52e531 100644 --- a/trunk/net/sched/act_api.c +++ b/trunk/net/sched/act_api.c @@ -33,230 +33,16 @@ #include #include -void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) -{ - unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - struct tcf_common **p1p; - - for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - if (*p1p == p) { - write_lock_bh(hinfo->lock); - *p1p = p->tcfc_next; - write_unlock_bh(hinfo->lock); -#ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->tcfc_bstats, - &p->tcfc_rate_est); +#if 0 /* control */ +#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) +#else +#define DPRINTK(format, args...) 
#endif - kfree(p); - return; - } - } - BUG_TRAP(0); -} -EXPORT_SYMBOL(tcf_hash_destroy); - -int tcf_hash_release(struct tcf_common *p, int bind, - struct tcf_hashinfo *hinfo) -{ - int ret = 0; - - if (p) { - if (bind) - p->tcfc_bindcnt--; - - p->tcfc_refcnt--; - if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { - tcf_hash_destroy(p, hinfo); - ret = 1; - } - } - return ret; -} -EXPORT_SYMBOL(tcf_hash_release); - -static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, - struct tc_action *a, struct tcf_hashinfo *hinfo) -{ - struct tcf_common *p; - int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; - struct rtattr *r ; - - read_lock(hinfo->lock); - - s_i = cb->args[0]; - - for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - for (; p; p = p->tcfc_next) { - index++; - if (index < s_i) - continue; - a->priv = p; - a->order = n_i; - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - err = tcf_action_dump_1(skb, a, 0, 0); - if (err < 0) { - index--; - skb_trim(skb, (u8*)r - skb->data); - goto done; - } - r->rta_len = skb->tail - (u8*)r; - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) - goto done; - } - } -done: - read_unlock(hinfo->lock); - if (n_i) - cb->args[0] += n_i; - return n_i; - -rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); - goto done; -} - -static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, - struct tcf_hashinfo *hinfo) -{ - struct tcf_common *p, *s_p; - struct rtattr *r ; - int i= 0, n_i = 0; - - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); - for (i = 0; i < (hinfo->hmask + 1); i++) { - p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - while (p != NULL) { - s_p = p->tcfc_next; - if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) - module_put(a->ops->owner); - n_i++; - p = s_p; - } - } - RTA_PUT(skb, TCA_FCNT, 4, &n_i); - r->rta_len = skb->tail - (u8*)r; - - return n_i; -rtattr_failure: - 
skb_trim(skb, (u8*)r - skb->data); - return -EINVAL; -} - -int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, - int type, struct tc_action *a) -{ - struct tcf_hashinfo *hinfo = a->ops->hinfo; - - if (type == RTM_DELACTION) { - return tcf_del_walker(skb, a, hinfo); - } else if (type == RTM_GETACTION) { - return tcf_dump_walker(skb, cb, a, hinfo); - } else { - printk("tcf_generic_walker: unknown action %d\n", type); - return -EINVAL; - } -} -EXPORT_SYMBOL(tcf_generic_walker); - -struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) -{ - struct tcf_common *p; - - read_lock(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { - if (p->tcfc_index == index) - break; - } - read_unlock(hinfo->lock); - - return p; -} -EXPORT_SYMBOL(tcf_hash_lookup); - -u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) -{ - u32 val = *idx_gen; - - do { - if (++val == 0) - val = 1; - } while (tcf_hash_lookup(val, hinfo)); - - return (*idx_gen = val); -} -EXPORT_SYMBOL(tcf_hash_new_index); - -int tcf_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_hashinfo *hinfo = a->ops->hinfo; - struct tcf_common *p = tcf_hash_lookup(index, hinfo); - - if (p) { - a->priv = p; - return 1; - } - return 0; -} -EXPORT_SYMBOL(tcf_hash_search); - -struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, - struct tcf_hashinfo *hinfo) -{ - struct tcf_common *p = NULL; - if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { - if (bind) { - p->tcfc_bindcnt++; - p->tcfc_refcnt++; - } - a->priv = p; - } - return p; -} -EXPORT_SYMBOL(tcf_hash_check); - -struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) -{ - struct tcf_common *p = kzalloc(size, GFP_KERNEL); - - if (unlikely(!p)) - return p; - p->tcfc_refcnt = 1; - if (bind) - p->tcfc_bindcnt = 1; - - spin_lock_init(&p->tcfc_lock); - 
p->tcfc_stats_lock = &p->tcfc_lock; - p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); - p->tcfc_tm.install = jiffies; - p->tcfc_tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR - if (est) - gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, - p->tcfc_stats_lock, est); +#if 0 /* data */ +#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) +#else +#define D2PRINTK(format, args...) #endif - a->priv = (void *) p; - return p; -} -EXPORT_SYMBOL(tcf_hash_create); - -void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) -{ - unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); - - write_lock_bh(hinfo->lock); - p->tcfc_next = hinfo->htab[h]; - hinfo->htab[h] = p; - write_unlock_bh(hinfo->lock); -} -EXPORT_SYMBOL(tcf_hash_insert); static struct tc_action_ops *act_base = NULL; static DEFINE_RWLOCK(act_mod_lock); @@ -369,6 +155,9 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); + D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", + skb, skb->input_dev ? 
skb->input_dev->name : "xxx", + skb->dev->name); ret = TC_ACT_OK; goto exec_done; } @@ -398,6 +187,8 @@ void tcf_action_destroy(struct tc_action *act, int bind) for (a = act; a; a = act) { if (a->ops && a->ops->cleanup) { + DPRINTK("tcf_action_destroy destroying %p next %p\n", + a, a->next); if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -540,6 +331,7 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, if (*err != ACT_P_CREATED) module_put(a_o->owner); a->ops = a_o; + DPRINTK("tcf_action_init_1: successfull %s\n", act_name); *err = 0; return a; @@ -600,12 +392,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); + TCA_STATS, TCA_XSTATS, h->stats_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->tcf_stats_lock, &d); + h->stats_lock, &d); if (err < 0) goto errout; @@ -614,11 +406,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (a->ops->get_stats(skb, a) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || + if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) + gnet_stats_copy_queue(&d, &h->qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) @@ -667,6 +459,7 @@ static int act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -675,8 +468,10 @@ act_get_notify(u32 pid, struct nlmsghdr *n, struct tc_action *a, int event) kfree_skb(skb); return -EINVAL; } - - return rtnl_unicast(skb, pid); + err = netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT); + 
if (err > 0) + err = 0; + return err; } static struct tc_action * diff --git a/trunk/net/sched/act_gact.c b/trunk/net/sched/act_gact.c index 6cff56696a81..e75a147ad60f 100644 --- a/trunk/net/sched/act_gact.c +++ b/trunk/net/sched/act_gact.c @@ -34,43 +34,48 @@ #include #include -#define GACT_TAB_MASK 15 -static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; -static u32 gact_idx_gen; +/* use generic hash table */ +#define MY_TAB_SIZE 16 +#define MY_TAB_MASK 15 + +static u32 idx_gen; +static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE]; static DEFINE_RWLOCK(gact_lock); -static struct tcf_hashinfo gact_hash_info = { - .htab = tcf_gact_ht, - .hmask = GACT_TAB_MASK, - .lock = &gact_lock, -}; +/* ovewrride the defaults */ +#define tcf_st tcf_gact +#define tc_st tc_gact +#define tcf_t_lock gact_lock +#define tcf_ht tcf_gact_ht + +#define CONFIG_NET_ACT_INIT 1 +#include #ifdef CONFIG_GACT_PROB -static int gact_net_rand(struct tcf_gact *gact) +static int gact_net_rand(struct tcf_gact *p) { - if (net_random() % gact->tcfg_pval) - return gact->tcf_action; - return gact->tcfg_paction; + if (net_random()%p->pval) + return p->action; + return p->paction; } -static int gact_determ(struct tcf_gact *gact) +static int gact_determ(struct tcf_gact *p) { - if (gact->tcf_bstats.packets % gact->tcfg_pval) - return gact->tcf_action; - return gact->tcfg_paction; + if (p->bstats.packets%p->pval) + return p->action; + return p->paction; } -typedef int (*g_rand)(struct tcf_gact *gact); +typedef int (*g_rand)(struct tcf_gact *p); static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; -#endif /* CONFIG_GACT_PROB */ +#endif static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm; - struct tcf_gact *gact; - struct tcf_common *pc; + struct tcf_gact *p; int ret = 0; if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) @@ -89,106 +94,105 @@ static int 
tcf_gact_init(struct rtattr *rta, struct rtattr *est, return -EOPNOTSUPP; #endif - pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), - bind, &gact_idx_gen, &gact_hash_info); - if (unlikely(!pc)) + p = tcf_hash_check(parm->index, a, ovr, bind); + if (p == NULL) { + p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); + if (p == NULL) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_hash_release(pc, bind, &gact_hash_info); + tcf_hash_release(p, bind); return -EEXIST; } } - gact = to_gact(pc); - - spin_lock_bh(&gact->tcf_lock); - gact->tcf_action = parm->action; + spin_lock_bh(&p->lock); + p->action = parm->action; #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB-1] != NULL) { struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); - gact->tcfg_paction = p_parm->paction; - gact->tcfg_pval = p_parm->pval; - gact->tcfg_ptype = p_parm->ptype; + p->paction = p_parm->paction; + p->pval = p_parm->pval; + p->ptype = p_parm->ptype; } #endif - spin_unlock_bh(&gact->tcf_lock); + spin_unlock_bh(&p->lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &gact_hash_info); + tcf_hash_insert(p); return ret; } -static int tcf_gact_cleanup(struct tc_action *a, int bind) +static int +tcf_gact_cleanup(struct tc_action *a, int bind) { - struct tcf_gact *gact = a->priv; + struct tcf_gact *p = PRIV(a, gact); - if (gact) - return tcf_hash_release(&gact->common, bind, &gact_hash_info); + if (p != NULL) + return tcf_hash_release(p, bind); return 0; } -static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int +tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_gact *gact = a->priv; + struct tcf_gact *p = PRIV(a, gact); int action = TC_ACT_SHOT; - spin_lock(&gact->tcf_lock); + spin_lock(&p->lock); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) - action = 
gact_rand[gact->tcfg_ptype](gact); + if (p->ptype && gact_rand[p->ptype] != NULL) + action = gact_rand[p->ptype](p); else - action = gact->tcf_action; + action = p->action; #else - action = gact->tcf_action; + action = p->action; #endif - gact->tcf_bstats.bytes += skb->len; - gact->tcf_bstats.packets++; + p->bstats.bytes += skb->len; + p->bstats.packets++; if (action == TC_ACT_SHOT) - gact->tcf_qstats.drops++; - gact->tcf_tm.lastuse = jiffies; - spin_unlock(&gact->tcf_lock); + p->qstats.drops++; + p->tm.lastuse = jiffies; + spin_unlock(&p->lock); return action; } -static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int +tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; - struct tcf_gact *gact = a->priv; + struct tcf_gact *p = PRIV(a, gact); struct tcf_t t; - opt.index = gact->tcf_index; - opt.refcnt = gact->tcf_refcnt - ref; - opt.bindcnt = gact->tcf_bindcnt - bind; - opt.action = gact->tcf_action; + opt.index = p->index; + opt.refcnt = p->refcnt - ref; + opt.bindcnt = p->bindcnt - bind; + opt.action = p->action; RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB - if (gact->tcfg_ptype) { + if (p->ptype) { struct tc_gact_p p_opt; - p_opt.paction = gact->tcfg_paction; - p_opt.pval = gact->tcfg_pval; - p_opt.ptype = gact->tcfg_ptype; + p_opt.paction = p->paction; + p_opt.pval = p->pval; + p_opt.ptype = p->ptype; RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif - t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); + t.expires = jiffies_to_clock_t(p->tm.expires); RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); return skb->len; -rtattr_failure: + rtattr_failure: skb_trim(skb, b 
- skb->data); return -1; } static struct tc_action_ops act_gact_ops = { .kind = "gact", - .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -204,7 +208,8 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Classifier actions"); MODULE_LICENSE("GPL"); -static int __init gact_init_module(void) +static int __init +gact_init_module(void) { #ifdef CONFIG_GACT_PROB printk("GACT probability on\n"); @@ -214,7 +219,8 @@ static int __init gact_init_module(void) return tcf_register_action(&act_gact_ops); } -static void __exit gact_cleanup_module(void) +static void __exit +gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); } diff --git a/trunk/net/sched/act_ipt.c b/trunk/net/sched/act_ipt.c index d8c9310da6e5..d799e01248c4 100644 --- a/trunk/net/sched/act_ipt.c +++ b/trunk/net/sched/act_ipt.c @@ -38,19 +38,25 @@ #include +/* use generic hash table */ +#define MY_TAB_SIZE 16 +#define MY_TAB_MASK 15 -#define IPT_TAB_MASK 15 -static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; -static u32 ipt_idx_gen; +static u32 idx_gen; +static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; +/* ipt hash table lock */ static DEFINE_RWLOCK(ipt_lock); -static struct tcf_hashinfo ipt_hash_info = { - .htab = tcf_ipt_ht, - .hmask = IPT_TAB_MASK, - .lock = &ipt_lock, -}; +/* ovewrride the defaults */ +#define tcf_st tcf_ipt +#define tcf_t_lock ipt_lock +#define tcf_ht tcf_ipt_ht + +#define CONFIG_NET_ACT_INIT +#include -static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) +static int +ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { struct ipt_target *target; int ret = 0; @@ -59,6 +65,7 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int if (!target) return -ENOENT; + DPRINTK("ipt_init_target: found %s\n", target->name); t->u.kernel.target = target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), @@ -69,7 
+76,10 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int if (t->u.kernel.target->checkentry && !t->u.kernel.target->checkentry(table, NULL, t->u.kernel.target, t->data, + t->u.target_size - sizeof(*t), hook)) { + DPRINTK("ipt_init_target: check failed for `%s'.\n", + t->u.kernel.target->name); module_put(t->u.kernel.target->me); ret = -EINVAL; } @@ -77,37 +87,40 @@ static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int return ret; } -static void ipt_destroy_target(struct ipt_entry_target *t) +static void +ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->u.kernel.target, t->data); + t->u.kernel.target->destroy(t->u.kernel.target, t->data, + t->u.target_size - sizeof(*t)); module_put(t->u.kernel.target->me); } -static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) +static int +tcf_ipt_release(struct tcf_ipt *p, int bind) { int ret = 0; - if (ipt) { + if (p) { if (bind) - ipt->tcf_bindcnt--; - ipt->tcf_refcnt--; - if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { - ipt_destroy_target(ipt->tcfi_t); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); - tcf_hash_destroy(&ipt->common, &ipt_hash_info); + p->bindcnt--; + p->refcnt--; + if (p->bindcnt <= 0 && p->refcnt <= 0) { + ipt_destroy_target(p->t); + kfree(p->tname); + kfree(p->t); + tcf_hash_destroy(p); ret = ACT_P_DELETED; } } return ret; } -static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) +static int +tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, + int ovr, int bind) { struct rtattr *tb[TCA_IPT_MAX]; - struct tcf_ipt *ipt; - struct tcf_common *pc; + struct tcf_ipt *p; struct ipt_entry_target *td, *t; char *tname; int ret = 0, err; @@ -131,51 +144,49 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) index = *(u32 
*)RTA_DATA(tb[TCA_IPT_INDEX-1]); - pc = tcf_hash_check(index, a, bind, &ipt_hash_info); - if (!pc) { - pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, - &ipt_idx_gen, &ipt_hash_info); - if (unlikely(!pc)) + p = tcf_hash_check(index, a, ovr, bind); + if (p == NULL) { + p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); + if (p == NULL) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_ipt_release(to_ipt(pc), bind); + tcf_ipt_release(p, bind); return -EEXIST; } } - ipt = to_ipt(pc); hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); err = -ENOMEM; tname = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (unlikely(!tname)) + if (tname == NULL) goto err1; if (tb[TCA_IPT_TABLE - 1] == NULL || rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); t = kmalloc(td->u.target_size, GFP_KERNEL); - if (unlikely(!t)) + if (t == NULL) goto err2; memcpy(t, td, td->u.target_size); if ((err = ipt_init_target(t, tname, hook)) < 0) goto err3; - spin_lock_bh(&ipt->tcf_lock); + spin_lock_bh(&p->lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(ipt->tcfi_t); - kfree(ipt->tcfi_tname); - kfree(ipt->tcfi_t); + ipt_destroy_target(p->t); + kfree(p->tname); + kfree(p->t); } - ipt->tcfi_tname = tname; - ipt->tcfi_t = t; - ipt->tcfi_hook = hook; - spin_unlock_bh(&ipt->tcf_lock); + p->tname = tname; + p->t = t; + p->hook = hook; + spin_unlock_bh(&p->lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &ipt_hash_info); + tcf_hash_insert(p); return ret; err3: @@ -183,32 +194,33 @@ static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, err2: kfree(tname); err1: - kfree(pc); + kfree(p); return err; } -static int tcf_ipt_cleanup(struct tc_action *a, int bind) +static int +tcf_ipt_cleanup(struct tc_action *a, int bind) { - struct tcf_ipt *ipt = a->priv; - return tcf_ipt_release(ipt, bind); + struct tcf_ipt *p = PRIV(a, ipt); + return tcf_ipt_release(p, bind); } -static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, - struct 
tcf_result *res) +static int +tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { int ret = 0, result = 0; - struct tcf_ipt *ipt = a->priv; + struct tcf_ipt *p = PRIV(a, ipt); if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return TC_ACT_UNSPEC; } - spin_lock(&ipt->tcf_lock); + spin_lock(&p->lock); - ipt->tcf_tm.lastuse = jiffies; - ipt->tcf_bstats.bytes += skb->len; - ipt->tcf_bstats.packets++; + p->tm.lastuse = jiffies; + p->bstats.bytes += skb->len; + p->bstats.packets++; /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed @@ -217,17 +229,16 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, /* iptables targets take a double skb pointer in case the skb * needs to be replaced. We don't own the skb, so this must not * happen. The pskb_expand_head above should make sure of this */ - ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, - ipt->tcfi_hook, - ipt->tcfi_t->u.kernel.target, - ipt->tcfi_t->data); + ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, + p->t->u.kernel.target, p->t->data, + NULL); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; break; case NF_DROP: result = TC_ACT_SHOT; - ipt->tcf_qstats.drops++; + p->qstats.drops++; break; case IPT_CONTINUE: result = TC_ACT_PIPE; @@ -238,46 +249,53 @@ static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, result = TC_POLICE_OK; break; } - spin_unlock(&ipt->tcf_lock); + spin_unlock(&p->lock); return result; } -static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int +tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { - unsigned char *b = skb->tail; - struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; struct tc_cnt c; + unsigned char *b = skb->tail; + struct tcf_ipt *p = PRIV(a, ipt); /* for simple targets kernel size == user size ** user name = target name ** for foolproof you 
need to not assume this */ - t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); - if (unlikely(!t)) + t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); + if (t == NULL) goto rtattr_failure; - c.bindcnt = ipt->tcf_bindcnt - bind; - c.refcnt = ipt->tcf_refcnt - ref; - memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); - strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); - - RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); - RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); - RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); + c.bindcnt = p->bindcnt - bind; + c.refcnt = p->refcnt - ref; + memcpy(t, p->t, p->t->u.user.target_size); + strcpy(t->u.user.name, p->t->u.kernel.target->name); + + DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, + strlen(p->tname)); + DPRINTK("\tdump target name %s size %d size user %d " + "data[0] %x data[1] %x\n", p->t->u.kernel.target->name, + p->t->u.target_size, p->t->u.user.target_size, + p->t->data[0], p->t->data[1]); + RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t); + RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index); + RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook); RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); - RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); - tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); - tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); + RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); + tm.install = jiffies_to_clock_t(jiffies - p->tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); + tm.expires = jiffies_to_clock_t(p->tm.expires); RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); kfree(t); return skb->len; -rtattr_failure: + rtattr_failure: skb_trim(skb, b - skb->data); kfree(t); return -1; @@ -285,7 +303,6 @@ static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int static struct tc_action_ops act_ipt_ops = { .kind = "ipt", - 
.hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -301,12 +318,14 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Iptables target actions"); MODULE_LICENSE("GPL"); -static int __init ipt_init_module(void) +static int __init +ipt_init_module(void) { return tcf_register_action(&act_ipt_ops); } -static void __exit ipt_cleanup_module(void) +static void __exit +ipt_cleanup_module(void) { tcf_unregister_action(&act_ipt_ops); } diff --git a/trunk/net/sched/act_mirred.c b/trunk/net/sched/act_mirred.c index 483897271f15..fc562047ecc5 100644 --- a/trunk/net/sched/act_mirred.c +++ b/trunk/net/sched/act_mirred.c @@ -39,39 +39,46 @@ #include #include -#define MIRRED_TAB_MASK 7 -static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; -static u32 mirred_idx_gen; + +/* use generic hash table */ +#define MY_TAB_SIZE 8 +#define MY_TAB_MASK (MY_TAB_SIZE - 1) +static u32 idx_gen; +static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE]; static DEFINE_RWLOCK(mirred_lock); -static struct tcf_hashinfo mirred_hash_info = { - .htab = tcf_mirred_ht, - .hmask = MIRRED_TAB_MASK, - .lock = &mirred_lock, -}; +/* ovewrride the defaults */ +#define tcf_st tcf_mirred +#define tc_st tc_mirred +#define tcf_t_lock mirred_lock +#define tcf_ht tcf_mirred_ht + +#define CONFIG_NET_ACT_INIT 1 +#include -static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) +static inline int +tcf_mirred_release(struct tcf_mirred *p, int bind) { - if (m) { + if (p) { if (bind) - m->tcf_bindcnt--; - m->tcf_refcnt--; - if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { - dev_put(m->tcfm_dev); - tcf_hash_destroy(&m->common, &mirred_hash_info); + p->bindcnt--; + p->refcnt--; + if(!p->bindcnt && p->refcnt <= 0) { + dev_put(p->dev); + tcf_hash_destroy(p); return 1; } } return 0; } -static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) +static int +tcf_mirred_init(struct rtattr *rta, struct rtattr *est, 
struct tc_action *a, + int ovr, int bind) { struct rtattr *tb[TCA_MIRRED_MAX]; struct tc_mirred *parm; - struct tcf_mirred *m; - struct tcf_common *pc; + struct tcf_mirred *p; struct net_device *dev = NULL; int ret = 0; int ok_push = 0; @@ -103,62 +110,64 @@ static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, } } - pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); - if (!pc) { + p = tcf_hash_check(parm->index, a, ovr, bind); + if (p == NULL) { if (!parm->ifindex) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, - &mirred_idx_gen, &mirred_hash_info); - if (unlikely(!pc)) + p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); + if (p == NULL) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_mirred_release(to_mirred(pc), bind); + tcf_mirred_release(p, bind); return -EEXIST; } } - m = to_mirred(pc); - spin_lock_bh(&m->tcf_lock); - m->tcf_action = parm->action; - m->tcfm_eaction = parm->eaction; + spin_lock_bh(&p->lock); + p->action = parm->action; + p->eaction = parm->eaction; if (parm->ifindex) { - m->tcfm_ifindex = parm->ifindex; + p->ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(m->tcfm_dev); - m->tcfm_dev = dev; + dev_put(p->dev); + p->dev = dev; dev_hold(dev); - m->tcfm_ok_push = ok_push; + p->ok_push = ok_push; } - spin_unlock_bh(&m->tcf_lock); + spin_unlock_bh(&p->lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &mirred_hash_info); + tcf_hash_insert(p); + DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s " + "ifindex %d\n", parm->index, parm->action, parm->eaction, + dev->name, parm->ifindex); return ret; } -static int tcf_mirred_cleanup(struct tc_action *a, int bind) +static int +tcf_mirred_cleanup(struct tc_action *a, int bind) { - struct tcf_mirred *m = a->priv; + struct tcf_mirred *p = PRIV(a, mirred); - if (m) - return tcf_mirred_release(m, bind); + if (p != NULL) + return tcf_mirred_release(p, bind); return 0; } -static int 
tcf_mirred(struct sk_buff *skb, struct tc_action *a, - struct tcf_result *res) +static int +tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_mirred *m = a->priv; + struct tcf_mirred *p = PRIV(a, mirred); struct net_device *dev; struct sk_buff *skb2 = NULL; u32 at = G_TC_AT(skb->tc_verd); - spin_lock(&m->tcf_lock); + spin_lock(&p->lock); - dev = m->tcfm_dev; - m->tcf_tm.lastuse = jiffies; + dev = p->dev; + p->tm.lastuse = jiffies; if (!(dev->flags&IFF_UP) ) { if (net_ratelimit()) @@ -167,10 +176,10 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, bad_mirred: if (skb2 != NULL) kfree_skb(skb2); - m->tcf_qstats.overlimits++; - m->tcf_bstats.bytes += skb->len; - m->tcf_bstats.packets++; - spin_unlock(&m->tcf_lock); + p->qstats.overlimits++; + p->bstats.bytes += skb->len; + p->bstats.packets++; + spin_unlock(&p->lock); /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -180,59 +189,59 @@ static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto bad_mirred; - if (m->tcfm_eaction != TCA_EGRESS_MIRROR && - m->tcfm_eaction != TCA_EGRESS_REDIR) { + if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { if (net_ratelimit()) - printk("tcf_mirred unknown action %d\n", - m->tcfm_eaction); + printk("tcf_mirred unknown action %d\n", p->eaction); goto bad_mirred; } - m->tcf_bstats.bytes += skb2->len; - m->tcf_bstats.packets++; + p->bstats.bytes += skb2->len; + p->bstats.packets++; if (!(at & AT_EGRESS)) - if (m->tcfm_ok_push) + if (p->ok_push) skb_push(skb2, skb2->dev->hard_header_len); /* mirror is always swallowed */ - if (m->tcfm_eaction != TCA_EGRESS_MIRROR) + if (p->eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; skb2->input_dev = skb->dev; dev_queue_xmit(skb2); - spin_unlock(&m->tcf_lock); - return m->tcf_action; + spin_unlock(&p->lock); + return 
p->action; } -static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int +tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; - struct tcf_mirred *m = a->priv; struct tc_mirred opt; + struct tcf_mirred *p = PRIV(a, mirred); struct tcf_t t; - opt.index = m->tcf_index; - opt.action = m->tcf_action; - opt.refcnt = m->tcf_refcnt - ref; - opt.bindcnt = m->tcf_bindcnt - bind; - opt.eaction = m->tcfm_eaction; - opt.ifindex = m->tcfm_ifindex; + opt.index = p->index; + opt.action = p->action; + opt.refcnt = p->refcnt - ref; + opt.bindcnt = p->bindcnt - bind; + opt.eaction = p->eaction; + opt.ifindex = p->ifindex; + DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n", + p->index, p->action, p->eaction, p->ifindex); RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); - t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(m->tcf_tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); + t.expires = jiffies_to_clock_t(p->tm.expires); RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); return skb->len; -rtattr_failure: + rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", - .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -248,13 +257,15 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); -static int __init mirred_init_module(void) +static int __init +mirred_init_module(void) { printk("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } -static void __exit mirred_cleanup_module(void) +static void __exit +mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops); } diff --git 
a/trunk/net/sched/act_pedit.c b/trunk/net/sched/act_pedit.c index 8ac65c219b98..f257475e0e0c 100644 --- a/trunk/net/sched/act_pedit.c +++ b/trunk/net/sched/act_pedit.c @@ -33,25 +33,32 @@ #include #include -#define PEDIT_TAB_MASK 15 -static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; -static u32 pedit_idx_gen; + +#define PEDIT_DEB 1 + +/* use generic hash table */ +#define MY_TAB_SIZE 16 +#define MY_TAB_MASK 15 +static u32 idx_gen; +static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE]; static DEFINE_RWLOCK(pedit_lock); -static struct tcf_hashinfo pedit_hash_info = { - .htab = tcf_pedit_ht, - .hmask = PEDIT_TAB_MASK, - .lock = &pedit_lock, -}; +#define tcf_st tcf_pedit +#define tc_st tc_pedit +#define tcf_t_lock pedit_lock +#define tcf_ht tcf_pedit_ht -static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) +#define CONFIG_NET_ACT_INIT 1 +#include + +static int +tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, + int ovr, int bind) { struct rtattr *tb[TCA_PEDIT_MAX]; struct tc_pedit *parm; int ret = 0; struct tcf_pedit *p; - struct tcf_common *pc; struct tc_pedit_key *keys = NULL; int ksize; @@ -66,56 +73,54 @@ static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) return -EINVAL; - pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); - if (!pc) { + p = tcf_hash_check(parm->index, a, ovr, bind); + if (p == NULL) { if (!parm->nkeys) return -EINVAL; - pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, - &pedit_idx_gen, &pedit_hash_info); - if (unlikely(!pc)) + p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); + if (p == NULL) return -ENOMEM; - p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(pc); + kfree(p); return -ENOMEM; } ret = ACT_P_CREATED; } else { - p = to_pedit(pc); if (!ovr) { - tcf_hash_release(pc, bind, &pedit_hash_info); + 
tcf_hash_release(p, bind); return -EEXIST; } - if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { + if (p->nkeys && p->nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) return -ENOMEM; } } - spin_lock_bh(&p->tcf_lock); - p->tcfp_flags = parm->flags; - p->tcf_action = parm->action; + spin_lock_bh(&p->lock); + p->flags = parm->flags; + p->action = parm->action; if (keys) { - kfree(p->tcfp_keys); - p->tcfp_keys = keys; - p->tcfp_nkeys = parm->nkeys; + kfree(p->keys); + p->keys = keys; + p->nkeys = parm->nkeys; } - memcpy(p->tcfp_keys, parm->keys, ksize); - spin_unlock_bh(&p->tcf_lock); + memcpy(p->keys, parm->keys, ksize); + spin_unlock_bh(&p->lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &pedit_hash_info); + tcf_hash_insert(p); return ret; } -static int tcf_pedit_cleanup(struct tc_action *a, int bind) +static int +tcf_pedit_cleanup(struct tc_action *a, int bind) { - struct tcf_pedit *p = a->priv; + struct tcf_pedit *p = PRIV(a, pedit); - if (p) { - struct tc_pedit_key *keys = p->tcfp_keys; - if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { + if (p != NULL) { + struct tc_pedit_key *keys = p->keys; + if (tcf_hash_release(p, bind)) { kfree(keys); return 1; } @@ -123,30 +128,30 @@ static int tcf_pedit_cleanup(struct tc_action *a, int bind) return 0; } -static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, - struct tcf_result *res) +static int +tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_pedit *p = a->priv; + struct tcf_pedit *p = PRIV(a, pedit); int i, munged = 0; u8 *pptr; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? 
*/ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - return p->tcf_action; + return p->action; } } pptr = skb->nh.raw; - spin_lock(&p->tcf_lock); + spin_lock(&p->lock); - p->tcf_tm.lastuse = jiffies; + p->tm.lastuse = jiffies; - if (p->tcfp_nkeys > 0) { - struct tc_pedit_key *tkey = p->tcfp_keys; + if (p->nkeys > 0) { + struct tc_pedit_key *tkey = p->keys; - for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { + for (i = p->nkeys; i > 0; i--, tkey++) { u32 *ptr; int offset = tkey->off; @@ -164,8 +169,7 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, printk("offset must be on 32 bit boundaries\n"); goto bad; } - if (skb->len < 0 || - (offset > 0 && offset > skb->len)) { + if (skb->len < 0 || (offset > 0 && offset > skb->len)) { printk("offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; @@ -181,47 +185,63 @@ static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else { - printk("pedit BUG: index %d\n", p->tcf_index); + printk("pedit BUG: index %d\n",p->index); } bad: - p->tcf_qstats.overlimits++; + p->qstats.overlimits++; done: - p->tcf_bstats.bytes += skb->len; - p->tcf_bstats.packets++; - spin_unlock(&p->tcf_lock); - return p->tcf_action; + p->bstats.bytes += skb->len; + p->bstats.packets++; + spin_unlock(&p->lock); + return p->action; } -static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, - int bind, int ref) +static int +tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) { unsigned char *b = skb->tail; - struct tcf_pedit *p = a->priv; struct tc_pedit *opt; + struct tcf_pedit *p = PRIV(a, pedit); struct tcf_t t; int s; - s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); + s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); - if (unlikely(!opt)) + if (opt == NULL) return -ENOBUFS; - memcpy(opt->keys, p->tcfp_keys, - p->tcfp_nkeys 
* sizeof(struct tc_pedit_key)); - opt->index = p->tcf_index; - opt->nkeys = p->tcfp_nkeys; - opt->flags = p->tcfp_flags; - opt->action = p->tcf_action; - opt->refcnt = p->tcf_refcnt - ref; - opt->bindcnt = p->tcf_bindcnt - bind; + memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); + opt->index = p->index; + opt->nkeys = p->nkeys; + opt->flags = p->flags; + opt->action = p->action; + opt->refcnt = p->refcnt - ref; + opt->bindcnt = p->bindcnt - bind; + + +#ifdef PEDIT_DEB + { + /* Debug - get rid of later */ + int i; + struct tc_pedit_key *key = opt->keys; + + for (i=0; inkeys; i++, key++) { + printk( "\n key #%d",i); + printk( " at %d: val %08x mask %08x", + (unsigned int)key->off, + (unsigned int)key->val, + (unsigned int)key->mask); + } + } +#endif RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); - t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); + t.expires = jiffies_to_clock_t(p->tm.expires); RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); kfree(opt); return skb->len; @@ -232,9 +252,9 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, return -1; } -static struct tc_action_ops act_pedit_ops = { +static +struct tc_action_ops act_pedit_ops = { .kind = "pedit", - .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -250,12 +270,14 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Packet Editor actions"); MODULE_LICENSE("GPL"); -static int __init pedit_init_module(void) +static int __init +pedit_init_module(void) { return tcf_register_action(&act_pedit_ops); } -static void __exit pedit_cleanup_module(void) +static void __exit +pedit_cleanup_module(void) { tcf_unregister_action(&act_pedit_ops); } diff --git a/trunk/net/sched/act_police.c 
b/trunk/net/sched/act_police.c index fed47b658837..da905d7b4b40 100644 --- a/trunk/net/sched/act_police.c +++ b/trunk/net/sched/act_police.c @@ -32,27 +32,43 @@ #include #include -#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) - -#define POL_TAB_MASK 15 -static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; -static u32 police_idx_gen; +#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) +#define PRIV(a) ((struct tcf_police *) (a)->priv) + +/* use generic hash table */ +#define MY_TAB_SIZE 16 +#define MY_TAB_MASK 15 +static u32 idx_gen; +static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; +/* Policer hash table lock */ static DEFINE_RWLOCK(police_lock); -static struct tcf_hashinfo police_hash_info = { - .htab = tcf_police_ht, - .hmask = POL_TAB_MASK, - .lock = &police_lock, -}; - /* Each policer is serialized by its individual spinlock */ +static __inline__ unsigned tcf_police_hash(u32 index) +{ + return index&0xF; +} + +static __inline__ struct tcf_police * tcf_police_lookup(u32 index) +{ + struct tcf_police *p; + + read_lock(&police_lock); + for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { + if (p->index == index) + break; + } + read_unlock(&police_lock); + return p; +} + #ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { - struct tcf_common *p; + struct tcf_police *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; @@ -60,10 +76,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c s_i = cb->args[0]; - for (i = 0; i < (POL_TAB_MASK + 1); i++) { - p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; + for (i = 0; i < MY_TAB_SIZE; i++) { + p = tcf_police_ht[tcf_police_hash(i)]; - for (; p; p = p->tcfc_next) { + 
for (; p; p = p->next) { index++; if (index < s_i) continue; @@ -94,26 +110,48 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c skb_trim(skb, (u8*)r - skb->data); goto done; } + +static inline int +tcf_act_police_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_police *p = tcf_police_lookup(index); + + if (p != NULL) { + a->priv = p; + return 1; + } else { + return 0; + } +} #endif +static inline u32 tcf_police_new_index(void) +{ + do { + if (++idx_gen == 0) + idx_gen = 1; + } while (tcf_police_lookup(idx_gen)); + + return idx_gen; +} + void tcf_police_destroy(struct tcf_police *p) { - unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); - struct tcf_common **p1p; + unsigned h = tcf_police_hash(p->index); + struct tcf_police **p1p; - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { - if (*p1p == &p->common) { + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { + if (*p1p == p) { write_lock_bh(&police_lock); - *p1p = p->tcf_next; + *p1p = p->next; write_unlock_bh(&police_lock); #ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->tcf_bstats, - &p->tcf_rate_est); + gen_kill_estimator(&p->bstats, &p->rate_est); #endif - if (p->tcfp_R_tab) - qdisc_put_rtab(p->tcfp_R_tab); - if (p->tcfp_P_tab) - qdisc_put_rtab(p->tcfp_P_tab); + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + if (p->P_tab) + qdisc_put_rtab(p->P_tab); kfree(p); return; } @@ -129,7 +167,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, int ret = 0, err; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; - struct tcf_police *police; + struct tcf_police *p; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) @@ -147,32 +185,27 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) return -EINVAL; - if (parm->index) { - struct tcf_common *pc; - - pc = 
tcf_hash_lookup(parm->index, &police_hash_info); - if (pc != NULL) { - a->priv = pc; - police = to_police(pc); - if (bind) { - police->tcf_bindcnt += 1; - police->tcf_refcnt += 1; - } - if (ovr) - goto override; - return ret; + if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { + a->priv = p; + if (bind) { + p->bindcnt += 1; + p->refcnt += 1; } + if (ovr) + goto override; + return ret; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) return -ENOMEM; + ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; + p->refcnt = 1; + spin_lock_init(&p->lock); + p->stats_lock = &p->lock; if (bind) - police->tcf_bindcnt = 1; + p->bindcnt = 1; override: if (parm->rate.rate) { err = -ENOMEM; @@ -182,71 +215,67 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1]); - if (P_tab == NULL) { + if (p->P_tab == NULL) { qdisc_put_rtab(R_tab); goto failure; } } } /* No failure allowed after this point */ - spin_lock_bh(&police->tcf_lock); + spin_lock_bh(&p->lock); if (R_tab != NULL) { - qdisc_put_rtab(police->tcfp_R_tab); - police->tcfp_R_tab = R_tab; + qdisc_put_rtab(p->R_tab); + p->R_tab = R_tab; } if (P_tab != NULL) { - qdisc_put_rtab(police->tcfp_P_tab); - police->tcfp_P_tab = P_tab; + qdisc_put_rtab(p->P_tab); + p->P_tab = P_tab; } if (tb[TCA_POLICE_RESULT-1]) - police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + p->toks = p->burst = parm->burst; + p->mtu = parm->mtu; + if (p->mtu == 0) { + p->mtu = ~0; + if 
(p->R_tab) + p->mtu = 255<R_tab->rate.cell_log; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - police->tcf_action = parm->action; + if (p->P_tab) + p->ptoks = L2T_P(p, p->mtu); + p->action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) - police->tcfp_ewma_rate = - *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) - gen_replace_estimator(&police->tcf_bstats, - &police->tcf_rate_est, - police->tcf_stats_lock, est); + gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); #endif - spin_unlock_bh(&police->tcf_lock); + spin_unlock_bh(&p->lock); if (ret != ACT_P_CREATED) return ret; - PSCHED_GET_TIME(police->tcfp_t_c); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(&police_idx_gen, &police_hash_info); - h = tcf_hash(police->tcf_index, POL_TAB_MASK); + PSCHED_GET_TIME(p->t_c); + p->index = parm->index ? : tcf_police_new_index(); + h = tcf_police_hash(p->index); write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; + p->next = tcf_police_ht[h]; + tcf_police_ht[h] = p; write_unlock_bh(&police_lock); - a->priv = police; + a->priv = p; return ret; failure: if (ret == ACT_P_CREATED) - kfree(police); + kfree(p); return err; } static int tcf_act_police_cleanup(struct tc_action *a, int bind) { - struct tcf_police *p = a->priv; + struct tcf_police *p = PRIV(a); if (p != NULL) return tcf_police_release(p, bind); @@ -256,87 +285,86 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_police *police = a->priv; psched_time_t now; + struct tcf_police *p = PRIV(a); long toks; long ptoks = 0; - spin_lock(&police->tcf_lock); + spin_lock(&p->lock); - police->tcf_bstats.bytes += skb->len; - police->tcf_bstats.packets++; + p->bstats.bytes += skb->len; + 
p->bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; + if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; } #endif - if (skb->len <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + if (skb->len <= p->mtu) { + if (p->R_tab == NULL) { + spin_unlock(&p->lock); + return p->result; } PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); + toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); + + if (p->P_tab) { + ptoks = toks + p->ptoks; + if (ptoks > (long)L2T_P(p, p->mtu)) + ptoks = (long)L2T_P(p, p->mtu); + ptoks -= L2T_P(p, skb->len); } - toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) - toks = police->tcfp_burst; - toks -= L2T(police, skb->len); + toks += p->toks; + if (toks > (long)p->burst) + toks = p->burst; + toks -= L2T(p, skb->len); + if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + p->t_c = now; + p->toks = toks; + p->ptoks = ptoks; + spin_unlock(&p->lock); + return p->result; } } - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; } static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; - struct tcf_police *police = a->priv; struct tc_police opt; - - opt.index = 
police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - opt.refcnt = police->tcf_refcnt - ref; - opt.bindcnt = police->tcf_bindcnt - bind; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; + struct tcf_police *p = PRIV(a); + + opt.index = p->index; + opt.action = p->action; + opt.mtu = p->mtu; + opt.burst = p->burst; + opt.refcnt = p->refcnt - ref; + opt.bindcnt = p->bindcnt - bind; + if (p->R_tab) + opt.rate = p->R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (p->P_tab) + opt.peakrate = p->P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (police->tcfp_result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), - &police->tcfp_result); + if (p->result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); #ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); + if (p->ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); #endif return skb->len; @@ -351,14 +379,13 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", - .hinfo = &police_hash_info, .type = TCA_ID_POLICE, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_hash_search, + .lookup = tcf_act_police_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -380,39 +407,10 @@ module_exit(police_cleanup_module); #else /* CONFIG_NET_CLS_ACT */ -static struct tcf_common *tcf_police_lookup(u32 index) -{ - struct tcf_hashinfo *hinfo = &police_hash_info; - struct tcf_common *p; - - read_lock(hinfo->lock); - for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; - p = p->tcfc_next) { - if (p->tcfc_index == index) - break; - } - read_unlock(hinfo->lock); - 
- return p; -} - -static u32 tcf_police_new_index(void) -{ - u32 *idx_gen = &police_idx_gen; - u32 val = *idx_gen; - - do { - if (++val == 0) - val = 1; - } while (tcf_police_lookup(val)); - - return (*idx_gen = val); -} - -struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) +struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) { - unsigned int h; - struct tcf_police *police; + unsigned h; + struct tcf_police *p; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; @@ -425,158 +423,149 @@ struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - if (parm->index) { - struct tcf_common *pc; - - pc = tcf_police_lookup(parm->index); - if (pc) { - police = to_police(pc); - police->tcf_refcnt++; - return police; - } + if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { + p->refcnt++; + return p; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (unlikely(!police)) + + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (p == NULL) return NULL; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - police->tcf_stats_lock = &police->tcf_lock; + p->refcnt = 1; + spin_lock_init(&p->lock); + p->stats_lock = &p->lock; if (parm->rate.rate) { - police->tcfp_R_tab = - qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (police->tcfp_R_tab == NULL) + p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (p->R_tab == NULL) goto failure; if (parm->peakrate.rate) { - police->tcfp_P_tab = - qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (police->tcfp_P_tab == NULL) + p->P_tab = qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (p->P_tab == NULL) goto failure; } } if (tb[TCA_POLICE_RESULT-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) goto failure; - police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } #ifdef 
CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; - police->tcfp_ewma_rate = - *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } #endif - police->tcfp_toks = police->tcfp_burst = parm->burst; - police->tcfp_mtu = parm->mtu; - if (police->tcfp_mtu == 0) { - police->tcfp_mtu = ~0; - if (police->tcfp_R_tab) - police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; + p->toks = p->burst = parm->burst; + p->mtu = parm->mtu; + if (p->mtu == 0) { + p->mtu = ~0; + if (p->R_tab) + p->mtu = 255<R_tab->rate.cell_log; } - if (police->tcfp_P_tab) - police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); - PSCHED_GET_TIME(police->tcfp_t_c); - police->tcf_index = parm->index ? parm->index : - tcf_police_new_index(); - police->tcf_action = parm->action; + if (p->P_tab) + p->ptoks = L2T_P(p, p->mtu); + PSCHED_GET_TIME(p->t_c); + p->index = parm->index ? : tcf_police_new_index(); + p->action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (est) - gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, - police->tcf_stats_lock, est); + gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); #endif - h = tcf_hash(police->tcf_index, POL_TAB_MASK); + h = tcf_police_hash(p->index); write_lock_bh(&police_lock); - police->tcf_next = tcf_police_ht[h]; - tcf_police_ht[h] = &police->common; + p->next = tcf_police_ht[h]; + tcf_police_ht[h] = p; write_unlock_bh(&police_lock); - return police; + return p; failure: - if (police->tcfp_R_tab) - qdisc_put_rtab(police->tcfp_R_tab); - kfree(police); + if (p->R_tab) + qdisc_put_rtab(p->R_tab); + kfree(p); return NULL; } -int tcf_police(struct sk_buff *skb, struct tcf_police *police) +int tcf_police(struct sk_buff *skb, struct tcf_police *p) { psched_time_t now; long toks; long ptoks = 0; - spin_lock(&police->tcf_lock); + spin_lock(&p->lock); - police->tcf_bstats.bytes += skb->len; - police->tcf_bstats.packets++; + 
p->bstats.bytes += skb->len; + p->bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate && - police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; + if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; } #endif - if (skb->len <= police->tcfp_mtu) { - if (police->tcfp_R_tab == NULL) { - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + + if (skb->len <= p->mtu) { + if (p->R_tab == NULL) { + spin_unlock(&p->lock); + return p->result; } PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, - police->tcfp_burst); - if (police->tcfp_P_tab) { - ptoks = toks + police->tcfp_ptoks; - if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) - ptoks = (long)L2T_P(police, police->tcfp_mtu); - ptoks -= L2T_P(police, skb->len); + + toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); + + if (p->P_tab) { + ptoks = toks + p->ptoks; + if (ptoks > (long)L2T_P(p, p->mtu)) + ptoks = (long)L2T_P(p, p->mtu); + ptoks -= L2T_P(p, skb->len); } - toks += police->tcfp_toks; - if (toks > (long)police->tcfp_burst) - toks = police->tcfp_burst; - toks -= L2T(police, skb->len); + toks += p->toks; + if (toks > (long)p->burst) + toks = p->burst; + toks -= L2T(p, skb->len); + if ((toks|ptoks) >= 0) { - police->tcfp_t_c = now; - police->tcfp_toks = toks; - police->tcfp_ptoks = ptoks; - spin_unlock(&police->tcf_lock); - return police->tcfp_result; + p->t_c = now; + p->toks = toks; + p->ptoks = ptoks; + spin_unlock(&p->lock); + return p->result; } } - police->tcf_qstats.overlimits++; - spin_unlock(&police->tcf_lock); - return police->tcf_action; + p->qstats.overlimits++; + spin_unlock(&p->lock); + return p->action; } EXPORT_SYMBOL(tcf_police); -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) { - unsigned char 
*b = skb->tail; + unsigned char *b = skb->tail; struct tc_police opt; - opt.index = police->tcf_index; - opt.action = police->tcf_action; - opt.mtu = police->tcfp_mtu; - opt.burst = police->tcfp_burst; - if (police->tcfp_R_tab) - opt.rate = police->tcfp_R_tab->rate; + opt.index = p->index; + opt.action = p->action; + opt.mtu = p->mtu; + opt.burst = p->burst; + if (p->R_tab) + opt.rate = p->R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (police->tcfp_P_tab) - opt.peakrate = police->tcfp_P_tab->rate; + if (p->P_tab) + opt.peakrate = p->P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (police->tcfp_result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), - &police->tcfp_result); + if (p->result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); #ifdef CONFIG_NET_ESTIMATOR - if (police->tcfp_ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); + if (p->ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); #endif return skb->len; @@ -585,20 +574,19 @@ int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) return -1; } -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) +int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) { struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, police->tcf_stats_lock, - &d) < 0) + TCA_XSTATS, p->stats_lock, &d) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || + if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) + gnet_stats_copy_queue(&d, &p->qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/trunk/net/sched/act_simple.c b/trunk/net/sched/act_simple.c index 
901571a67707..17105c82537f 100644 --- a/trunk/net/sched/act_simple.c +++ b/trunk/net/sched/act_simple.c @@ -20,175 +20,54 @@ #define TCA_ACT_SIMP 22 +/* XXX: Hide all these common elements under some macro + * probably +*/ #include #include -#define SIMP_TAB_MASK 7 -static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; -static u32 simp_idx_gen; +/* use generic hash table with 8 buckets */ +#define MY_TAB_SIZE 8 +#define MY_TAB_MASK (MY_TAB_SIZE - 1) +static u32 idx_gen; +static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE]; static DEFINE_RWLOCK(simp_lock); -static struct tcf_hashinfo simp_hash_info = { - .htab = tcf_simp_ht, - .hmask = SIMP_TAB_MASK, - .lock = &simp_lock, -}; +/* override the defaults */ +#define tcf_st tcf_defact +#define tc_st tc_defact +#define tcf_t_lock simp_lock +#define tcf_ht tcf_simp_ht + +#define CONFIG_NET_ACT_INIT 1 +#include +#include static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_defact *d = a->priv; + struct tcf_defact *p = PRIV(a, defact); - spin_lock(&d->tcf_lock); - d->tcf_tm.lastuse = jiffies; - d->tcf_bstats.bytes += skb->len; - d->tcf_bstats.packets++; + spin_lock(&p->lock); + p->tm.lastuse = jiffies; + p->bstats.bytes += skb->len; + p->bstats.packets++; /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", - (char *)d->tcfd_defdata, d->tcf_bstats.packets); - spin_unlock(&d->tcf_lock); - return d->tcf_action; -} - -static int tcf_simp_release(struct tcf_defact *d, int bind) -{ - int ret = 0; - if (d) { - if (bind) - d->tcf_bindcnt--; - d->tcf_refcnt--; - if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { - kfree(d->tcfd_defdata); - tcf_hash_destroy(&d->common, &simp_hash_info); - ret = 1; - } - } - return ret; -} - -static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) -{ - d->tcfd_defdata = 
kmalloc(datalen, GFP_KERNEL); - if (unlikely(!d->tcfd_defdata)) - return -ENOMEM; - d->tcfd_datalen = datalen; - memcpy(d->tcfd_defdata, defdata, datalen); - return 0; -} - -static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) -{ - kfree(d->tcfd_defdata); - return alloc_defdata(d, datalen, defdata); -} - -static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) -{ - struct rtattr *tb[TCA_DEF_MAX]; - struct tc_defact *parm; - struct tcf_defact *d; - struct tcf_common *pc; - void *defdata; - u32 datalen = 0; - int ret = 0; - - if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) - return -EINVAL; - - if (tb[TCA_DEF_PARMS - 1] == NULL || - RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) - return -EINVAL; - - parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); - defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); - if (defdata == NULL) - return -EINVAL; - - datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); - if (datalen <= 0) - return -EINVAL; - - pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); - if (!pc) { - pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, - &simp_idx_gen, &simp_hash_info); - if (unlikely(!pc)) - return -ENOMEM; - - d = to_defact(pc); - ret = alloc_defdata(d, datalen, defdata); - if (ret < 0) { - kfree(pc); - return ret; - } - ret = ACT_P_CREATED; - } else { - d = to_defact(pc); - if (!ovr) { - tcf_simp_release(d, bind); - return -EEXIST; - } - realloc_defdata(d, datalen, defdata); - } - - spin_lock_bh(&d->tcf_lock); - d->tcf_action = parm->action; - spin_unlock_bh(&d->tcf_lock); - - if (ret == ACT_P_CREATED) - tcf_hash_insert(pc, &simp_hash_info); - return ret; -} - -static inline int tcf_simp_cleanup(struct tc_action *a, int bind) -{ - struct tcf_defact *d = a->priv; - - if (d) - return tcf_simp_release(d, bind); - return 0; -} - -static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, - int bind, int ref) -{ - unsigned char *b = skb->tail; - 
struct tcf_defact *d = a->priv; - struct tc_defact opt; - struct tcf_t t; - - opt.index = d->tcf_index; - opt.refcnt = d->tcf_refcnt - ref; - opt.bindcnt = d->tcf_bindcnt - bind; - opt.action = d->tcf_action; - RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); - RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); - t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); - t.expires = jiffies_to_clock_t(d->tcf_tm.expires); - RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); - return skb->len; - -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); + spin_unlock(&p->lock); + return p->action; } static struct tc_action_ops act_simp_ops = { - .kind = "simple", - .hinfo = &simp_hash_info, - .type = TCA_ACT_SIMP, - .capab = TCA_CAP_NONE, - .owner = THIS_MODULE, - .act = tcf_simp, - .dump = tcf_simp_dump, - .cleanup = tcf_simp_cleanup, - .init = tcf_simp_init, - .walk = tcf_generic_walker, + .kind = "simple", + .type = TCA_ACT_SIMP, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_simp, + tca_use_default_ops }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); diff --git a/trunk/net/sched/cls_fw.c b/trunk/net/sched/cls_fw.c index e54acc6bcccd..e6973d9b686d 100644 --- a/trunk/net/sched/cls_fw.c +++ b/trunk/net/sched/cls_fw.c @@ -50,7 +50,6 @@ struct fw_head { struct fw_filter *ht[HTSIZE]; - u32 mask; }; struct fw_filter @@ -102,7 +101,7 @@ static int fw_classify(struct sk_buff *skb, struct tcf_proto *tp, struct fw_filter *f; int r; #ifdef CONFIG_NETFILTER - u32 id = skb->nfmark & head->mask; + u32 id = skb->nfmark; #else u32 id = 0; #endif @@ -210,9 +209,7 @@ static int fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, struct rtattr **tb, struct rtattr **tca, unsigned long base) { - struct fw_head *head = (struct fw_head *)tp->root; struct tcf_exts e; - u32 mask; int err; err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], 
&e, &fw_ext_map); @@ -235,15 +232,6 @@ fw_change_attrs(struct tcf_proto *tp, struct fw_filter *f, } #endif /* CONFIG_NET_CLS_IND */ - if (tb[TCA_FW_MASK-1]) { - if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) - goto errout; - mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); - if (mask != head->mask) - goto errout; - } else if (head->mask != 0xFFFFFFFF) - goto errout; - tcf_exts_change(tp, &f->exts, &e); return 0; @@ -279,17 +267,9 @@ static int fw_change(struct tcf_proto *tp, unsigned long base, return -EINVAL; if (head == NULL) { - u32 mask = 0xFFFFFFFF; - if (tb[TCA_FW_MASK-1]) { - if (RTA_PAYLOAD(tb[TCA_FW_MASK-1]) != sizeof(u32)) - return -EINVAL; - mask = *(u32*)RTA_DATA(tb[TCA_FW_MASK-1]); - } - head = kzalloc(sizeof(struct fw_head), GFP_KERNEL); if (head == NULL) return -ENOBUFS; - head->mask = mask; tcf_tree_lock(tp); tp->root = head; @@ -350,7 +330,6 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) static int fw_dump(struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct fw_head *head = (struct fw_head *)tp->root; struct fw_filter *f = (struct fw_filter*)fh; unsigned char *b = skb->tail; struct rtattr *rta; @@ -372,8 +351,6 @@ static int fw_dump(struct tcf_proto *tp, unsigned long fh, if (strlen(f->indev)) RTA_PUT(skb, TCA_FW_INDEV, IFNAMSIZ, f->indev); #endif /* CONFIG_NET_CLS_IND */ - if (head->mask != 0xFFFFFFFF) - RTA_PUT(skb, TCA_FW_MASK, 4, &head->mask); if (tcf_exts_dump(skb, &f->exts, &fw_ext_map) < 0) goto rtattr_failure; diff --git a/trunk/net/sched/sch_htb.c b/trunk/net/sched/sch_htb.c index bb3ddd4784b1..880a3394a51f 100644 --- a/trunk/net/sched/sch_htb.c +++ b/trunk/net/sched/sch_htb.c @@ -1,4 +1,4 @@ -/* +/* vim: ts=8 sw=8 * net/sched/sch_htb.c Hierarchical token bucket, feed tree version * * This program is free software; you can redistribute it and/or @@ -68,165 +68,218 @@ one less than their parent. 
*/ -#define HTB_HSIZE 16 /* classid hash size */ -#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ -#define HTB_RATECM 1 /* whether to use rate computer */ -#define HTB_HYSTERESIS 1 /* whether to use mode hysteresis for speedup */ -#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ +#define HTB_HSIZE 16 /* classid hash size */ +#define HTB_EWMAC 2 /* rate average over HTB_EWMAC*HTB_HSIZE sec */ +#undef HTB_DEBUG /* compile debugging support (activated by tc tool) */ +#define HTB_RATECM 1 /* whether to use rate computer */ +#define HTB_HYSTERESIS 1/* whether to use mode hysteresis for speedup */ +#define HTB_QLOCK(S) spin_lock_bh(&(S)->dev->queue_lock) +#define HTB_QUNLOCK(S) spin_unlock_bh(&(S)->dev->queue_lock) +#define HTB_VER 0x30011 /* major must be matched with number suplied by TC as version */ #if HTB_VER >> 16 != TC_HTB_PROTOVER #error "Mismatched sch_htb.c and pkt_sch.h" #endif +/* debugging support; S is subsystem, these are defined: + 0 - netlink messages + 1 - enqueue + 2 - drop & requeue + 3 - dequeue main + 4 - dequeue one prio DRR part + 5 - dequeue class accounting + 6 - class overlimit status computation + 7 - hint tree + 8 - event queue + 10 - rate estimator + 11 - classifier + 12 - fast dequeue cache + + L is level; 0 = none, 1 = basic info, 2 = detailed, 3 = full + q->debug uint32 contains 16 2-bit fields one for subsystem starting + from LSB + */ +#ifdef HTB_DEBUG +#define HTB_DBG_COND(S,L) (((q->debug>>(2*S))&3) >= L) +#define HTB_DBG(S,L,FMT,ARG...) 
if (HTB_DBG_COND(S,L)) \ + printk(KERN_DEBUG FMT,##ARG) +#define HTB_CHCL(cl) BUG_TRAP((cl)->magic == HTB_CMAGIC) +#define HTB_PASSQ q, +#define HTB_ARGQ struct htb_sched *q, +#define static +#undef __inline__ +#define __inline__ +#undef inline +#define inline +#define HTB_CMAGIC 0xFEFAFEF1 +#define htb_safe_rb_erase(N,R) do { BUG_TRAP((N)->rb_color != -1); \ + if ((N)->rb_color == -1) break; \ + rb_erase(N,R); \ + (N)->rb_color = -1; } while (0) +#else +#define HTB_DBG_COND(S,L) (0) +#define HTB_DBG(S,L,FMT,ARG...) +#define HTB_PASSQ +#define HTB_ARGQ +#define HTB_CHCL(cl) +#define htb_safe_rb_erase(N,R) rb_erase(N,R) +#endif + + /* used internaly to keep status of single class */ enum htb_cmode { - HTB_CANT_SEND, /* class can't send and can't borrow */ - HTB_MAY_BORROW, /* class can't send but may borrow */ - HTB_CAN_SEND /* class can send */ + HTB_CANT_SEND, /* class can't send and can't borrow */ + HTB_MAY_BORROW, /* class can't send but may borrow */ + HTB_CAN_SEND /* class can send */ }; /* interior & leaf nodes; props specific to leaves are marked L: */ -struct htb_class { - /* general class parameters */ - u32 classid; - struct gnet_stats_basic bstats; - struct gnet_stats_queue qstats; - struct gnet_stats_rate_est rate_est; - struct tc_htb_xstats xstats; /* our special stats */ - int refcnt; /* usage count of this class */ +struct htb_class +{ +#ifdef HTB_DEBUG + unsigned magic; +#endif + /* general class parameters */ + u32 classid; + struct gnet_stats_basic bstats; + struct gnet_stats_queue qstats; + struct gnet_stats_rate_est rate_est; + struct tc_htb_xstats xstats;/* our special stats */ + int refcnt; /* usage count of this class */ #ifdef HTB_RATECM - /* rate measurement counters */ - unsigned long rate_bytes, sum_bytes; - unsigned long rate_packets, sum_packets; + /* rate measurement counters */ + unsigned long rate_bytes,sum_bytes; + unsigned long rate_packets,sum_packets; #endif - /* topology */ - int level; /* our level (see above) */ - struct 
htb_class *parent; /* parent class */ - struct hlist_node hlist; /* classid hash list item */ - struct list_head sibling; /* sibling list item */ - struct list_head children; /* children list */ - - union { - struct htb_class_leaf { - struct Qdisc *q; - int prio; - int aprio; - int quantum; - int deficit[TC_HTB_MAXDEPTH]; - struct list_head drop_list; - } leaf; - struct htb_class_inner { - struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ - struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ - /* When class changes from state 1->2 and disconnects from - parent's feed then we lost ptr value and start from the - first child again. Here we store classid of the - last valid ptr (used when ptr is NULL). */ - u32 last_ptr_id[TC_HTB_NUMPRIO]; - } inner; - } un; - struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ - struct rb_node pq_node; /* node for event queue */ - unsigned long pq_key; /* the same type as jiffies global */ - - int prio_activity; /* for which prios are we active */ - enum htb_cmode cmode; /* current mode of the class */ - - /* class attached filters */ - struct tcf_proto *filter_list; - int filter_cnt; - - int warned; /* only one warning about non work conserving .. 
*/ - - /* token bucket parameters */ - struct qdisc_rate_table *rate; /* rate table of the class itself */ - struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ - long buffer, cbuffer; /* token bucket depth/rate */ - psched_tdiff_t mbuffer; /* max wait time */ - long tokens, ctokens; /* current number of tokens */ - psched_time_t t_c; /* checkpoint time */ + /* topology */ + int level; /* our level (see above) */ + struct htb_class *parent; /* parent class */ + struct list_head hlist; /* classid hash list item */ + struct list_head sibling; /* sibling list item */ + struct list_head children; /* children list */ + + union { + struct htb_class_leaf { + struct Qdisc *q; + int prio; + int aprio; + int quantum; + int deficit[TC_HTB_MAXDEPTH]; + struct list_head drop_list; + } leaf; + struct htb_class_inner { + struct rb_root feed[TC_HTB_NUMPRIO]; /* feed trees */ + struct rb_node *ptr[TC_HTB_NUMPRIO]; /* current class ptr */ + /* When class changes from state 1->2 and disconnects from + parent's feed then we lost ptr value and start from the + first child again. Here we store classid of the + last valid ptr (used when ptr is NULL). */ + u32 last_ptr_id[TC_HTB_NUMPRIO]; + } inner; + } un; + struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ + struct rb_node pq_node; /* node for event queue */ + unsigned long pq_key; /* the same type as jiffies global */ + + int prio_activity; /* for which prios are we active */ + enum htb_cmode cmode; /* current mode of the class */ + + /* class attached filters */ + struct tcf_proto *filter_list; + int filter_cnt; + + int warned; /* only one warning about non work conserving .. 
*/ + + /* token bucket parameters */ + struct qdisc_rate_table *rate; /* rate table of the class itself */ + struct qdisc_rate_table *ceil; /* ceiling rate (limits borrows too) */ + long buffer,cbuffer; /* token bucket depth/rate */ + psched_tdiff_t mbuffer; /* max wait time */ + long tokens,ctokens; /* current number of tokens */ + psched_time_t t_c; /* checkpoint time */ }; /* TODO: maybe compute rate when size is too large .. or drop ? */ -static inline long L2T(struct htb_class *cl, struct qdisc_rate_table *rate, - int size) -{ - int slot = size >> rate->rate.cell_log; - if (slot > 255) { - cl->xstats.giants++; - slot = 255; - } - return rate->data[slot]; +static __inline__ long L2T(struct htb_class *cl,struct qdisc_rate_table *rate, + int size) +{ + int slot = size >> rate->rate.cell_log; + if (slot > 255) { + cl->xstats.giants++; + slot = 255; + } + return rate->data[slot]; } -struct htb_sched { - struct list_head root; /* root classes list */ - struct hlist_head hash[HTB_HSIZE]; /* hashed by classid */ - struct list_head drops[TC_HTB_NUMPRIO];/* active leaves (for drops) */ - - /* self list - roots of self generating tree */ - struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - int row_mask[TC_HTB_MAXDEPTH]; - struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; - - /* self wait list - roots of wait PQs per row */ - struct rb_root wait_pq[TC_HTB_MAXDEPTH]; - - /* time of nearest event per level (row) */ - unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; - - /* cached value of jiffies in dequeue */ - unsigned long jiffies; - - /* whether we hit non-work conserving class during this dequeue; we use */ - int nwc_hit; /* this to disable mindelay complaint in dequeue */ - - int defcls; /* class where unclassified flows go to */ - - /* filters for qdisc itself */ - struct tcf_proto *filter_list; - int filter_cnt; - - int rate2quantum; /* quant = rate / rate2quantum */ - psched_time_t now; /* cached dequeue time 
*/ - struct timer_list timer; /* send delay timer */ +struct htb_sched +{ + struct list_head root; /* root classes list */ + struct list_head hash[HTB_HSIZE]; /* hashed by classid */ + struct list_head drops[TC_HTB_NUMPRIO]; /* active leaves (for drops) */ + + /* self list - roots of self generating tree */ + struct rb_root row[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + int row_mask[TC_HTB_MAXDEPTH]; + struct rb_node *ptr[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + u32 last_ptr_id[TC_HTB_MAXDEPTH][TC_HTB_NUMPRIO]; + + /* self wait list - roots of wait PQs per row */ + struct rb_root wait_pq[TC_HTB_MAXDEPTH]; + + /* time of nearest event per level (row) */ + unsigned long near_ev_cache[TC_HTB_MAXDEPTH]; + + /* cached value of jiffies in dequeue */ + unsigned long jiffies; + + /* whether we hit non-work conserving class during this dequeue; we use */ + int nwc_hit; /* this to disable mindelay complaint in dequeue */ + + int defcls; /* class where unclassified flows go to */ + u32 debug; /* subsystem debug levels */ + + /* filters for qdisc itself */ + struct tcf_proto *filter_list; + int filter_cnt; + + int rate2quantum; /* quant = rate / rate2quantum */ + psched_time_t now; /* cached dequeue time */ + struct timer_list timer; /* send delay timer */ #ifdef HTB_RATECM - struct timer_list rttim; /* rate computer timer */ - int recmp_bucket; /* which hash bucket to recompute next */ + struct timer_list rttim; /* rate computer timer */ + int recmp_bucket; /* which hash bucket to recompute next */ #endif + + /* non shaped skbs; let them go directly thru */ + struct sk_buff_head direct_queue; + int direct_qlen; /* max qlen of above */ - /* non shaped skbs; let them go directly thru */ - struct sk_buff_head direct_queue; - int direct_qlen; /* max qlen of above */ - - long direct_pkts; + long direct_pkts; }; /* compute hash of size HTB_HSIZE for given handle */ -static inline int htb_hash(u32 h) +static __inline__ int htb_hash(u32 h) { #if HTB_HSIZE != 16 -#error "Declare new hash for your 
HTB_HSIZE" + #error "Declare new hash for your HTB_HSIZE" #endif - h ^= h >> 8; /* stolen from cbq_hash */ - h ^= h >> 4; - return h & 0xf; + h ^= h>>8; /* stolen from cbq_hash */ + h ^= h>>4; + return h & 0xf; } /* find class in global hash table using given handle */ -static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) +static __inline__ struct htb_class *htb_find(u32 handle, struct Qdisc *sch) { struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - - if (TC_H_MAJ(handle) != sch->handle) + struct list_head *p; + if (TC_H_MAJ(handle) != sch->handle) return NULL; - - hlist_for_each_entry(cl, p, q->hash + htb_hash(handle), hlist) { + + list_for_each (p,q->hash+htb_hash(handle)) { + struct htb_class *cl = list_entry(p,struct htb_class,hlist); if (cl->classid == handle) return cl; } @@ -251,8 +304,7 @@ static inline u32 htb_classid(struct htb_class *cl) return (cl && cl != HTB_DIRECT) ? cl->classid : TC_H_UNSPEC; } -static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, - int *qerr) +static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl; @@ -264,8 +316,8 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, note that nfmark can be used too by attaching filter fw with no rules in it */ if (skb->priority == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) selected */ - if ((cl = htb_find(skb->priority, sch)) != NULL && cl->level == 0) + return HTB_DIRECT; /* X:0 (direct flow) selected */ + if ((cl = htb_find(skb->priority,sch)) != NULL && cl->level == 0) return cl; *qerr = NET_XMIT_BYPASS; @@ -274,7 +326,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, #ifdef CONFIG_NET_CLS_ACT switch (result) { case TC_ACT_QUEUED: - case TC_ACT_STOLEN: + case TC_ACT_STOLEN: *qerr = NET_XMIT_SUCCESS; case TC_ACT_SHOT: return NULL; @@ -283,44 +335,97 
@@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, if (result == TC_POLICE_SHOT) return HTB_DIRECT; #endif - if ((cl = (void *)res.class) == NULL) { + if ((cl = (void*)res.class) == NULL) { if (res.classid == sch->handle) - return HTB_DIRECT; /* X:0 (direct flow) */ - if ((cl = htb_find(res.classid, sch)) == NULL) - break; /* filter selected invalid classid */ + return HTB_DIRECT; /* X:0 (direct flow) */ + if ((cl = htb_find(res.classid,sch)) == NULL) + break; /* filter selected invalid classid */ } if (!cl->level) - return cl; /* we hit leaf; return it */ + return cl; /* we hit leaf; return it */ /* we have got inner class; apply inner filter chain */ tcf = cl->filter_list; } /* classification failed; try to use default class */ - cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle), q->defcls), sch); + cl = htb_find(TC_H_MAKE(TC_H_MAJ(sch->handle),q->defcls),sch); if (!cl || cl->level) - return HTB_DIRECT; /* bad default .. this is safe bet */ + return HTB_DIRECT; /* bad default .. 
this is safe bet */ return cl; } +#ifdef HTB_DEBUG +static void htb_next_rb_node(struct rb_node **n); +#define HTB_DUMTREE(root,memb) if(root) { \ + struct rb_node *n = (root)->rb_node; \ + while (n->rb_left) n = n->rb_left; \ + while (n) { \ + struct htb_class *cl = rb_entry(n, struct htb_class, memb); \ + printk(" %x",cl->classid); htb_next_rb_node (&n); \ + } } + +static void htb_debug_dump (struct htb_sched *q) +{ + int i,p; + printk(KERN_DEBUG "htb*g j=%lu lj=%lu\n",jiffies,q->jiffies); + /* rows */ + for (i=TC_HTB_MAXDEPTH-1;i>=0;i--) { + printk(KERN_DEBUG "htb*r%d m=%x",i,q->row_mask[i]); + for (p=0;prow[i][p].rb_node) continue; + printk(" p%d:",p); + HTB_DUMTREE(q->row[i]+p,node[p]); + } + printk("\n"); + } + /* classes */ + for (i = 0; i < HTB_HSIZE; i++) { + struct list_head *l; + list_for_each (l,q->hash+i) { + struct htb_class *cl = list_entry(l,struct htb_class,hlist); + long diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); + printk(KERN_DEBUG "htb*c%x m=%d t=%ld c=%ld pq=%lu df=%ld ql=%d " + "pa=%x f:", + cl->classid,cl->cmode,cl->tokens,cl->ctokens, + cl->pq_node.rb_color==-1?0:cl->pq_key,diff, + cl->level?0:cl->un.leaf.q->q.qlen,cl->prio_activity); + if (cl->level) + for (p=0;pun.inner.feed[p].rb_node) continue; + printk(" p%d a=%x:",p,cl->un.inner.ptr[p]?rb_entry(cl->un.inner.ptr[p], struct htb_class,node[p])->classid:0); + HTB_DUMTREE(cl->un.inner.feed+p,node[p]); + } + printk("\n"); + } + } +} +#endif /** * htb_add_to_id_tree - adds class to the round robin list * * Routine adds class to the list (actually tree) sorted by classid. * Make sure that class is not already on such list for given prio. 
*/ -static void htb_add_to_id_tree(struct rb_root *root, - struct htb_class *cl, int prio) +static void htb_add_to_id_tree (HTB_ARGQ struct rb_root *root, + struct htb_class *cl,int prio) { struct rb_node **p = &root->rb_node, *parent = NULL; - + HTB_DBG(7,3,"htb_add_id_tree cl=%X prio=%d\n",cl->classid,prio); +#ifdef HTB_DEBUG + if (cl->node[prio].rb_color != -1) { BUG_TRAP(0); return; } + HTB_CHCL(cl); + if (*p) { + struct htb_class *x = rb_entry(*p,struct htb_class,node[prio]); + HTB_CHCL(x); + } +#endif while (*p) { - struct htb_class *c; - parent = *p; + struct htb_class *c; parent = *p; c = rb_entry(parent, struct htb_class, node[prio]); - + HTB_CHCL(c); if (cl->classid > c->classid) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->node[prio], parent, p); @@ -334,11 +439,17 @@ static void htb_add_to_id_tree(struct rb_root *root, * change its mode in cl->pq_key microseconds. Make sure that class is not * already in the queue. */ -static void htb_add_to_wait_tree(struct htb_sched *q, - struct htb_class *cl, long delay) +static void htb_add_to_wait_tree (struct htb_sched *q, + struct htb_class *cl,long delay,int debug_hint) { struct rb_node **p = &q->wait_pq[cl->level].rb_node, *parent = NULL; - + HTB_DBG(7,3,"htb_add_wt cl=%X key=%lu\n",cl->classid,cl->pq_key); +#ifdef HTB_DEBUG + if (cl->pq_node.rb_color != -1) { BUG_TRAP(0); return; } + HTB_CHCL(cl); + if ((delay <= 0 || delay > cl->mbuffer) && net_ratelimit()) + printk(KERN_ERR "HTB: suspicious delay in wait_tree d=%ld cl=%X h=%d\n",delay,cl->classid,debug_hint); +#endif cl->pq_key = q->jiffies + PSCHED_US2JIFFIE(delay); if (cl->pq_key == q->jiffies) cl->pq_key++; @@ -346,14 +457,13 @@ static void htb_add_to_wait_tree(struct htb_sched *q, /* update the nearest event cache */ if (time_after(q->near_ev_cache[cl->level], cl->pq_key)) q->near_ev_cache[cl->level] = cl->pq_key; - + while (*p) { - struct htb_class *c; - parent = *p; + struct htb_class *c; parent = *p; c = 
rb_entry(parent, struct htb_class, pq_node); if (time_after_eq(cl->pq_key, c->pq_key)) p = &parent->rb_right; - else + else p = &parent->rb_left; } rb_link_node(&cl->pq_node, parent, p); @@ -366,7 +476,7 @@ static void htb_add_to_wait_tree(struct htb_sched *q, * When we are past last key we return NULL. * Average complexity is 2 steps per call. */ -static inline void htb_next_rb_node(struct rb_node **n) +static void htb_next_rb_node(struct rb_node **n) { *n = rb_next(*n); } @@ -377,51 +487,42 @@ static inline void htb_next_rb_node(struct rb_node **n) * The class is added to row at priorities marked in mask. * It does nothing if mask == 0. */ -static inline void htb_add_class_to_row(struct htb_sched *q, - struct htb_class *cl, int mask) +static inline void htb_add_class_to_row(struct htb_sched *q, + struct htb_class *cl,int mask) { + HTB_DBG(7,2,"htb_addrow cl=%X mask=%X rmask=%X\n", + cl->classid,mask,q->row_mask[cl->level]); + HTB_CHCL(cl); q->row_mask[cl->level] |= mask; while (mask) { int prio = ffz(~mask); mask &= ~(1 << prio); - htb_add_to_id_tree(q->row[cl->level] + prio, cl, prio); - } -} - -/* If this triggers, it is a bug in this code, but it need not be fatal */ -static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root) -{ - if (RB_EMPTY_NODE(rb)) { - WARN_ON(1); - } else { - rb_erase(rb, root); - RB_CLEAR_NODE(rb); + htb_add_to_id_tree(HTB_PASSQ q->row[cl->level]+prio,cl,prio); } } - /** * htb_remove_class_from_row - removes class from its row * * The class is removed from row at priorities marked in mask. * It does nothing if mask == 0. 
*/ -static inline void htb_remove_class_from_row(struct htb_sched *q, - struct htb_class *cl, int mask) +static __inline__ void htb_remove_class_from_row(struct htb_sched *q, + struct htb_class *cl,int mask) { int m = 0; - + HTB_CHCL(cl); while (mask) { int prio = ffz(~mask); - mask &= ~(1 << prio); - if (q->ptr[cl->level][prio] == cl->node + prio) - htb_next_rb_node(q->ptr[cl->level] + prio); - - htb_safe_rb_erase(cl->node + prio, q->row[cl->level] + prio); - if (!q->row[cl->level][prio].rb_node) + if (q->ptr[cl->level][prio] == cl->node+prio) + htb_next_rb_node(q->ptr[cl->level]+prio); + htb_safe_rb_erase(cl->node + prio,q->row[cl->level]+prio); + if (!q->row[cl->level][prio].rb_node) m |= 1 << prio; } + HTB_DBG(7,2,"htb_delrow cl=%X mask=%X rmask=%X maskdel=%X\n", + cl->classid,mask,q->row_mask[cl->level],m); q->row_mask[cl->level] &= ~m; } @@ -432,31 +533,34 @@ static inline void htb_remove_class_from_row(struct htb_sched *q, * for priorities it is participating on. cl->cmode must be new * (activated) mode. It does nothing if cl->prio_activity == 0. 
*/ -static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) +static void htb_activate_prios(struct htb_sched *q,struct htb_class *cl) { struct htb_class *p = cl->parent; - long m, mask = cl->prio_activity; + long m,mask = cl->prio_activity; + HTB_DBG(7,2,"htb_act_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); + HTB_CHCL(cl); while (cl->cmode == HTB_MAY_BORROW && p && mask) { - m = mask; - while (m) { + HTB_CHCL(p); + m = mask; while (m) { int prio = ffz(~m); m &= ~(1 << prio); - + if (p->un.inner.feed[prio].rb_node) /* parent already has its feed in use so that reset bit in mask as parent is already ok */ mask &= ~(1 << prio); - - htb_add_to_id_tree(p->un.inner.feed + prio, cl, prio); + + htb_add_to_id_tree(HTB_PASSQ p->un.inner.feed+prio,cl,prio); } + HTB_DBG(7,3,"htb_act_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", + p->classid,p->prio_activity,mask,p->cmode); p->prio_activity |= mask; - cl = p; - p = cl->parent; - + cl = p; p = cl->parent; + HTB_CHCL(cl); } if (cl->cmode == HTB_CAN_SEND && mask) - htb_add_class_to_row(q, cl, mask); + htb_add_class_to_row(q,cl,mask); } /** @@ -469,52 +573,39 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl) static void htb_deactivate_prios(struct htb_sched *q, struct htb_class *cl) { struct htb_class *p = cl->parent; - long m, mask = cl->prio_activity; + long m,mask = cl->prio_activity; + HTB_DBG(7,2,"htb_deact_prios cl=%X mask=%lX cmode=%d\n",cl->classid,mask,cl->cmode); + HTB_CHCL(cl); while (cl->cmode == HTB_MAY_BORROW && p && mask) { - m = mask; - mask = 0; + m = mask; mask = 0; while (m) { int prio = ffz(~m); m &= ~(1 << prio); - - if (p->un.inner.ptr[prio] == cl->node + prio) { + + if (p->un.inner.ptr[prio] == cl->node+prio) { /* we are removing child which is pointed to from parent feed - forget the pointer but remember classid */ p->un.inner.last_ptr_id[prio] = cl->classid; p->un.inner.ptr[prio] = NULL; } - - htb_safe_rb_erase(cl->node + prio, p->un.inner.feed + 
prio); - - if (!p->un.inner.feed[prio].rb_node) + + htb_safe_rb_erase(cl->node + prio,p->un.inner.feed + prio); + + if (!p->un.inner.feed[prio].rb_node) mask |= 1 << prio; } - + HTB_DBG(7,3,"htb_deact_pr_aft p=%X pact=%X mask=%lX pmode=%d\n", + p->classid,p->prio_activity,mask,p->cmode); p->prio_activity &= ~mask; - cl = p; - p = cl->parent; - + cl = p; p = cl->parent; + HTB_CHCL(cl); } - if (cl->cmode == HTB_CAN_SEND && mask) - htb_remove_class_from_row(q, cl, mask); + if (cl->cmode == HTB_CAN_SEND && mask) + htb_remove_class_from_row(q,cl,mask); } -#if HTB_HYSTERESIS -static inline long htb_lowater(const struct htb_class *cl) -{ - return cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : 0; -} -static inline long htb_hiwater(const struct htb_class *cl) -{ - return cl->cmode == HTB_CAN_SEND ? -cl->buffer : 0; -} -#else -#define htb_lowater(cl) (0) -#define htb_hiwater(cl) (0) -#endif - /** * htb_class_mode - computes and returns current class mode * @@ -526,21 +617,28 @@ static inline long htb_hiwater(const struct htb_class *cl) * 0 .. -cl->{c,}buffer range. It is meant to limit number of * mode transitions per time unit. The speed gain is about 1/6. */ -static inline enum htb_cmode -htb_class_mode(struct htb_class *cl, long *diff) +static __inline__ enum htb_cmode +htb_class_mode(struct htb_class *cl,long *diff) { - long toks; - - if ((toks = (cl->ctokens + *diff)) < htb_lowater(cl)) { - *diff = -toks; - return HTB_CANT_SEND; - } + long toks; - if ((toks = (cl->tokens + *diff)) >= htb_hiwater(cl)) - return HTB_CAN_SEND; + if ((toks = (cl->ctokens + *diff)) < ( +#if HTB_HYSTERESIS + cl->cmode != HTB_CANT_SEND ? -cl->cbuffer : +#endif + 0)) { + *diff = -toks; + return HTB_CANT_SEND; + } + if ((toks = (cl->tokens + *diff)) >= ( +#if HTB_HYSTERESIS + cl->cmode == HTB_CAN_SEND ? 
-cl->buffer : +#endif + 0)) + return HTB_CAN_SEND; - *diff = -toks; - return HTB_MAY_BORROW; + *diff = -toks; + return HTB_MAY_BORROW; } /** @@ -552,21 +650,24 @@ htb_class_mode(struct htb_class *cl, long *diff) * be different from old one and cl->pq_key has to be valid if changing * to mode other than HTB_CAN_SEND (see htb_add_to_wait_tree). */ -static void +static void htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) -{ - enum htb_cmode new_mode = htb_class_mode(cl, diff); +{ + enum htb_cmode new_mode = htb_class_mode(cl,diff); + + HTB_CHCL(cl); + HTB_DBG(7,1,"htb_chging_clmode %d->%d cl=%X\n",cl->cmode,new_mode,cl->classid); if (new_mode == cl->cmode) - return; - - if (cl->prio_activity) { /* not necessary: speed optimization */ - if (cl->cmode != HTB_CANT_SEND) - htb_deactivate_prios(q, cl); + return; + + if (cl->prio_activity) { /* not necessary: speed optimization */ + if (cl->cmode != HTB_CANT_SEND) + htb_deactivate_prios(q,cl); cl->cmode = new_mode; - if (new_mode != HTB_CANT_SEND) - htb_activate_prios(q, cl); - } else + if (new_mode != HTB_CANT_SEND) + htb_activate_prios(q,cl); + } else cl->cmode = new_mode; } @@ -577,15 +678,14 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, long *diff) * for the prio. It can be called on already active leaf safely. * It also adds leaf into droplist. 
*/ -static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) +static __inline__ void htb_activate(struct htb_sched *q,struct htb_class *cl) { BUG_TRAP(!cl->level && cl->un.leaf.q && cl->un.leaf.q->q.qlen); - + HTB_CHCL(cl); if (!cl->prio_activity) { cl->prio_activity = 1 << (cl->un.leaf.aprio = cl->un.leaf.prio); - htb_activate_prios(q, cl); - list_add_tail(&cl->un.leaf.drop_list, - q->drops + cl->un.leaf.aprio); + htb_activate_prios(q,cl); + list_add_tail(&cl->un.leaf.drop_list,q->drops+cl->un.leaf.aprio); } } @@ -595,120 +695,120 @@ static inline void htb_activate(struct htb_sched *q, struct htb_class *cl) * Make sure that leaf is active. In the other words it can't be called * with non-active leaf. It also removes class from the drop list. */ -static inline void htb_deactivate(struct htb_sched *q, struct htb_class *cl) +static __inline__ void +htb_deactivate(struct htb_sched *q,struct htb_class *cl) { BUG_TRAP(cl->prio_activity); - - htb_deactivate_prios(q, cl); + HTB_CHCL(cl); + htb_deactivate_prios(q,cl); cl->prio_activity = 0; list_del_init(&cl->un.leaf.drop_list); } static int htb_enqueue(struct sk_buff *skb, struct Qdisc *sch) { - int ret; - struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_classify(skb, sch, &ret); - - if (cl == HTB_DIRECT) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen) { - __skb_queue_tail(&q->direct_queue, skb); - q->direct_pkts++; - } else { - kfree_skb(skb); - sch->qstats.drops++; - return NET_XMIT_DROP; - } -#ifdef CONFIG_NET_CLS_ACT - } else if (!cl) { - if (ret == NET_XMIT_BYPASS) - sch->qstats.drops++; - kfree_skb(skb); - return ret; -#endif - } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != - NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; + int ret; + struct htb_sched *q = qdisc_priv(sch); + struct htb_class *cl = htb_classify(skb,sch,&ret); + + if (cl == HTB_DIRECT) { + /* enqueue to helper queue */ + if 
(q->direct_queue.qlen < q->direct_qlen) { + __skb_queue_tail(&q->direct_queue, skb); + q->direct_pkts++; } else { - cl->bstats.packets++; - cl->bstats.bytes += skb->len; - htb_activate(q, cl); + kfree_skb(skb); + sch->qstats.drops++; + return NET_XMIT_DROP; } - - sch->q.qlen++; - sch->bstats.packets++; - sch->bstats.bytes += skb->len; - return NET_XMIT_SUCCESS; +#ifdef CONFIG_NET_CLS_ACT + } else if (!cl) { + if (ret == NET_XMIT_BYPASS) + sch->qstats.drops++; + kfree_skb (skb); + return ret; +#endif + } else if (cl->un.leaf.q->enqueue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else { + cl->bstats.packets++; cl->bstats.bytes += skb->len; + htb_activate (q,cl); + } + + sch->q.qlen++; + sch->bstats.packets++; sch->bstats.bytes += skb->len; + HTB_DBG(1,1,"htb_enq_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); + return NET_XMIT_SUCCESS; } /* TODO: requeuing packet charges it to policers again !! */ static int htb_requeue(struct sk_buff *skb, struct Qdisc *sch) { - struct htb_sched *q = qdisc_priv(sch); - int ret = NET_XMIT_SUCCESS; - struct htb_class *cl = htb_classify(skb, sch, &ret); - struct sk_buff *tskb; - - if (cl == HTB_DIRECT || !cl) { - /* enqueue to helper queue */ - if (q->direct_queue.qlen < q->direct_qlen && cl) { - __skb_queue_head(&q->direct_queue, skb); - } else { - __skb_queue_head(&q->direct_queue, skb); - tskb = __skb_dequeue_tail(&q->direct_queue); - kfree_skb(tskb); - sch->qstats.drops++; - return NET_XMIT_CN; - } - } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != - NET_XMIT_SUCCESS) { - sch->qstats.drops++; - cl->qstats.drops++; - return NET_XMIT_DROP; - } else - htb_activate(q, cl); - - sch->q.qlen++; - sch->qstats.requeues++; - return NET_XMIT_SUCCESS; + struct htb_sched *q = qdisc_priv(sch); + int ret = NET_XMIT_SUCCESS; + struct htb_class *cl = htb_classify(skb,sch, &ret); + struct sk_buff *tskb; + + if (cl == HTB_DIRECT || !cl) { + /* enqueue 
to helper queue */ + if (q->direct_queue.qlen < q->direct_qlen && cl) { + __skb_queue_head(&q->direct_queue, skb); + } else { + __skb_queue_head(&q->direct_queue, skb); + tskb = __skb_dequeue_tail(&q->direct_queue); + kfree_skb (tskb); + sch->qstats.drops++; + return NET_XMIT_CN; + } + } else if (cl->un.leaf.q->ops->requeue(skb, cl->un.leaf.q) != NET_XMIT_SUCCESS) { + sch->qstats.drops++; + cl->qstats.drops++; + return NET_XMIT_DROP; + } else + htb_activate (q,cl); + + sch->q.qlen++; + sch->qstats.requeues++; + HTB_DBG(1,1,"htb_req_ok cl=%X skb=%p\n",(cl && cl != HTB_DIRECT)?cl->classid:0,skb); + return NET_XMIT_SUCCESS; } static void htb_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc *)arg; - sch->flags &= ~TCQ_F_THROTTLED; - wmb(); - netif_schedule(sch->dev); + struct Qdisc *sch = (struct Qdisc*)arg; + sch->flags &= ~TCQ_F_THROTTLED; + wmb(); + netif_schedule(sch->dev); } #ifdef HTB_RATECM #define RT_GEN(D,R) R+=D-(R/HTB_EWMAC);D=0 static void htb_rate_timer(unsigned long arg) { - struct Qdisc *sch = (struct Qdisc *)arg; + struct Qdisc *sch = (struct Qdisc*)arg; struct htb_sched *q = qdisc_priv(sch); - struct hlist_node *p; - struct htb_class *cl; - + struct list_head *p; /* lock queue so that we can muck with it */ - spin_lock_bh(&sch->dev->queue_lock); + HTB_QLOCK(sch); + HTB_DBG(10,1,"htb_rttmr j=%ld\n",jiffies); q->rttim.expires = jiffies + HZ; add_timer(&q->rttim); /* scan and recompute one bucket at time */ - if (++q->recmp_bucket >= HTB_HSIZE) + if (++q->recmp_bucket >= HTB_HSIZE) q->recmp_bucket = 0; - - hlist_for_each_entry(cl,p, q->hash + q->recmp_bucket, hlist) { - RT_GEN(cl->sum_bytes, cl->rate_bytes); - RT_GEN(cl->sum_packets, cl->rate_packets); + list_for_each (p,q->hash+q->recmp_bucket) { + struct htb_class *cl = list_entry(p,struct htb_class,hlist); + HTB_DBG(10,2,"htb_rttmr_cl cl=%X sbyte=%lu spkt=%lu\n", + cl->classid,cl->sum_bytes,cl->sum_packets); + RT_GEN (cl->sum_bytes,cl->rate_bytes); + RT_GEN 
(cl->sum_packets,cl->rate_packets); } - spin_unlock_bh(&sch->dev->queue_lock); + HTB_QUNLOCK(sch); } #endif @@ -723,11 +823,12 @@ static void htb_rate_timer(unsigned long arg) * CAN_SEND) because we can use more precise clock that event queue here. * In such case we remove class from event queue first. */ -static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, - int level, int bytes) -{ - long toks, diff; +static void htb_charge_class(struct htb_sched *q,struct htb_class *cl, + int level,int bytes) +{ + long toks,diff; enum htb_cmode old_mode; + HTB_DBG(5,1,"htb_chrg_cl cl=%X lev=%d len=%d\n",cl->classid,level,bytes); #define HTB_ACCNT(T,B,R) toks = diff + cl->T; \ if (toks > cl->B) toks = cl->B; \ @@ -736,31 +837,47 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, cl->T = toks while (cl) { - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); + HTB_CHCL(cl); + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); +#ifdef HTB_DEBUG + if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { + if (net_ratelimit()) + printk(KERN_ERR "HTB: bad diff in charge, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", + cl->classid, diff, +#ifdef CONFIG_NET_SCH_CLK_GETTIMEOFDAY + q->now.tv_sec * 1000000ULL + q->now.tv_usec, + cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, +#else + (unsigned long long) q->now, + (unsigned long long) cl->t_c, +#endif + q->jiffies); + diff = 1000; + } +#endif if (cl->level >= level) { - if (cl->level == level) - cl->xstats.lends++; - HTB_ACCNT(tokens, buffer, rate); + if (cl->level == level) cl->xstats.lends++; + HTB_ACCNT (tokens,buffer,rate); } else { cl->xstats.borrows++; - cl->tokens += diff; /* we moved t_c; update tokens */ + cl->tokens += diff; /* we moved t_c; update tokens */ } - HTB_ACCNT(ctokens, cbuffer, ceil); + HTB_ACCNT (ctokens,cbuffer,ceil); cl->t_c = q->now; + HTB_DBG(5,2,"htb_chrg_clp cl=%X diff=%ld tok=%ld ctok=%ld\n",cl->classid,diff,cl->tokens,cl->ctokens); - 
old_mode = cl->cmode; - diff = 0; - htb_change_class_mode(q, cl, &diff); + old_mode = cl->cmode; diff = 0; + htb_change_class_mode(q,cl,&diff); if (old_mode != cl->cmode) { if (old_mode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); + htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree(q, cl, diff); + htb_add_to_wait_tree (q,cl,diff,1); } + #ifdef HTB_RATECM /* update rate counters */ - cl->sum_bytes += bytes; - cl->sum_packets++; + cl->sum_bytes += bytes; cl->sum_packets++; #endif /* update byte stats except for leaves which are already updated */ @@ -779,46 +896,60 @@ static void htb_charge_class(struct htb_sched *q, struct htb_class *cl, * next pending event (0 for no event in pq). * Note: Aplied are events whose have cl->pq_key <= jiffies. */ -static long htb_do_events(struct htb_sched *q, int level) +static long htb_do_events(struct htb_sched *q,int level) { int i; - + HTB_DBG(8,1,"htb_do_events l=%d root=%p rmask=%X\n", + level,q->wait_pq[level].rb_node,q->row_mask[level]); for (i = 0; i < 500; i++) { struct htb_class *cl; long diff; struct rb_node *p = q->wait_pq[level].rb_node; - if (!p) - return 0; - while (p->rb_left) - p = p->rb_left; + if (!p) return 0; + while (p->rb_left) p = p->rb_left; cl = rb_entry(p, struct htb_class, pq_node); if (time_after(cl->pq_key, q->jiffies)) { + HTB_DBG(8,3,"htb_do_ev_ret delay=%ld\n",cl->pq_key - q->jiffies); return cl->pq_key - q->jiffies; } - htb_safe_rb_erase(p, q->wait_pq + level); - diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32) cl->mbuffer); - htb_change_class_mode(q, cl, &diff); + htb_safe_rb_erase(p,q->wait_pq+level); + diff = PSCHED_TDIFF_SAFE(q->now, cl->t_c, (u32)cl->mbuffer); +#ifdef HTB_DEBUG + if (diff > cl->mbuffer || diff < 0 || PSCHED_TLESS(q->now, cl->t_c)) { + if (net_ratelimit()) + printk(KERN_ERR "HTB: bad diff in events, cl=%X diff=%lX now=%Lu then=%Lu j=%lu\n", + cl->classid, diff, +#ifdef 
CONFIG_NET_SCH_CLK_GETTIMEOFDAY + q->now.tv_sec * 1000000ULL + q->now.tv_usec, + cl->t_c.tv_sec * 1000000ULL + cl->t_c.tv_usec, +#else + (unsigned long long) q->now, + (unsigned long long) cl->t_c, +#endif + q->jiffies); + diff = 1000; + } +#endif + htb_change_class_mode(q,cl,&diff); if (cl->cmode != HTB_CAN_SEND) - htb_add_to_wait_tree(q, cl, diff); + htb_add_to_wait_tree (q,cl,diff,2); } if (net_ratelimit()) printk(KERN_WARNING "htb: too many events !\n"); - return HZ / 10; + return HZ/10; } /* Returns class->node+prio from id-tree where classe's id is >= id. NULL is no such one exists. */ -static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, - u32 id) +static struct rb_node * +htb_id_find_next_upper(int prio,struct rb_node *n,u32 id) { struct rb_node *r = NULL; while (n) { - struct htb_class *cl = - rb_entry(n, struct htb_class, node[prio]); - if (id == cl->classid) - return n; - + struct htb_class *cl = rb_entry(n,struct htb_class,node[prio]); + if (id == cl->classid) return n; + if (id > cl->classid) { n = n->rb_right; } else { @@ -834,49 +965,49 @@ static struct rb_node *htb_id_find_next_upper(int prio, struct rb_node *n, * * Find leaf where current feed pointers points to. 
*/ -static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int prio, - struct rb_node **pptr, u32 * pid) +static struct htb_class * +htb_lookup_leaf(HTB_ARGQ struct rb_root *tree,int prio,struct rb_node **pptr,u32 *pid) { int i; struct { struct rb_node *root; struct rb_node **pptr; u32 *pid; - } stk[TC_HTB_MAXDEPTH], *sp = stk; - + } stk[TC_HTB_MAXDEPTH],*sp = stk; + BUG_TRAP(tree->rb_node); sp->root = tree->rb_node; sp->pptr = pptr; sp->pid = pid; for (i = 0; i < 65535; i++) { - if (!*sp->pptr && *sp->pid) { + HTB_DBG(4,2,"htb_lleaf ptr=%p pid=%X\n",*sp->pptr,*sp->pid); + + if (!*sp->pptr && *sp->pid) { /* ptr was invalidated but id is valid - try to recover the original or next ptr */ - *sp->pptr = - htb_id_find_next_upper(prio, sp->root, *sp->pid); + *sp->pptr = htb_id_find_next_upper(prio,sp->root,*sp->pid); } - *sp->pid = 0; /* ptr is valid now so that remove this hint as it - can become out of date quickly */ - if (!*sp->pptr) { /* we are at right end; rewind & go up */ + *sp->pid = 0; /* ptr is valid now so that remove this hint as it + can become out of date quickly */ + if (!*sp->pptr) { /* we are at right end; rewind & go up */ *sp->pptr = sp->root; - while ((*sp->pptr)->rb_left) + while ((*sp->pptr)->rb_left) *sp->pptr = (*sp->pptr)->rb_left; if (sp > stk) { sp--; - BUG_TRAP(*sp->pptr); - if (!*sp->pptr) - return NULL; - htb_next_rb_node(sp->pptr); + BUG_TRAP(*sp->pptr); if(!*sp->pptr) return NULL; + htb_next_rb_node (sp->pptr); } } else { struct htb_class *cl; - cl = rb_entry(*sp->pptr, struct htb_class, node[prio]); - if (!cl->level) + cl = rb_entry(*sp->pptr,struct htb_class,node[prio]); + HTB_CHCL(cl); + if (!cl->level) return cl; (++sp)->root = cl->un.inner.feed[prio].rb_node; - sp->pptr = cl->un.inner.ptr + prio; - sp->pid = cl->un.inner.last_ptr_id + prio; + sp->pptr = cl->un.inner.ptr+prio; + sp->pid = cl->un.inner.last_ptr_id+prio; } } BUG_TRAP(0); @@ -885,21 +1016,21 @@ static struct htb_class *htb_lookup_leaf(struct rb_root *tree, int 
prio, /* dequeues packet at given priority and level; call only if you are sure that there is active class at prio/level */ -static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, - int level) +static struct sk_buff * +htb_dequeue_tree(struct htb_sched *q,int prio,int level) { struct sk_buff *skb = NULL; - struct htb_class *cl, *start; + struct htb_class *cl,*start; /* look initial class up in the row */ - start = cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); - + start = cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio, + q->ptr[level]+prio,q->last_ptr_id[level]+prio); + do { next: - BUG_TRAP(cl); - if (!cl) - return NULL; + BUG_TRAP(cl); + if (!cl) return NULL; + HTB_DBG(4,1,"htb_deq_tr prio=%d lev=%d cl=%X defic=%d\n", + prio,level,cl->classid,cl->un.leaf.deficit[level]); /* class can be empty - it is unlikely but can be true if leaf qdisc drops packets in enqueue routine or if someone used @@ -907,69 +1038,64 @@ static struct sk_buff *htb_dequeue_tree(struct htb_sched *q, int prio, simply deactivate and skip such class */ if (unlikely(cl->un.leaf.q->q.qlen == 0)) { struct htb_class *next; - htb_deactivate(q, cl); + htb_deactivate(q,cl); /* row/level might become empty */ if ((q->row_mask[level] & (1 << prio)) == 0) - return NULL; - - next = htb_lookup_leaf(q->row[level] + prio, - prio, q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + return NULL; + + next = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio, + prio,q->ptr[level]+prio,q->last_ptr_id[level]+prio); - if (cl == start) /* fix start if we just deleted it */ + if (cl == start) /* fix start if we just deleted it */ start = next; cl = next; goto next; } - - skb = cl->un.leaf.q->dequeue(cl->un.leaf.q); - if (likely(skb != NULL)) + + if (likely((skb = cl->un.leaf.q->dequeue(cl->un.leaf.q)) != NULL)) break; if (!cl->warned) { - printk(KERN_WARNING - "htb: class %X isn't work conserving ?!\n", - cl->classid); + 
printk(KERN_WARNING "htb: class %X isn't work conserving ?!\n",cl->classid); cl->warned = 1; } q->nwc_hit++; - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); - cl = htb_lookup_leaf(q->row[level] + prio, prio, - q->ptr[level] + prio, - q->last_ptr_id[level] + prio); + htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); + cl = htb_lookup_leaf (HTB_PASSQ q->row[level]+prio,prio,q->ptr[level]+prio, + q->last_ptr_id[level]+prio); } while (cl != start); if (likely(skb != NULL)) { if ((cl->un.leaf.deficit[level] -= skb->len) < 0) { + HTB_DBG(4,2,"htb_next_cl oldptr=%p quant_add=%d\n", + level?cl->parent->un.inner.ptr[prio]:q->ptr[0][prio],cl->un.leaf.quantum); cl->un.leaf.deficit[level] += cl->un.leaf.quantum; - htb_next_rb_node((level ? cl->parent->un.inner.ptr : q-> - ptr[0]) + prio); + htb_next_rb_node((level?cl->parent->un.inner.ptr:q->ptr[0])+prio); } /* this used to be after charge_class but this constelation gives us slightly better performance */ if (!cl->un.leaf.q->q.qlen) - htb_deactivate(q, cl); - htb_charge_class(q, cl, level, skb->len); + htb_deactivate (q,cl); + htb_charge_class (q,cl,level,skb->len); } return skb; } -static void htb_delay_by(struct Qdisc *sch, long delay) +static void htb_delay_by(struct Qdisc *sch,long delay) { struct htb_sched *q = qdisc_priv(sch); - if (delay <= 0) - delay = 1; - if (unlikely(delay > 5 * HZ)) { + if (delay <= 0) delay = 1; + if (unlikely(delay > 5*HZ)) { if (net_ratelimit()) printk(KERN_INFO "HTB delay %ld > 5sec\n", delay); - delay = 5 * HZ; + delay = 5*HZ; } /* why don't use jiffies here ? 
because expires can be in past */ mod_timer(&q->timer, q->jiffies + delay); sch->flags |= TCQ_F_THROTTLED; sch->qstats.overlimits++; + HTB_DBG(3,1,"htb_deq t_delay=%ld\n",delay); } static struct sk_buff *htb_dequeue(struct Qdisc *sch) @@ -978,19 +1104,22 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) struct htb_sched *q = qdisc_priv(sch); int level; long min_delay; +#ifdef HTB_DEBUG + int evs_used = 0; +#endif q->jiffies = jiffies; + HTB_DBG(3,1,"htb_deq dircnt=%d qlen=%d\n",skb_queue_len(&q->direct_queue), + sch->q.qlen); /* try to dequeue direct packets as high prio (!) to minimize cpu work */ - skb = __skb_dequeue(&q->direct_queue); - if (skb != NULL) { + if ((skb = __skb_dequeue(&q->direct_queue)) != NULL) { sch->flags &= ~TCQ_F_THROTTLED; sch->q.qlen--; return skb; } - if (!sch->q.qlen) - goto fin; + if (!sch->q.qlen) goto fin; PSCHED_GET_TIME(q->now); min_delay = LONG_MAX; @@ -1000,19 +1129,21 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) int m; long delay; if (time_after_eq(q->jiffies, q->near_ev_cache[level])) { - delay = htb_do_events(q, level); - q->near_ev_cache[level] = - q->jiffies + (delay ? delay : HZ); + delay = htb_do_events(q,level); + q->near_ev_cache[level] = q->jiffies + (delay ? delay : HZ); +#ifdef HTB_DEBUG + evs_used++; +#endif } else - delay = q->near_ev_cache[level] - q->jiffies; - - if (delay && min_delay > delay) + delay = q->near_ev_cache[level] - q->jiffies; + + if (delay && min_delay > delay) min_delay = delay; m = ~q->row_mask[level]; while (m != (int)(-1)) { - int prio = ffz(m); + int prio = ffz (m); m |= 1 << prio; - skb = htb_dequeue_tree(q, prio, level); + skb = htb_dequeue_tree(q,prio,level); if (likely(skb != NULL)) { sch->q.qlen--; sch->flags &= ~TCQ_F_THROTTLED; @@ -1020,28 +1151,40 @@ static struct sk_buff *htb_dequeue(struct Qdisc *sch) } } } - htb_delay_by(sch, min_delay > 5 * HZ ? 
5 * HZ : min_delay); +#ifdef HTB_DEBUG + if (!q->nwc_hit && min_delay >= 10*HZ && net_ratelimit()) { + if (min_delay == LONG_MAX) { + printk(KERN_ERR "HTB: dequeue bug (%d,%lu,%lu), report it please !\n", + evs_used,q->jiffies,jiffies); + htb_debug_dump(q); + } else + printk(KERN_WARNING "HTB: mindelay=%ld, some class has " + "too small rate\n",min_delay); + } +#endif + htb_delay_by (sch,min_delay > 5*HZ ? 5*HZ : min_delay); fin: + HTB_DBG(3,1,"htb_deq_end %s j=%lu skb=%p\n",sch->dev->name,q->jiffies,skb); return skb; } /* try to drop from each class (by prio) until one succeed */ -static unsigned int htb_drop(struct Qdisc *sch) +static unsigned int htb_drop(struct Qdisc* sch) { struct htb_sched *q = qdisc_priv(sch); int prio; for (prio = TC_HTB_NUMPRIO - 1; prio >= 0; prio--) { struct list_head *p; - list_for_each(p, q->drops + prio) { + list_for_each (p,q->drops+prio) { struct htb_class *cl = list_entry(p, struct htb_class, un.leaf.drop_list); unsigned int len; - if (cl->un.leaf.q->ops->drop && - (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { + if (cl->un.leaf.q->ops->drop && + (len = cl->un.leaf.q->ops->drop(cl->un.leaf.q))) { sch->q.qlen--; if (!cl->un.leaf.q->q.qlen) - htb_deactivate(q, cl); + htb_deactivate (q,cl); return len; } } @@ -1051,25 +1194,29 @@ static unsigned int htb_drop(struct Qdisc *sch) /* reset all classes */ /* always caled under BH & queue lock */ -static void htb_reset(struct Qdisc *sch) +static void htb_reset(struct Qdisc* sch) { struct htb_sched *q = qdisc_priv(sch); int i; + HTB_DBG(0,1,"htb_reset sch=%p, handle=%X\n",sch,sch->handle); for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; - - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + struct list_head *p; + list_for_each (p,q->hash+i) { + struct htb_class *cl = list_entry(p,struct htb_class,hlist); if (cl->level) - memset(&cl->un.inner, 0, sizeof(cl->un.inner)); + memset(&cl->un.inner,0,sizeof(cl->un.inner)); else { - if (cl->un.leaf.q) + if 
(cl->un.leaf.q) qdisc_reset(cl->un.leaf.q); INIT_LIST_HEAD(&cl->un.leaf.drop_list); } cl->prio_activity = 0; cl->cmode = HTB_CAN_SEND; +#ifdef HTB_DEBUG + cl->pq_node.rb_color = -1; + memset(cl->node,255,sizeof(cl->node)); +#endif } } @@ -1077,12 +1224,12 @@ static void htb_reset(struct Qdisc *sch) del_timer(&q->timer); __skb_queue_purge(&q->direct_queue); sch->q.qlen = 0; - memset(q->row, 0, sizeof(q->row)); - memset(q->row_mask, 0, sizeof(q->row_mask)); - memset(q->wait_pq, 0, sizeof(q->wait_pq)); - memset(q->ptr, 0, sizeof(q->ptr)); + memset(q->row,0,sizeof(q->row)); + memset(q->row_mask,0,sizeof(q->row_mask)); + memset(q->wait_pq,0,sizeof(q->wait_pq)); + memset(q->ptr,0,sizeof(q->ptr)); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); + INIT_LIST_HEAD(q->drops+i); } static int htb_init(struct Qdisc *sch, struct rtattr *opt) @@ -1091,31 +1238,36 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) struct rtattr *tb[TCA_HTB_INIT]; struct tc_htb_glob *gopt; int i; +#ifdef HTB_DEBUG + printk(KERN_INFO "HTB init, kernel part version %d.%d\n", + HTB_VER >> 16,HTB_VER & 0xffff); +#endif if (!opt || rtattr_parse_nested(tb, TCA_HTB_INIT, opt) || - tb[TCA_HTB_INIT - 1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_INIT - 1]) < sizeof(*gopt)) { + tb[TCA_HTB_INIT-1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_INIT-1]) < sizeof(*gopt)) { printk(KERN_ERR "HTB: hey probably you have bad tc tool ?\n"); return -EINVAL; } - gopt = RTA_DATA(tb[TCA_HTB_INIT - 1]); + gopt = RTA_DATA(tb[TCA_HTB_INIT-1]); if (gopt->version != HTB_VER >> 16) { - printk(KERN_ERR - "HTB: need tc/htb version %d (minor is %d), you have %d\n", - HTB_VER >> 16, HTB_VER & 0xffff, gopt->version); + printk(KERN_ERR "HTB: need tc/htb version %d (minor is %d), you have %d\n", + HTB_VER >> 16,HTB_VER & 0xffff,gopt->version); return -EINVAL; } + q->debug = gopt->debug; + HTB_DBG(0,1,"htb_init sch=%p handle=%X r2q=%d\n",sch,sch->handle,gopt->rate2quantum); INIT_LIST_HEAD(&q->root); for (i = 0; i < 
HTB_HSIZE; i++) - INIT_HLIST_HEAD(q->hash + i); + INIT_LIST_HEAD(q->hash+i); for (i = 0; i < TC_HTB_NUMPRIO; i++) - INIT_LIST_HEAD(q->drops + i); + INIT_LIST_HEAD(q->drops+i); init_timer(&q->timer); skb_queue_head_init(&q->direct_queue); q->direct_qlen = sch->dev->tx_queue_len; - if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ + if (q->direct_qlen < 2) /* some devices have zero tx_queue_len */ q->direct_qlen = 2; q->timer.function = htb_timer; q->timer.data = (unsigned long)sch; @@ -1137,72 +1289,80 @@ static int htb_init(struct Qdisc *sch, struct rtattr *opt) static int htb_dump(struct Qdisc *sch, struct sk_buff *skb) { struct htb_sched *q = qdisc_priv(sch); - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_glob gopt; - spin_lock_bh(&sch->dev->queue_lock); + HTB_DBG(0,1,"htb_dump sch=%p, handle=%X\n",sch,sch->handle); + HTB_QLOCK(sch); gopt.direct_pkts = q->direct_pkts; +#ifdef HTB_DEBUG + if (HTB_DBG_COND(0,2)) + htb_debug_dump(q); +#endif gopt.version = HTB_VER; gopt.rate2quantum = q->rate2quantum; gopt.defcls = q->defcls; - gopt.debug = 0; - rta = (struct rtattr *)b; + gopt.debug = q->debug; + rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); RTA_PUT(skb, TCA_HTB_INIT, sizeof(gopt), &gopt); rta->rta_len = skb->tail - b; - spin_unlock_bh(&sch->dev->queue_lock); + HTB_QUNLOCK(sch); return skb->len; rtattr_failure: - spin_unlock_bh(&sch->dev->queue_lock); + HTB_QUNLOCK(sch); skb_trim(skb, skb->tail - skb->data); return -1; } static int htb_dump_class(struct Qdisc *sch, unsigned long arg, - struct sk_buff *skb, struct tcmsg *tcm) + struct sk_buff *skb, struct tcmsg *tcm) { - struct htb_class *cl = (struct htb_class *)arg; - unsigned char *b = skb->tail; +#ifdef HTB_DEBUG + struct htb_sched *q = qdisc_priv(sch); +#endif + struct htb_class *cl = (struct htb_class*)arg; + unsigned char *b = skb->tail; struct rtattr *rta; struct tc_htb_opt opt; - spin_lock_bh(&sch->dev->queue_lock); + 
HTB_DBG(0,1,"htb_dump_class handle=%X clid=%X\n",sch->handle,cl->classid); + + HTB_QLOCK(sch); tcm->tcm_parent = cl->parent ? cl->parent->classid : TC_H_ROOT; tcm->tcm_handle = cl->classid; if (!cl->level && cl->un.leaf.q) tcm->tcm_info = cl->un.leaf.q->handle; - rta = (struct rtattr *)b; + rta = (struct rtattr*)b; RTA_PUT(skb, TCA_OPTIONS, 0, NULL); - memset(&opt, 0, sizeof(opt)); + memset (&opt,0,sizeof(opt)); - opt.rate = cl->rate->rate; - opt.buffer = cl->buffer; - opt.ceil = cl->ceil->rate; - opt.cbuffer = cl->cbuffer; - opt.quantum = cl->un.leaf.quantum; - opt.prio = cl->un.leaf.prio; - opt.level = cl->level; + opt.rate = cl->rate->rate; opt.buffer = cl->buffer; + opt.ceil = cl->ceil->rate; opt.cbuffer = cl->cbuffer; + opt.quantum = cl->un.leaf.quantum; opt.prio = cl->un.leaf.prio; + opt.level = cl->level; RTA_PUT(skb, TCA_HTB_PARMS, sizeof(opt), &opt); rta->rta_len = skb->tail - b; - spin_unlock_bh(&sch->dev->queue_lock); + HTB_QUNLOCK(sch); return skb->len; rtattr_failure: - spin_unlock_bh(&sch->dev->queue_lock); + HTB_QUNLOCK(sch); skb_trim(skb, b - skb->data); return -1; } static int -htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) +htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, + struct gnet_dump *d) { - struct htb_class *cl = (struct htb_class *)arg; + struct htb_class *cl = (struct htb_class*)arg; #ifdef HTB_RATECM - cl->rate_est.bps = cl->rate_bytes / (HTB_EWMAC * HTB_HSIZE); - cl->rate_est.pps = cl->rate_packets / (HTB_EWMAC * HTB_HSIZE); + cl->rate_est.bps = cl->rate_bytes/(HTB_EWMAC*HTB_HSIZE); + cl->rate_est.pps = cl->rate_packets/(HTB_EWMAC*HTB_HSIZE); #endif if (!cl->level && cl->un.leaf.q) @@ -1219,22 +1379,21 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) } static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, - struct Qdisc **old) + struct Qdisc **old) { - struct htb_class *cl = (struct htb_class *)arg; + struct htb_class *cl = (struct 
htb_class*)arg; if (cl && !cl->level) { - if (new == NULL && (new = qdisc_create_dflt(sch->dev, - &pfifo_qdisc_ops)) - == NULL) - return -ENOBUFS; + if (new == NULL && (new = qdisc_create_dflt(sch->dev, + &pfifo_qdisc_ops)) == NULL) + return -ENOBUFS; sch_tree_lock(sch); if ((*old = xchg(&cl->un.leaf.q, new)) != NULL) { if (cl->prio_activity) - htb_deactivate(qdisc_priv(sch), cl); + htb_deactivate (qdisc_priv(sch),cl); /* TODO: is it correct ? Why CBQ doesn't do it ? */ - sch->q.qlen -= (*old)->q.qlen; + sch->q.qlen -= (*old)->q.qlen; qdisc_reset(*old); } sch_tree_unlock(sch); @@ -1243,16 +1402,20 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, return -ENOENT; } -static struct Qdisc *htb_leaf(struct Qdisc *sch, unsigned long arg) +static struct Qdisc * htb_leaf(struct Qdisc *sch, unsigned long arg) { - struct htb_class *cl = (struct htb_class *)arg; + struct htb_class *cl = (struct htb_class*)arg; return (cl && !cl->level) ? cl->un.leaf.q : NULL; } static unsigned long htb_get(struct Qdisc *sch, u32 classid) { - struct htb_class *cl = htb_find(classid, sch); - if (cl) +#ifdef HTB_DEBUG + struct htb_sched *q = qdisc_priv(sch); +#endif + struct htb_class *cl = htb_find(classid,sch); + HTB_DBG(0,1,"htb_get clid=%X q=%p cl=%p ref=%d\n",classid,q,cl,cl?cl->refcnt:0); + if (cl) cl->refcnt++; return (unsigned long)cl; } @@ -1267,9 +1430,10 @@ static void htb_destroy_filters(struct tcf_proto **fl) } } -static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) +static void htb_destroy_class(struct Qdisc* sch,struct htb_class *cl) { struct htb_sched *q = qdisc_priv(sch); + HTB_DBG(0,1,"htb_destrycls clid=%X ref=%d\n", cl?cl->classid:0,cl?cl->refcnt:0); if (!cl->level) { BUG_TRAP(cl->un.leaf.q); sch->q.qlen -= cl->un.leaf.q->q.qlen; @@ -1277,45 +1441,45 @@ static void htb_destroy_class(struct Qdisc *sch, struct htb_class *cl) } qdisc_put_rtab(cl->rate); qdisc_put_rtab(cl->ceil); - - htb_destroy_filters(&cl->filter_list); - - while 
(!list_empty(&cl->children)) - htb_destroy_class(sch, list_entry(cl->children.next, - struct htb_class, sibling)); + + htb_destroy_filters (&cl->filter_list); + + while (!list_empty(&cl->children)) + htb_destroy_class (sch,list_entry(cl->children.next, + struct htb_class,sibling)); /* note: this delete may happen twice (see htb_delete) */ - if (!hlist_unhashed(&cl->hlist)) - hlist_del(&cl->hlist); + list_del(&cl->hlist); list_del(&cl->sibling); - + if (cl->prio_activity) - htb_deactivate(q, cl); - + htb_deactivate (q,cl); + if (cl->cmode != HTB_CAN_SEND) - htb_safe_rb_erase(&cl->pq_node, q->wait_pq + cl->level); - + htb_safe_rb_erase(&cl->pq_node,q->wait_pq+cl->level); + kfree(cl); } /* always caled under BH & queue lock */ -static void htb_destroy(struct Qdisc *sch) +static void htb_destroy(struct Qdisc* sch) { struct htb_sched *q = qdisc_priv(sch); + HTB_DBG(0,1,"htb_destroy q=%p\n",q); - del_timer_sync(&q->timer); + del_timer_sync (&q->timer); #ifdef HTB_RATECM - del_timer_sync(&q->rttim); + del_timer_sync (&q->rttim); #endif /* This line used to be after htb_destroy_class call below and surprisingly it worked in 2.4. But it must precede it because filter need its target class alive to be able to call unbind_filter on it (without Oops). */ htb_destroy_filters(&q->filter_list); - - while (!list_empty(&q->root)) - htb_destroy_class(sch, list_entry(q->root.next, - struct htb_class, sibling)); + + while (!list_empty(&q->root)) + htb_destroy_class (sch,list_entry(q->root.next, + struct htb_class,sibling)); __skb_queue_purge(&q->direct_queue); } @@ -1323,25 +1487,24 @@ static void htb_destroy(struct Qdisc *sch) static int htb_delete(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class *)arg; + struct htb_class *cl = (struct htb_class*)arg; + HTB_DBG(0,1,"htb_delete q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); // TODO: why don't allow to delete subtree ? references ? 
does // tc subsys quarantee us that in htb_destroy it holds no class // refs so that we can remove children safely there ? if (!list_empty(&cl->children) || cl->filter_cnt) return -EBUSY; - + sch_tree_lock(sch); - + /* delete from hash and active; remainder in destroy_class */ - if (!hlist_unhashed(&cl->hlist)) - hlist_del(&cl->hlist); - + list_del_init(&cl->hlist); if (cl->prio_activity) - htb_deactivate(q, cl); + htb_deactivate (q,cl); if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + htb_destroy_class(sch,cl); sch_tree_unlock(sch); return 0; @@ -1349,46 +1512,45 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) static void htb_put(struct Qdisc *sch, unsigned long arg) { - struct htb_class *cl = (struct htb_class *)arg; +#ifdef HTB_DEBUG + struct htb_sched *q = qdisc_priv(sch); +#endif + struct htb_class *cl = (struct htb_class*)arg; + HTB_DBG(0,1,"htb_put q=%p cl=%X ref=%d\n",q,cl?cl->classid:0,cl?cl->refcnt:0); if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + htb_destroy_class(sch,cl); } -static int htb_change_class(struct Qdisc *sch, u32 classid, - u32 parentid, struct rtattr **tca, - unsigned long *arg) +static int htb_change_class(struct Qdisc *sch, u32 classid, + u32 parentid, struct rtattr **tca, unsigned long *arg) { int err = -EINVAL; struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = (struct htb_class *)*arg, *parent; - struct rtattr *opt = tca[TCA_OPTIONS - 1]; + struct htb_class *cl = (struct htb_class*)*arg,*parent; + struct rtattr *opt = tca[TCA_OPTIONS-1]; struct qdisc_rate_table *rtab = NULL, *ctab = NULL; struct rtattr *tb[TCA_HTB_RTAB]; struct tc_htb_opt *hopt; /* extract all subattrs from opt attr */ if (!opt || rtattr_parse_nested(tb, TCA_HTB_RTAB, opt) || - tb[TCA_HTB_PARMS - 1] == NULL || - RTA_PAYLOAD(tb[TCA_HTB_PARMS - 1]) < sizeof(*hopt)) + tb[TCA_HTB_PARMS-1] == NULL || + RTA_PAYLOAD(tb[TCA_HTB_PARMS-1]) < sizeof(*hopt)) goto failure; + + parent = parentid == TC_H_ROOT ? 
NULL : htb_find (parentid,sch); - parent = parentid == TC_H_ROOT ? NULL : htb_find(parentid, sch); - - hopt = RTA_DATA(tb[TCA_HTB_PARMS - 1]); - - rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB - 1]); - ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB - 1]); - if (!rtab || !ctab) - goto failure; + hopt = RTA_DATA(tb[TCA_HTB_PARMS-1]); + HTB_DBG(0,1,"htb_chg cl=%p(%X), clid=%X, parid=%X, opt/prio=%d, rate=%u, buff=%d, quant=%d\n", cl,cl?cl->classid:0,classid,parentid,(int)hopt->prio,hopt->rate.rate,hopt->buffer,hopt->quantum); + rtab = qdisc_get_rtab(&hopt->rate, tb[TCA_HTB_RTAB-1]); + ctab = qdisc_get_rtab(&hopt->ceil, tb[TCA_HTB_CTAB-1]); + if (!rtab || !ctab) goto failure; - if (!cl) { /* new class */ + if (!cl) { /* new class */ struct Qdisc *new_q; - int prio; - /* check for valid classid */ - if (!classid || TC_H_MAJ(classid ^ sch->handle) - || htb_find(classid, sch)) + if (!classid || TC_H_MAJ(classid^sch->handle) || htb_find(classid,sch)) goto failure; /* check maximal depth */ @@ -1399,16 +1561,15 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, err = -ENOBUFS; if ((cl = kzalloc(sizeof(*cl), GFP_KERNEL)) == NULL) goto failure; - + cl->refcnt = 1; INIT_LIST_HEAD(&cl->sibling); - INIT_HLIST_NODE(&cl->hlist); + INIT_LIST_HEAD(&cl->hlist); INIT_LIST_HEAD(&cl->children); INIT_LIST_HEAD(&cl->un.leaf.drop_list); - RB_CLEAR_NODE(&cl->pq_node); - - for (prio = 0; prio < TC_HTB_NUMPRIO; prio++) - RB_CLEAR_NODE(&cl->node[prio]); +#ifdef HTB_DEBUG + cl->magic = HTB_CMAGIC; +#endif /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock @@ -1418,53 +1579,53 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (parent && !parent->level) { /* turn parent into inner node */ sch->q.qlen -= parent->un.leaf.q->q.qlen; - qdisc_destroy(parent->un.leaf.q); - if (parent->prio_activity) - htb_deactivate(q, parent); + qdisc_destroy (parent->un.leaf.q); + if (parent->prio_activity) + htb_deactivate 
(q,parent); /* remove from evt list because of level change */ if (parent->cmode != HTB_CAN_SEND) { - htb_safe_rb_erase(&parent->pq_node, q->wait_pq); + htb_safe_rb_erase(&parent->pq_node,q->wait_pq /*+0*/); parent->cmode = HTB_CAN_SEND; } parent->level = (parent->parent ? parent->parent->level - : TC_HTB_MAXDEPTH) - 1; - memset(&parent->un.inner, 0, sizeof(parent->un.inner)); + : TC_HTB_MAXDEPTH) - 1; + memset (&parent->un.inner,0,sizeof(parent->un.inner)); } /* leaf (we) needs elementary qdisc */ cl->un.leaf.q = new_q ? new_q : &noop_qdisc; - cl->classid = classid; - cl->parent = parent; + cl->classid = classid; cl->parent = parent; /* set class to be in HTB_CAN_SEND state */ cl->tokens = hopt->buffer; cl->ctokens = hopt->cbuffer; - cl->mbuffer = PSCHED_JIFFIE2US(HZ * 60); /* 1min */ + cl->mbuffer = PSCHED_JIFFIE2US(HZ*60); /* 1min */ PSCHED_GET_TIME(cl->t_c); cl->cmode = HTB_CAN_SEND; /* attach to the hash list and parent's family */ - hlist_add_head(&cl->hlist, q->hash + htb_hash(classid)); - list_add_tail(&cl->sibling, - parent ? &parent->children : &q->root); - } else - sch_tree_lock(sch); + list_add_tail(&cl->hlist, q->hash+htb_hash(classid)); + list_add_tail(&cl->sibling, parent ? &parent->children : &q->root); +#ifdef HTB_DEBUG + { + int i; + for (i = 0; i < TC_HTB_NUMPRIO; i++) cl->node[i].rb_color = -1; + cl->pq_node.rb_color = -1; + } +#endif + } else sch_tree_lock(sch); /* it used to be a nasty bug here, we have to check that node - is really leaf before changing cl->un.leaf ! */ + is really leaf before changing cl->un.leaf ! */ if (!cl->level) { cl->un.leaf.quantum = rtab->rate.rate / q->rate2quantum; if (!hopt->quantum && cl->un.leaf.quantum < 1000) { - printk(KERN_WARNING - "HTB: quantum of class %X is small. Consider r2q change.\n", - cl->classid); + printk(KERN_WARNING "HTB: quantum of class %X is small. 
Consider r2q change.\n", cl->classid); cl->un.leaf.quantum = 1000; } if (!hopt->quantum && cl->un.leaf.quantum > 200000) { - printk(KERN_WARNING - "HTB: quantum of class %X is big. Consider r2q change.\n", - cl->classid); + printk(KERN_WARNING "HTB: quantum of class %X is big. Consider r2q change.\n", cl->classid); cl->un.leaf.quantum = 200000; } if (hopt->quantum) @@ -1475,22 +1636,16 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, cl->buffer = hopt->buffer; cl->cbuffer = hopt->cbuffer; - if (cl->rate) - qdisc_put_rtab(cl->rate); - cl->rate = rtab; - if (cl->ceil) - qdisc_put_rtab(cl->ceil); - cl->ceil = ctab; + if (cl->rate) qdisc_put_rtab(cl->rate); cl->rate = rtab; + if (cl->ceil) qdisc_put_rtab(cl->ceil); cl->ceil = ctab; sch_tree_unlock(sch); *arg = (unsigned long)cl; return 0; failure: - if (rtab) - qdisc_put_rtab(rtab); - if (ctab) - qdisc_put_rtab(ctab); + if (rtab) qdisc_put_rtab(rtab); + if (ctab) qdisc_put_rtab(ctab); return err; } @@ -1499,28 +1654,28 @@ static struct tcf_proto **htb_find_tcf(struct Qdisc *sch, unsigned long arg) struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; struct tcf_proto **fl = cl ? &cl->filter_list : &q->filter_list; - + HTB_DBG(0,2,"htb_tcf q=%p clid=%X fref=%d fl=%p\n",q,cl?cl->classid:0,cl?cl->filter_cnt:q->filter_cnt,*fl); return fl; } static unsigned long htb_bind_filter(struct Qdisc *sch, unsigned long parent, - u32 classid) + u32 classid) { struct htb_sched *q = qdisc_priv(sch); - struct htb_class *cl = htb_find(classid, sch); - + struct htb_class *cl = htb_find (classid,sch); + HTB_DBG(0,2,"htb_bind q=%p clid=%X cl=%p fref=%d\n",q,classid,cl,cl?cl->filter_cnt:q->filter_cnt); /*if (cl && !cl->level) return 0; - The line above used to be there to prevent attaching filters to - leaves. But at least tc_index filter uses this just to get class - for other reasons so that we have to allow for it. 
- ---- - 19.6.2002 As Werner explained it is ok - bind filter is just - another way to "lock" the class - unlike "get" this lock can - be broken by class during destroy IIUC. + The line above used to be there to prevent attaching filters to + leaves. But at least tc_index filter uses this just to get class + for other reasons so that we have to allow for it. + ---- + 19.6.2002 As Werner explained it is ok - bind filter is just + another way to "lock" the class - unlike "get" this lock can + be broken by class during destroy IIUC. */ - if (cl) - cl->filter_cnt++; - else + if (cl) + cl->filter_cnt++; + else q->filter_cnt++; return (unsigned long)cl; } @@ -1529,10 +1684,10 @@ static void htb_unbind_filter(struct Qdisc *sch, unsigned long arg) { struct htb_sched *q = qdisc_priv(sch); struct htb_class *cl = (struct htb_class *)arg; - - if (cl) - cl->filter_cnt--; - else + HTB_DBG(0,2,"htb_unbind q=%p cl=%p fref=%d\n",q,cl,cl?cl->filter_cnt:q->filter_cnt); + if (cl) + cl->filter_cnt--; + else q->filter_cnt--; } @@ -1545,10 +1700,9 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < HTB_HSIZE; i++) { - struct hlist_node *p; - struct htb_class *cl; - - hlist_for_each_entry(cl, p, q->hash + i, hlist) { + struct list_head *p; + list_for_each (p,q->hash+i) { + struct htb_class *cl = list_entry(p,struct htb_class,hlist); if (arg->count < arg->skip) { arg->count++; continue; @@ -1596,13 +1750,12 @@ static struct Qdisc_ops htb_qdisc_ops = { static int __init htb_module_init(void) { - return register_qdisc(&htb_qdisc_ops); + return register_qdisc(&htb_qdisc_ops); } -static void __exit htb_module_exit(void) +static void __exit htb_module_exit(void) { - unregister_qdisc(&htb_qdisc_ops); + unregister_qdisc(&htb_qdisc_ops); } - module_init(htb_module_init) module_exit(htb_module_exit) MODULE_LICENSE("GPL"); diff --git a/trunk/net/sched/sch_netem.c b/trunk/net/sched/sch_netem.c index 45939bafbdf8..a08ec4c7c55d 100644 --- 
a/trunk/net/sched/sch_netem.c +++ b/trunk/net/sched/sch_netem.c @@ -192,8 +192,8 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ if (q->corrupt && q->corrupt >= get_crandom(&q->corrupt_cor)) { if (!(skb = skb_unshare(skb, GFP_ATOMIC)) - || (skb->ip_summed == CHECKSUM_PARTIAL - && skb_checksum_help(skb))) { + || (skb->ip_summed == CHECKSUM_HW + && skb_checksum_help(skb, 0))) { sch->qstats.drops++; return NET_XMIT_DROP; } diff --git a/trunk/net/sctp/input.c b/trunk/net/sctp/input.c index 03f65de75d88..42b66e74bbb5 100644 --- a/trunk/net/sctp/input.c +++ b/trunk/net/sctp/input.c @@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb)) + if (sk_filter(sk, skb, 1)) goto discard_release; /* Create an SCTP packet structure. */ @@ -255,13 +255,10 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); - if (sock_owned_by_user(sk)) { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); + if (sock_owned_by_user(sk)) sctp_add_backlog(sk, skb); - } else { - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); + else sctp_inq_push(&chunk->rcvr->inqueue, chunk); - } sctp_bh_unlock_sock(sk); @@ -274,7 +271,6 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: - SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; diff --git a/trunk/net/sctp/inqueue.c b/trunk/net/sctp/inqueue.c index cf6deed7e849..cf0c767d43ae 100644 --- a/trunk/net/sctp/inqueue.c +++ b/trunk/net/sctp/inqueue.c @@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ -void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) +void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) { /* Directly call the packet handling routine. 
*/ @@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - list_add_tail(&chunk->list, &q->in_chunk_list); + list_add_tail(&packet->list, &q->in_chunk_list); q->immediate.func(q->immediate.data); } diff --git a/trunk/net/sctp/ipv6.c b/trunk/net/sctp/ipv6.c index 249e5033c1a8..99c0cefc04e0 100644 --- a/trunk/net/sctp/ipv6.c +++ b/trunk/net/sctp/ipv6.c @@ -78,6 +78,7 @@ #include +extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *); static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inetaddr_event, }; @@ -321,9 +322,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; - rcu_read_lock(); + read_lock(&addrconf_lock); if ((in6_dev = __in6_dev_get(dev)) == NULL) { - rcu_read_unlock(); + read_unlock(&addrconf_lock); return; } @@ -342,7 +343,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_unlock(&in6_dev->lock); - rcu_read_unlock(); + read_unlock(&addrconf_lock); } /* Initialize a sockaddr_storage from in incoming skb. */ diff --git a/trunk/net/sctp/outqueue.c b/trunk/net/sctp/outqueue.c index 37074a39ecbb..30b710c54e64 100644 --- a/trunk/net/sctp/outqueue.c +++ b/trunk/net/sctp/outqueue.c @@ -467,7 +467,6 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, switch(reason) { case SCTP_RTXR_T3_RTX: - SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. 
@@ -476,15 +475,12 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_assoc_update_retran_path(transport->asoc); break; case SCTP_RTXR_FAST_RTX: - SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: - SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); - break; default: - BUG(); + break; } sctp_retransmit_mark(q, transport, fast_retransmit); diff --git a/trunk/net/sctp/proc.c b/trunk/net/sctp/proc.c index a356d8d310a9..5b3b0e0ae7e5 100644 --- a/trunk/net/sctp/proc.c +++ b/trunk/net/sctp/proc.c @@ -57,21 +57,6 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), - SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), - SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), - SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), - SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), - SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), - SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), - SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), - SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), - SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), - SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), - SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), - SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), - SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), - SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), - SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; @@ -343,8 +328,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) 
"%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, - assoc->sndbuf_used, (sk->sk_rcvbuf - assoc->rwnd), + assoc->sndbuf_used, sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/trunk/net/sctp/protocol.c b/trunk/net/sctp/protocol.c index fac7674438a4..1ab03a27a76e 100644 --- a/trunk/net/sctp/protocol.c +++ b/trunk/net/sctp/protocol.c @@ -61,7 +61,7 @@ #include /* Global data structures. */ -struct sctp_globals sctp_globals __read_mostly; +struct sctp_globals sctp_globals; struct proc_dir_entry *proc_net_sctp; DEFINE_SNMP_STAT(struct sctp_mib, sctp_statistics) __read_mostly; @@ -82,6 +82,13 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep __read_mostly; kmem_cache_t *sctp_bucket_cachep __read_mostly; +extern int sctp_snmp_proc_init(void); +extern int sctp_snmp_proc_exit(void); +extern int sctp_eps_proc_init(void); +extern int sctp_eps_proc_exit(void); +extern int sctp_assocs_proc_init(void); +extern int sctp_assocs_proc_exit(void); + /* Return the address of the control sock. 
*/ struct sock *sctp_get_ctl_sock(void) { @@ -1042,7 +1049,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_rto_beta = SCTP_RTO_BETA; /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; + sctp_valid_cookie_life = 60 * HZ; /* Whether Cookie Preservative is enabled(1) or not(0) */ sctp_cookie_preserve_enable = 1; diff --git a/trunk/net/sctp/sm_statefuns.c b/trunk/net/sctp/sm_statefuns.c index 1c42fe983a5b..5b5ae7958322 100644 --- a/trunk/net/sctp/sm_statefuns.c +++ b/trunk/net/sctp/sm_statefuns.c @@ -187,9 +187,10 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_SHUTDOWN_COMP, 0, 0, 0, GFP_ATOMIC); - if (ev) - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); + if (!ev) + goto nomem; + + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Upon reception of the SHUTDOWN COMPLETE chunk the endpoint * will verify that it is in SHUTDOWN-ACK-SENT state, if it is @@ -214,6 +215,9 @@ sctp_disposition_t sctp_sf_do_4_C(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; + +nomem: + return SCTP_DISPOSITION_NOMEM; } /* @@ -343,6 +347,8 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, GFP_ATOMIC)) goto nomem_init; + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + /* B) "Z" shall respond immediately with an INIT ACK chunk. 
*/ /* If there are errors need to be reported for unknown parameters, @@ -354,11 +360,11 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sizeof(sctp_chunkhdr_t); if (sctp_assoc_set_bind_addr_from_ep(new_asoc, GFP_ATOMIC) < 0) - goto nomem_init; + goto nomem_ack; repl = sctp_make_init_ack(new_asoc, chunk, GFP_ATOMIC, len); if (!repl) - goto nomem_init; + goto nomem_ack; /* If there are errors need to be reported for unknown parameters, * include them in the outgoing INIT ACK as "Unrecognized parameter" @@ -382,8 +388,6 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, sctp_chunk_free(err_chunk); } - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* @@ -396,11 +400,12 @@ sctp_disposition_t sctp_sf_do_5_1B_init(const struct sctp_endpoint *ep, return SCTP_DISPOSITION_DELETE_TCB; +nomem_ack: + if (err_chunk) + sctp_chunk_free(err_chunk); nomem_init: sctp_association_free(new_asoc); nomem: - if (err_chunk) - sctp_chunk_free(err_chunk); return SCTP_DISPOSITION_NOMEM; } @@ -595,7 +600,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, struct sctp_association *new_asoc; sctp_init_chunk_t *peer_init; struct sctp_chunk *repl; - struct sctp_ulpevent *ev, *ai_ev = NULL; + struct sctp_ulpevent *ev; int error = 0; struct sctp_chunk *err_chk_p; @@ -654,10 +659,20 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, }; } + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, + SCTP_STATE(SCTP_STATE_ESTABLISHED)); + SCTP_INC_STATS(SCTP_MIB_CURRESTAB); + SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); + sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - /* Delay state machine commands until later. 
- * - * Re-build the bind address for the association is done in + if (new_asoc->autoclose) + sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, + SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); + + sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); + + /* Re-build the bind address for the association is done in * the sctp_unpack_cookie() already. */ /* This is a brand-new association, so these are not yet side @@ -672,7 +687,9 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) - goto nomem_init; + goto nomem_repl; + + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); /* RFC 2960 5.1 Normal Establishment of an Association * @@ -687,53 +704,28 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter , SCTP * delivers this notification to inform the application that of the * peers requested adaption layer. */ if (new_asoc->peer.adaption_ind) { - ai_ev = sctp_ulpevent_make_adaption_indication(new_asoc, + ev = sctp_ulpevent_make_adaption_indication(new_asoc, GFP_ATOMIC); - if (!ai_ev) - goto nomem_aiev; - } - - /* Add all the state machine commands now since we've created - * everything. This way we don't introduce memory corruptions - * during side-effect processing and correclty count established - * associations. 
- */ - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_ASOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, - SCTP_STATE(SCTP_STATE_ESTABLISHED)); - SCTP_INC_STATS(SCTP_MIB_CURRESTAB); - SCTP_INC_STATS(SCTP_MIB_PASSIVEESTABS); - sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL()); - - if (new_asoc->autoclose) - sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_START, - SCTP_TO(SCTP_EVENT_TIMEOUT_AUTOCLOSE)); - - sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); - - /* This will send the COOKIE ACK */ - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); - - /* Queue the ASSOC_CHANGE event */ - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); + if (!ev) + goto nomem_ev; - /* Send up the Adaptation Layer Indication event */ - if (ai_ev) sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ai_ev)); + SCTP_ULPEVENT(ev)); + } return SCTP_DISPOSITION_CONSUME; -nomem_aiev: - sctp_ulpevent_free(ev); nomem_ev: sctp_chunk_free(repl); +nomem_repl: nomem_init: sctp_association_free(new_asoc); nomem: @@ -1368,8 +1360,10 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_process_init(new_asoc, chunk->chunk_hdr->type, sctp_source(chunk), (sctp_init_chunk_t *)chunk->chunk_hdr, - GFP_ATOMIC)) - goto nomem; + GFP_ATOMIC)) { + retval = SCTP_DISPOSITION_NOMEM; + goto nomem_init; + } /* Make sure no new addresses are being added during the * restart. 
Do not do this check for COOKIE-WAIT state, @@ -1380,7 +1374,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( if (!sctp_sf_check_restart_addrs(new_asoc, asoc, chunk, commands)) { retval = SCTP_DISPOSITION_CONSUME; - goto nomem_retval; + goto cleanup_asoc; } } @@ -1436,17 +1430,17 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); retval = SCTP_DISPOSITION_CONSUME; - return retval; - -nomem: - retval = SCTP_DISPOSITION_NOMEM; -nomem_retval: - if (new_asoc) - sctp_association_free(new_asoc); cleanup: if (err_chunk) sctp_chunk_free(err_chunk); return retval; +nomem: + retval = SCTP_DISPOSITION_NOMEM; + goto cleanup; +nomem_init: +cleanup_asoc: + sctp_association_free(new_asoc); + goto cleanup; } /* @@ -1617,10 +1611,15 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, */ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL()); + /* Update the content of current association. */ + sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); + repl = sctp_make_cookie_ack(new_asoc, chunk); if (!repl) goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); + /* Report association restart to upper layer. */ ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0, new_asoc->c.sinit_num_ostreams, @@ -1629,9 +1628,6 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep, if (!ev) goto nomem_ev; - /* Update the content of current association. 
*/ - sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); return SCTP_DISPOSITION_CONSUME; @@ -1755,7 +1751,7 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, sctp_cmd_seq_t *commands, struct sctp_association *new_asoc) { - struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; + struct sctp_ulpevent *ev = NULL; struct sctp_chunk *repl; /* Clarification from Implementor's Guide: @@ -1782,25 +1778,29 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, * SCTP user upon reception of a valid COOKIE * ECHO chunk. */ - ev = sctp_ulpevent_make_assoc_change(asoc, 0, + ev = sctp_ulpevent_make_assoc_change(new_asoc, 0, SCTP_COMM_UP, 0, - asoc->c.sinit_num_ostreams, - asoc->c.sinit_max_instreams, + new_asoc->c.sinit_num_ostreams, + new_asoc->c.sinit_max_instreams, GFP_ATOMIC); if (!ev) goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); /* Sockets API Draft Section 5.3.1.6 * When a peer sends a Adaption Layer Indication parameter, * SCTP delivers this notification to inform the application * that of the peers requested adaption layer. 
*/ - if (asoc->peer.adaption_ind) { - ai_ev = sctp_ulpevent_make_adaption_indication(asoc, + if (new_asoc->peer.adaption_ind) { + ev = sctp_ulpevent_make_adaption_indication(new_asoc, GFP_ATOMIC); - if (!ai_ev) + if (!ev) goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, + SCTP_ULPEVENT(ev)); } } sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); @@ -1809,21 +1809,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_d(const struct sctp_endpoint *ep, if (!repl) goto nomem; - if (ev) - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ev)); - if (ai_ev) - sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, - SCTP_ULPEVENT(ai_ev)); - sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); sctp_add_cmd_sf(commands, SCTP_CMD_TRANSMIT, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; nomem: - if (ai_ev) - sctp_ulpevent_free(ai_ev); if (ev) sctp_ulpevent_free(ev); return SCTP_DISPOSITION_NOMEM; @@ -2672,11 +2663,9 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: - SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3028,6 +3017,7 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!sctp_chunk_length_valid(chunk, sizeof(sctp_chunkhdr_t))) return sctp_sf_violation_chunklen(ep, asoc, type, arg, commands); + /* 10.2 H) SHUTDOWN COMPLETE notification * * When SCTP completes the shutdown procedures (section 9.2) this @@ -3038,14 +3028,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, if (!ev) goto nomem; - /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ - reply = sctp_make_shutdown_complete(asoc, chunk); - if (!reply) - goto nomem_chunk; - - /* Do all the commands now (after allocation), so that we - * have consistent 
state if memory allocation failes - */ sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); /* Upon the receipt of the SHUTDOWN ACK, the SHUTDOWN sender shall @@ -3057,6 +3039,11 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD)); + /* ...send a SHUTDOWN COMPLETE chunk to its peer, */ + reply = sctp_make_shutdown_complete(asoc, chunk); + if (!reply) + goto nomem; + sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE, SCTP_STATE(SCTP_STATE_CLOSED)); SCTP_INC_STATS(SCTP_MIB_SHUTDOWNS); @@ -3067,8 +3054,6 @@ sctp_disposition_t sctp_sf_do_9_2_final(const struct sctp_endpoint *ep, sctp_add_cmd_sf(commands, SCTP_CMD_DELETE_TCB, SCTP_NULL()); return SCTP_DISPOSITION_DELETE_TCB; -nomem_chunk: - sctp_ulpevent_free(ev); nomem: return SCTP_DISPOSITION_NOMEM; } @@ -3667,7 +3652,6 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4564,8 +4548,6 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; - SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); - if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4634,7 +4616,6 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { - SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -4669,7 +4650,6 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= 
asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -4729,7 +4709,6 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); - SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -4774,8 +4753,6 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); - if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4837,8 +4814,6 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; - SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); - /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in * RFC2960 [5] section 8.1 and 8.2. 
@@ -4905,7 +4880,6 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); - SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -4936,8 +4910,6 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; - SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); - /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper * layer, the endpoint enters SHUTDOWN-PENDING state and diff --git a/trunk/net/sctp/socket.c b/trunk/net/sctp/socket.c index 79c3e072cf28..85caf7963886 100644 --- a/trunk/net/sctp/socket.c +++ b/trunk/net/sctp/socket.c @@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. */ -static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, - struct sctp_transport *trans, - struct sctp_association *asoc, - struct sctp_sock *sp, - int hb_change, - int pmtud_change, - int sackdelay_change) +int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) { int error; @@ -2970,7 +2970,7 @@ SCTP_STATIC struct sock *sctp_accept(struct sock *sk, int flags, int *err) goto out; } - timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); + timeo = sock_rcvtimeo(sk, sk->sk_socket->file->f_flags & O_NONBLOCK); error = sctp_wait_for_accept(sk, timeo); if (error) @@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = sctp_rto_max; + sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); /* 
Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = sctp_rto_initial; - sp->rtoinfo.srto_max = sctp_rto_max; - sp->rtoinfo.srto_min = sctp_rto_min; + sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); + sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); + sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. @@ -3061,7 +3061,8 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; + sp->assocparams.sasoc_cookie_life = + jiffies_to_msecs(sctp_valid_cookie_life); /* Initialize default event subscriptions. By default, all the * options are off. @@ -3071,10 +3072,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. 
These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = sctp_hb_interval; + sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = sctp_sack_timeout; + sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; @@ -5618,8 +5619,6 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, /* Copy the bind_addr list from the original endpoint to the new * endpoint so that we can handle restarts properly */ - if (PF_INET6 == assoc->base.sk->sk_family) - flags = SCTP_ADDR6_ALLOWED; if (assoc->peer.ipv4_address) flags |= SCTP_ADDR4_PEERSUPP; if (assoc->peer.ipv6_address) diff --git a/trunk/net/sctp/sysctl.c b/trunk/net/sctp/sysctl.c index 633cd178654b..dc6f3ff32358 100644 --- a/trunk/net/sctp/sysctl.c +++ b/trunk/net/sctp/sysctl.c @@ -45,10 +45,9 @@ #include #include -static int zero = 0; -static int one = 1; -static int timer_max = 86400000; /* ms in one day */ -static int int_max = INT_MAX; +static ctl_handler sctp_sysctl_jiffies_ms; +static long rto_timer_min = 1; +static long rto_timer_max = 86400000; /* One day */ static long sack_timer_min = 1; static long sack_timer_max = 500; @@ -57,45 +56,45 @@ static ctl_table sctp_table[] = { .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &timer_max + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, + .extra1 = &rto_timer_min, + .extra2 = &rto_timer_max }, { .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = 
&sysctl_intvec, - .extra1 = &one, - .extra2 = &timer_max + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, + .extra1 = &rto_timer_min, + .extra2 = &rto_timer_max }, { .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &timer_max + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, + .extra1 = &rto_timer_min, + .extra2 = &rto_timer_max }, { .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &timer_max + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, + .extra1 = &rto_timer_min, + .extra2 = &rto_timer_max }, { .ctl_name = NET_SCTP_MAX_BURST, @@ -103,10 +102,7 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &zero, - .extra2 = &int_max + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, @@ -114,10 +110,7 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &int_max + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_SNDBUF_POLICY, @@ -125,8 +118,7 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -134,8 
+126,7 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -143,10 +134,7 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &int_max + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, @@ -154,21 +142,18 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &int_max + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, - .maxlen = sizeof(unsigned int), + .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &one, - .extra2 = &timer_max + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, + .extra1 = &rto_timer_min, + .extra2 = &rto_timer_max }, { .ctl_name = NET_SCTP_PRESERVE_ENABLE, @@ -176,26 +161,23 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .mode = 0644, + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .mode = 0644, + 
.proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -203,8 +185,7 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -212,8 +193,7 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec, - .strategy = &sysctl_intvec + .proc_handler = &proc_dointvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -221,8 +201,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, + .proc_handler = &proc_doulongvec_ms_jiffies_minmax, + .strategy = &sctp_sysctl_jiffies_ms, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -262,3 +242,37 @@ void sctp_sysctl_unregister(void) { unregister_sysctl_table(sctp_sysctl_header); } + +/* Strategy function to convert jiffies to milliseconds. 
*/ +static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, + void __user *oldval, size_t __user *oldlenp, + void __user *newval, size_t newlen, void **context) { + + if (oldval) { + size_t olen; + + if (oldlenp) { + if (get_user(olen, oldlenp)) + return -EFAULT; + + if (olen != sizeof (int)) + return -EINVAL; + } + if (put_user((*(int *)(table->data) * 1000) / HZ, + (int __user *)oldval) || + (oldlenp && put_user(sizeof (int), oldlenp))) + return -EFAULT; + } + if (newval && newlen) { + int new; + + if (newlen != sizeof (int)) + return -EINVAL; + + if (get_user(new, (int __user *)newval)) + return -EFAULT; + + *(int *)(table->data) = (new * HZ) / 1000; + } + return 1; +} diff --git a/trunk/net/sctp/transport.c b/trunk/net/sctp/transport.c index 3e5936a5f671..2763aa93de1a 100644 --- a/trunk/net/sctp/transport.c +++ b/trunk/net/sctp/transport.c @@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rtt = 0; - peer->rto = msecs_to_jiffies(sctp_rto_initial); + peer->rto = sctp_rto_initial; peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; diff --git a/trunk/net/socket.c b/trunk/net/socket.c index 1bc4167e0da8..6d261bf206fc 100644 --- a/trunk/net/socket.c +++ b/trunk/net/socket.c @@ -42,7 +42,7 @@ * Andi Kleen : Some small cleanups, optimizations, * and fixed a copy_from_user() bug. * Tigran Aivazian : sys_send(args) calls sys_sendto(args, NULL, 0) - * Tigran Aivazian : Made listen(2) backlog sanity checks + * Tigran Aivazian : Made listen(2) backlog sanity checks * protocol-independent * * @@ -53,17 +53,17 @@ * * * This module is effectively the top level interface to the BSD socket - * paradigm. + * paradigm. 
* * Based upon Swansea University Computer Society NET3.039 */ #include +#include #include #include #include #include -#include #include #include #include @@ -96,24 +96,25 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare); static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); + size_t size, loff_t pos); static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); -static int sock_mmap(struct file *file, struct vm_area_struct *vma); + size_t size, loff_t pos); +static int sock_mmap(struct file *file, struct vm_area_struct * vma); static int sock_close(struct inode *inode, struct file *file); static unsigned int sock_poll(struct file *file, struct poll_table_struct *wait); -static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg); +static long sock_ioctl(struct file *file, + unsigned int cmd, unsigned long arg); #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, - unsigned int cmd, unsigned long arg); + unsigned int cmd, unsigned long arg); #endif static int sock_fasync(int fd, struct file *filp, int on); static ssize_t sock_readv(struct file *file, const struct iovec *vector, unsigned long count, loff_t *ppos); static ssize_t sock_writev(struct file *file, const struct iovec *vector, - unsigned long count, loff_t *ppos); + unsigned long count, loff_t *ppos); static ssize_t sock_sendpage(struct file *file, struct page *page, int offset, size_t size, loff_t *ppos, int more); @@ -146,8 +147,52 @@ static struct file_operations socket_file_ops = { * The protocol list. Each protocol is registered in here. 
*/ +static struct net_proto_family *net_families[NPROTO]; + +#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT) +static atomic_t net_family_lockct = ATOMIC_INIT(0); static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; + +/* The strategy is: modifications net_family vector are short, do not + sleep and veeery rare, but read access should be free of any exclusive + locks. + */ + +static void net_family_write_lock(void) +{ + spin_lock(&net_family_lock); + while (atomic_read(&net_family_lockct) != 0) { + spin_unlock(&net_family_lock); + + yield(); + + spin_lock(&net_family_lock); + } +} + +static __inline__ void net_family_write_unlock(void) +{ + spin_unlock(&net_family_lock); +} + +static __inline__ void net_family_read_lock(void) +{ + atomic_inc(&net_family_lockct); + spin_unlock_wait(&net_family_lock); +} + +static __inline__ void net_family_read_unlock(void) +{ + atomic_dec(&net_family_lockct); +} + +#else +#define net_family_write_lock() do { } while(0) +#define net_family_write_unlock() do { } while(0) +#define net_family_read_lock() do { } while(0) +#define net_family_read_unlock() do { } while(0) +#endif + /* * Statistics counters of the socket lists @@ -156,20 +201,19 @@ static const struct net_proto_family *net_families[NPROTO] __read_mostly; static DEFINE_PER_CPU(int, sockets_in_use) = 0; /* - * Support routines. - * Move socket addresses back and forth across the kernel/user - * divide and look after the messy bits. + * Support routines. Move socket addresses back and forth across the kernel/user + * divide and look after the messy bits. */ -#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - +#define MAX_SOCK_ADDR 128 /* 108 for Unix domain - 16 for IP, 16 for IPX, 24 for IPv6, - about 80 for AX.25 + about 80 for AX.25 must be at least one bigger than the AF_UNIX size (see net/unix/af_unix.c - :unix_mkname()). + :unix_mkname()). 
*/ - + /** * move_addr_to_kernel - copy a socket address into kernel space * @uaddr: Address in user space @@ -183,11 +227,11 @@ static DEFINE_PER_CPU(int, sockets_in_use) = 0; int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) { - if (ulen < 0 || ulen > MAX_SOCK_ADDR) + if(ulen<0||ulen>MAX_SOCK_ADDR) return -EINVAL; - if (ulen == 0) + if(ulen==0) return 0; - if (copy_from_user(kaddr, uaddr, ulen)) + if(copy_from_user(kaddr,uaddr,ulen)) return -EFAULT; return audit_sockaddr(ulen, kaddr); } @@ -208,52 +252,51 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, void *kaddr) * length of the data is written over the length limit the user * specified. Zero is returned for a success. */ - -int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, - int __user *ulen) + +int move_addr_to_user(void *kaddr, int klen, void __user *uaddr, int __user *ulen) { int err; int len; - err = get_user(len, ulen); - if (err) + if((err=get_user(len, ulen))) return err; - if (len > klen) - len = klen; - if (len < 0 || len > MAX_SOCK_ADDR) + if(len>klen) + len=klen; + if(len<0 || len> MAX_SOCK_ADDR) return -EINVAL; - if (len) { + if(len) + { if (audit_sockaddr(klen, kaddr)) return -ENOMEM; - if (copy_to_user(uaddr, kaddr, len)) + if(copy_to_user(uaddr,kaddr,len)) return -EFAULT; } /* - * "fromlen shall refer to the value before truncation.." - * 1003.1g + * "fromlen shall refer to the value before truncation.." 
+ * 1003.1g */ return __put_user(klen, ulen); } #define SOCKFS_MAGIC 0x534F434B -static kmem_cache_t *sock_inode_cachep __read_mostly; +static kmem_cache_t * sock_inode_cachep __read_mostly; static struct inode *sock_alloc_inode(struct super_block *sb) { struct socket_alloc *ei; - - ei = kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); + ei = (struct socket_alloc *)kmem_cache_alloc(sock_inode_cachep, SLAB_KERNEL); if (!ei) return NULL; init_waitqueue_head(&ei->socket.wait); - + ei->socket.fasync_list = NULL; ei->socket.state = SS_UNCONNECTED; ei->socket.flags = 0; ei->socket.ops = NULL; ei->socket.sk = NULL; ei->socket.file = NULL; + ei->socket.flags = 0; return &ei->vfs_inode; } @@ -264,25 +307,22 @@ static void sock_destroy_inode(struct inode *inode) container_of(inode, struct socket_alloc, vfs_inode)); } -static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags) +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) { - struct socket_alloc *ei = (struct socket_alloc *)foo; + struct socket_alloc *ei = (struct socket_alloc *) foo; - if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) - == SLAB_CTOR_CONSTRUCTOR) + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) inode_init_once(&ei->vfs_inode); } - + static int init_inodecache(void) { sock_inode_cachep = kmem_cache_create("sock_inode_cache", - sizeof(struct socket_alloc), - 0, - (SLAB_HWCACHE_ALIGN | - SLAB_RECLAIM_ACCOUNT | - SLAB_MEM_SPREAD), - init_once, - NULL); + sizeof(struct socket_alloc), + 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); if (sock_inode_cachep == NULL) return -ENOMEM; return 0; @@ -295,8 +335,7 @@ static struct super_operations sockfs_ops = { }; static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) + int flags, const char *dev_name, void *data, struct vfsmount *mnt) { return get_sb_pseudo(fs_type, 
"socket:", &sockfs_ops, SOCKFS_MAGIC, mnt); @@ -309,13 +348,12 @@ static struct file_system_type sock_fs_type = { .get_sb = sockfs_get_sb, .kill_sb = kill_anon_super, }; - static int sockfs_delete_dentry(struct dentry *dentry) { return 1; } static struct dentry_operations sockfs_dentry_operations = { - .d_delete = sockfs_delete_dentry, + .d_delete = sockfs_delete_dentry, }; /* @@ -439,12 +477,10 @@ struct socket *sockfd_lookup(int fd, int *err) struct file *file; struct socket *sock; - file = fget(fd); - if (!file) { + if (!(file = fget(fd))) { *err = -EBADF; return NULL; } - sock = sock_from_file(file, err); if (!sock) fput(file); @@ -469,7 +505,7 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) /** * sock_alloc - allocate a socket - * + * * Allocate a new inode and socket object. The two are bound together * and initialised. The socket is then returned. If we are out of inodes * NULL is returned. @@ -477,8 +513,8 @@ static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed) static struct socket *sock_alloc(void) { - struct inode *inode; - struct socket *sock; + struct inode * inode; + struct socket * sock; inode = new_inode(sock_mnt->mnt_sb); if (!inode) @@ -486,7 +522,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); - inode->i_mode = S_IFSOCK | S_IRWXUGO; + inode->i_mode = S_IFSOCK|S_IRWXUGO; inode->i_uid = current->fsuid; inode->i_gid = current->fsgid; @@ -500,7 +536,7 @@ static struct socket *sock_alloc(void) * a back door. Remember to keep it shut otherwise you'll let the * creepy crawlies in. */ - + static int sock_no_open(struct inode *irrelevant, struct file *dontcare) { return -ENXIO; @@ -517,9 +553,9 @@ const struct file_operations bad_sock_fops = { * * The socket is released from the protocol stack if it has a release * callback, and the inode is then released if the socket is bound to - * an inode not a file. + * an inode not a file. 
*/ - + void sock_release(struct socket *sock) { if (sock->ops) { @@ -539,10 +575,10 @@ void sock_release(struct socket *sock) iput(SOCK_INODE(sock)); return; } - sock->file = NULL; + sock->file=NULL; } -static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { struct sock_iocb *si = kiocb_to_siocb(iocb); @@ -585,14 +621,14 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, * the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec; + msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; result = sock_sendmsg(sock, msg, size); set_fs(oldfs); return result; } -static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, +static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { int err; @@ -611,14 +647,14 @@ static inline int __sock_recvmsg(struct kiocb *iocb, struct socket *sock, return sock->ops->recvmsg(iocb, sock, msg, size, flags); } -int sock_recvmsg(struct socket *sock, struct msghdr *msg, +int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; struct sock_iocb siocb; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) @@ -626,8 +662,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, return ret; } -int kernel_recvmsg(struct socket *sock, struct msghdr *msg, - struct kvec *vec, size_t num, size_t size, int flags) +int kernel_recvmsg(struct socket *sock, struct msghdr *msg, + struct kvec *vec, size_t num, + size_t size, int flags) { mm_segment_t oldfs = get_fs(); int result; @@ -637,7 +674,8 @@ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, 
* the following is safe, since for compiler definitions of kvec and * iovec are identical, yielding the same in-core layout and alignment */ - msg->msg_iov = (struct iovec *)vec, msg->msg_iovlen = num; + msg->msg_iov = (struct iovec *)vec, + msg->msg_iovlen = num; result = sock_recvmsg(sock, msg, size, flags); set_fs(oldfs); return result; @@ -664,8 +702,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, - struct sock_iocb *siocb) + char __user *ubuf, size_t size, struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { siocb = kmalloc(sizeof(*siocb), GFP_KERNEL); @@ -683,21 +720,20 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, struct iovec *iov, unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + for (i = 0 ; i < nr_segs ; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; + msg->msg_iov = (struct iovec *) iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? 
MSG_DONTWAIT : 0; @@ -712,7 +748,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, struct msghdr msg; int ret; - init_sync_kiocb(&iocb, NULL); + init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); @@ -722,7 +758,7 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -735,25 +771,24 @@ static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, if (!x) return -ENOMEM; return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, struct iovec *iov, unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; int i; - for (i = 0; i < nr_segs; i++) - size += iov[i].iov_len; + for (i = 0 ; i < nr_segs ; i++) + size += iov[i].iov_len; msg->msg_name = NULL; msg->msg_namelen = 0; msg->msg_control = NULL; msg->msg_controllen = 0; - msg->msg_iov = (struct iovec *)iov; + msg->msg_iov = (struct iovec *) iov; msg->msg_iovlen = nr_segs; msg->msg_flags = (file->f_flags & O_NONBLOCK) ? MSG_DONTWAIT : 0; if (sock->type == SOCK_SEQPACKET) @@ -780,7 +815,7 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, } static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) + size_t count, loff_t pos) { struct sock_iocb siocb, *x; @@ -794,48 +829,46 @@ static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, return -ENOMEM; return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + &x->async_iov, 1); } + /* * Atomic setting of ioctl hooks to avoid race * with module unload. 
*/ static DEFINE_MUTEX(br_ioctl_mutex); -static int (*br_ioctl_hook) (unsigned int cmd, void __user *arg) = NULL; +static int (*br_ioctl_hook)(unsigned int cmd, void __user *arg) = NULL; -void brioctl_set(int (*hook) (unsigned int, void __user *)) +void brioctl_set(int (*hook)(unsigned int, void __user *)) { mutex_lock(&br_ioctl_mutex); br_ioctl_hook = hook; mutex_unlock(&br_ioctl_mutex); } - EXPORT_SYMBOL(brioctl_set); static DEFINE_MUTEX(vlan_ioctl_mutex); -static int (*vlan_ioctl_hook) (void __user *arg); +static int (*vlan_ioctl_hook)(void __user *arg); -void vlan_ioctl_set(int (*hook) (void __user *)) +void vlan_ioctl_set(int (*hook)(void __user *)) { mutex_lock(&vlan_ioctl_mutex); vlan_ioctl_hook = hook; mutex_unlock(&vlan_ioctl_mutex); } - EXPORT_SYMBOL(vlan_ioctl_set); static DEFINE_MUTEX(dlci_ioctl_mutex); -static int (*dlci_ioctl_hook) (unsigned int, void __user *); +static int (*dlci_ioctl_hook)(unsigned int, void __user *); -void dlci_ioctl_set(int (*hook) (unsigned int, void __user *)) +void dlci_ioctl_set(int (*hook)(unsigned int, void __user *)) { mutex_lock(&dlci_ioctl_mutex); dlci_ioctl_hook = hook; mutex_unlock(&dlci_ioctl_mutex); } - EXPORT_SYMBOL(dlci_ioctl_set); /* @@ -857,8 +890,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST) { err = dev_ioctl(cmd, argp); } else -#endif /* CONFIG_WIRELESS_EXT */ - switch (cmd) { +#endif /* CONFIG_WIRELESS_EXT */ + switch (cmd) { case FIOSETOWN: case SIOCSPGRP: err = -EFAULT; @@ -868,8 +901,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case FIOGETOWN: case SIOCGPGRP: - err = put_user(sock->file->f_owner.pid, - (int __user *)argp); + err = put_user(sock->file->f_owner.pid, (int __user *)argp); break; case SIOCGIFBR: case SIOCSIFBR: @@ -880,7 +912,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) request_module("bridge"); mutex_lock(&br_ioctl_mutex); - if 
(br_ioctl_hook) + if (br_ioctl_hook) err = br_ioctl_hook(cmd, argp); mutex_unlock(&br_ioctl_mutex); break; @@ -897,7 +929,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) break; case SIOCGIFDIVERT: case SIOCSIFDIVERT: - /* Convert this to call through a hook */ + /* Convert this to call through a hook */ err = divert_ioctl(cmd, argp); break; case SIOCADDDLCI: @@ -922,7 +954,7 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (err == -ENOIOCTLCMD) err = dev_ioctl(cmd, argp); break; - } + } return err; } @@ -930,7 +962,7 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) { int err; struct socket *sock = NULL; - + err = security_socket_create(family, type, protocol, 1); if (err) goto out; @@ -941,33 +973,26 @@ int sock_create_lite(int family, int type, int protocol, struct socket **res) goto out; } + security_socket_post_create(sock, family, type, protocol, 1); sock->type = type; - err = security_socket_post_create(sock, family, type, protocol, 1); - if (err) - goto out_release; - out: *res = sock; return err; -out_release: - sock_release(sock); - sock = NULL; - goto out; } /* No kernel lock held - perfect */ -static unsigned int sock_poll(struct file *file, poll_table *wait) +static unsigned int sock_poll(struct file *file, poll_table * wait) { struct socket *sock; /* - * We can't return errors to poll, so it's either yes or no. + * We can't return errors to poll, so it's either yes or no. */ sock = file->private_data; return sock->ops->poll(file, sock, wait); } -static int sock_mmap(struct file *file, struct vm_area_struct *vma) +static int sock_mmap(struct file * file, struct vm_area_struct * vma) { struct socket *sock = file->private_data; @@ -977,11 +1002,12 @@ static int sock_mmap(struct file *file, struct vm_area_struct *vma) static int sock_close(struct inode *inode, struct file *filp) { /* - * It was possible the inode is NULL we were - * closing an unfinished socket. 
+ * It was possible the inode is NULL we were + * closing an unfinished socket. */ - if (!inode) { + if (!inode) + { printk(KERN_DEBUG "sock_close: NULL inode\n"); return 0; } @@ -1007,52 +1033,57 @@ static int sock_close(struct inode *inode, struct file *filp) static int sock_fasync(int fd, struct file *filp, int on) { - struct fasync_struct *fa, *fna = NULL, **prev; + struct fasync_struct *fa, *fna=NULL, **prev; struct socket *sock; struct sock *sk; - if (on) { + if (on) + { fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL); - if (fna == NULL) + if(fna==NULL) return -ENOMEM; } sock = filp->private_data; - sk = sock->sk; - if (sk == NULL) { + if ((sk=sock->sk) == NULL) { kfree(fna); return -EINVAL; } lock_sock(sk); - prev = &(sock->fasync_list); + prev=&(sock->fasync_list); - for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) - if (fa->fa_file == filp) + for (fa=*prev; fa!=NULL; prev=&fa->fa_next,fa=*prev) + if (fa->fa_file==filp) break; - if (on) { - if (fa != NULL) { + if(on) + { + if(fa!=NULL) + { write_lock_bh(&sk->sk_callback_lock); - fa->fa_fd = fd; + fa->fa_fd=fd; write_unlock_bh(&sk->sk_callback_lock); kfree(fna); goto out; } - fna->fa_file = filp; - fna->fa_fd = fd; - fna->magic = FASYNC_MAGIC; - fna->fa_next = sock->fasync_list; + fna->fa_file=filp; + fna->fa_fd=fd; + fna->magic=FASYNC_MAGIC; + fna->fa_next=sock->fasync_list; write_lock_bh(&sk->sk_callback_lock); - sock->fasync_list = fna; + sock->fasync_list=fna; write_unlock_bh(&sk->sk_callback_lock); - } else { - if (fa != NULL) { + } + else + { + if (fa!=NULL) + { write_lock_bh(&sk->sk_callback_lock); - *prev = fa->fa_next; + *prev=fa->fa_next; write_unlock_bh(&sk->sk_callback_lock); kfree(fa); } @@ -1069,9 +1100,10 @@ int sock_wake_async(struct socket *sock, int how, int band) { if (!sock || !sock->fasync_list) return -1; - switch (how) { + switch (how) + { case 1: - + if (test_bit(SOCK_ASYNC_WAITDATA, &sock->flags)) break; goto call_kill; @@ -1080,7 +1112,7 @@ int 
sock_wake_async(struct socket *sock, int how, int band) break; /* fall through */ case 0: -call_kill: + call_kill: __kill_fasync(sock->fasync_list, SIGIO, band); break; case 3: @@ -1089,15 +1121,13 @@ int sock_wake_async(struct socket *sock, int how, int band) return 0; } -static int __sock_create(int family, int type, int protocol, - struct socket **res, int kern) +static int __sock_create(int family, int type, int protocol, struct socket **res, int kern) { int err; struct socket *sock; - const struct net_proto_family *pf; /* - * Check protocol is in range + * Check protocol is in range */ if (family < 0 || family >= NPROTO) return -EAFNOSUPPORT; @@ -1110,11 +1140,10 @@ static int __sock_create(int family, int type, int protocol, deadlock in module load. */ if (family == PF_INET && type == SOCK_PACKET) { - static int warned; + static int warned; if (!warned) { warned = 1; - printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", - current->comm); + printk(KERN_INFO "%s uses obsolete (PF_INET,SOCK_PACKET)\n", current->comm); } family = PF_PACKET; } @@ -1122,84 +1151,79 @@ static int __sock_create(int family, int type, int protocol, err = security_socket_create(family, type, protocol, kern); if (err) return err; - - /* - * Allocate the socket and allow the family to set things up. if - * the protocol is 0, the family is instructed to select an appropriate - * default. - */ - sock = sock_alloc(); - if (!sock) { - if (net_ratelimit()) - printk(KERN_WARNING "socket: no more sockets\n"); - return -ENFILE; /* Not exactly a match, but its the - closest posix thing */ - } - - sock->type = type; - + #if defined(CONFIG_KMOD) - /* Attempt to load a protocol module if the find failed. - * - * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user + /* Attempt to load a protocol module if the find failed. + * + * 12/09/1996 Marcin: But! this makes REALLY only sense, if the user * requested real, full-featured networking support upon configuration. 
* Otherwise module support will break! */ - if (net_families[family] == NULL) - request_module("net-pf-%d", family); + if (net_families[family]==NULL) + { + request_module("net-pf-%d",family); + } #endif - rcu_read_lock(); - pf = rcu_dereference(net_families[family]); - err = -EAFNOSUPPORT; - if (!pf) - goto out_release; + net_family_read_lock(); + if (net_families[family] == NULL) { + err = -EAFNOSUPPORT; + goto out; + } + +/* + * Allocate the socket and allow the family to set things up. if + * the protocol is 0, the family is instructed to select an appropriate + * default. + */ + + if (!(sock = sock_alloc())) { + if (net_ratelimit()) + printk(KERN_WARNING "socket: no more sockets\n"); + err = -ENFILE; /* Not exactly a match, but its the + closest posix thing */ + goto out; + } + + sock->type = type; /* * We will call the ->create function, that possibly is in a loadable * module, so we have to bump that loadable module refcnt first. */ - if (!try_module_get(pf->owner)) + err = -EAFNOSUPPORT; + if (!try_module_get(net_families[family]->owner)) goto out_release; - /* Now protected by module ref count */ - rcu_read_unlock(); - - err = pf->create(sock, protocol); - if (err < 0) + if ((err = net_families[family]->create(sock, protocol)) < 0) { + sock->ops = NULL; goto out_module_put; + } /* * Now to bump the refcnt of the [loadable] module that owns this * socket at sock_release time we decrement its refcnt. 
*/ - if (!try_module_get(sock->ops->owner)) - goto out_module_busy; - + if (!try_module_get(sock->ops->owner)) { + sock->ops = NULL; + goto out_module_put; + } /* * Now that we're done with the ->create function, the [loadable] * module can have its refcnt decremented */ - module_put(pf->owner); - err = security_socket_post_create(sock, family, type, protocol, kern); - if (err) - goto out_release; + module_put(net_families[family]->owner); *res = sock; + security_socket_post_create(sock, family, type, protocol, kern); - return 0; - -out_module_busy: - err = -EAFNOSUPPORT; -out_module_put: - sock->ops = NULL; - module_put(pf->owner); -out_sock_release: - sock_release(sock); +out: + net_family_read_unlock(); return err; - +out_module_put: + module_put(net_families[family]->owner); out_release: - rcu_read_unlock(); - goto out_sock_release; + sock_release(sock); + goto out; } int sock_create(int family, int type, int protocol, struct socket **res) @@ -1238,8 +1262,7 @@ asmlinkage long sys_socket(int family, int type, int protocol) * Create a pair of connected sockets. */ -asmlinkage long sys_socketpair(int family, int type, int protocol, - int __user *usockvec) +asmlinkage long sys_socketpair(int family, int type, int protocol, int __user *usockvec) { struct socket *sock1, *sock2; int fd1, fd2, err; @@ -1258,7 +1281,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, goto out_release_1; err = sock1->ops->socketpair(sock1, sock2); - if (err < 0) + if (err < 0) goto out_release_both; fd1 = fd2 = -1; @@ -1277,7 +1300,7 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, * Not kernel problem. 
*/ - err = put_user(fd1, &usockvec[0]); + err = put_user(fd1, &usockvec[0]); if (!err) err = put_user(fd2, &usockvec[1]); if (!err) @@ -1288,18 +1311,19 @@ asmlinkage long sys_socketpair(int family, int type, int protocol, return err; out_close_1: - sock_release(sock2); + sock_release(sock2); sys_close(fd1); return err; out_release_both: - sock_release(sock2); + sock_release(sock2); out_release_1: - sock_release(sock1); + sock_release(sock1); out: return err; } + /* * Bind a name to a socket. Nothing much to do here since it's * the protocol's responsibility to handle the local address. @@ -1314,39 +1338,35 @@ asmlinkage long sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen) char address[MAX_SOCK_ADDR]; int err, fput_needed; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if(sock) { - err = move_addr_to_kernel(umyaddr, addrlen, address); - if (err >= 0) { - err = security_socket_bind(sock, - (struct sockaddr *)address, - addrlen); + if((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) + { + if((err=move_addr_to_kernel(umyaddr,addrlen,address))>=0) { + err = security_socket_bind(sock, (struct sockaddr *)address, addrlen); if (!err) err = sock->ops->bind(sock, - (struct sockaddr *) - address, addrlen); + (struct sockaddr *)address, addrlen); } fput_light(sock->file, fput_needed); - } + } return err; } + /* * Perform a listen. Basically, we allow the protocol to do anything * necessary for a listen, and if that works, we mark the socket as * ready for listening. 
*/ -int sysctl_somaxconn __read_mostly = SOMAXCONN; +int sysctl_somaxconn = SOMAXCONN; asmlinkage long sys_listen(int fd, int backlog) { struct socket *sock; int err, fput_needed; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock) { - if ((unsigned)backlog > sysctl_somaxconn) + + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { + if ((unsigned) backlog > sysctl_somaxconn) backlog = sysctl_somaxconn; err = security_socket_listen(sock, backlog); @@ -1358,6 +1378,7 @@ asmlinkage long sys_listen(int fd, int backlog) return err; } + /* * For accept, we attempt to create a new socket, set up the link * with the client, wake up the client, then return the new @@ -1370,8 +1391,7 @@ asmlinkage long sys_listen(int fd, int backlog) * clean when we restucture accept also. */ -asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, - int __user *upeer_addrlen) +asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, int __user *upeer_addrlen) { struct socket *sock, *newsock; struct file *newfile; @@ -1383,7 +1403,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out; err = -ENFILE; - if (!(newsock = sock_alloc())) + if (!(newsock = sock_alloc())) goto out_put; newsock->type = sock->type; @@ -1415,13 +1435,11 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out_fd; if (upeer_sockaddr) { - if (newsock->ops->getname(newsock, (struct sockaddr *)address, - &len, 2) < 0) { + if(newsock->ops->getname(newsock, (struct sockaddr *)address, &len, 2)<0) { err = -ECONNABORTED; goto out_fd; } - err = move_addr_to_user(address, len, upeer_sockaddr, - upeer_addrlen); + err = move_addr_to_user(address, len, upeer_sockaddr, upeer_addrlen); if (err < 0) goto out_fd; } @@ -1443,6 +1461,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, goto out_put; } + /* * Attempt to connect to a socket with the server address. 
The address * is in user space so we verify it is OK and move it to kernel space. @@ -1455,8 +1474,7 @@ asmlinkage long sys_accept(int fd, struct sockaddr __user *upeer_sockaddr, * include the -EINPROGRESS status for such sockets. */ -asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, - int addrlen) +asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, int addrlen) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1469,12 +1487,11 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, if (err < 0) goto out_put; - err = - security_socket_connect(sock, (struct sockaddr *)address, addrlen); + err = security_socket_connect(sock, (struct sockaddr *)address, addrlen); if (err) goto out_put; - err = sock->ops->connect(sock, (struct sockaddr *)address, addrlen, + err = sock->ops->connect(sock, (struct sockaddr *) address, addrlen, sock->file->f_flags); out_put: fput_light(sock->file, fput_needed); @@ -1487,13 +1504,12 @@ asmlinkage long sys_connect(int fd, struct sockaddr __user *uservaddr, * name to user space. */ -asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - + sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1518,27 +1534,22 @@ asmlinkage long sys_getsockname(int fd, struct sockaddr __user *usockaddr, * name to user space. 
*/ -asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, - int __user *usockaddr_len) +asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, int __user *usockaddr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; int len, err, fput_needed; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { err = security_socket_getpeername(sock); if (err) { fput_light(sock->file, fput_needed); return err; } - err = - sock->ops->getname(sock, (struct sockaddr *)address, &len, - 1); + err = sock->ops->getname(sock, (struct sockaddr *)address, &len, 1); if (!err) - err = move_addr_to_user(address, len, usockaddr, - usockaddr_len); + err=move_addr_to_user(address,len, usockaddr, usockaddr_len); fput_light(sock->file, fput_needed); } return err; @@ -1550,9 +1561,8 @@ asmlinkage long sys_getpeername(int fd, struct sockaddr __user *usockaddr, * the protocol. */ -asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, - unsigned flags, struct sockaddr __user *addr, - int addr_len) +asmlinkage long sys_sendto(int fd, void __user * buff, size_t len, unsigned flags, + struct sockaddr __user *addr, int addr_len) { struct socket *sock; char address[MAX_SOCK_ADDR]; @@ -1569,55 +1579,54 @@ asmlinkage long sys_sendto(int fd, void __user *buff, size_t len, sock = sock_from_file(sock_file, &err); if (!sock) goto out_put; - iov.iov_base = buff; - iov.iov_len = len; - msg.msg_name = NULL; - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_namelen = 0; + iov.iov_base=buff; + iov.iov_len=len; + msg.msg_name=NULL; + msg.msg_iov=&iov; + msg.msg_iovlen=1; + msg.msg_control=NULL; + msg.msg_controllen=0; + msg.msg_namelen=0; if (addr) { err = move_addr_to_kernel(addr, addr_len, address); if (err < 0) goto out_put; - msg.msg_name = address; - msg.msg_namelen = addr_len; + msg.msg_name=address; + 
msg.msg_namelen=addr_len; } if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; msg.msg_flags = flags; err = sock_sendmsg(sock, &msg, len); -out_put: +out_put: fput_light(sock_file, fput_needed); return err; } /* - * Send a datagram down a socket. + * Send a datagram down a socket. */ -asmlinkage long sys_send(int fd, void __user *buff, size_t len, unsigned flags) +asmlinkage long sys_send(int fd, void __user * buff, size_t len, unsigned flags) { return sys_sendto(fd, buff, len, flags, NULL, 0); } /* - * Receive a frame from the socket and optionally record the address of the + * Receive a frame from the socket and optionally record the address of the * sender. We verify the buffers are writable and if needed move the * sender address from kernel to user space. */ -asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, - unsigned flags, struct sockaddr __user *addr, - int __user *addr_len) +asmlinkage long sys_recvfrom(int fd, void __user * ubuf, size_t size, unsigned flags, + struct sockaddr __user *addr, int __user *addr_len) { struct socket *sock; struct iovec iov; struct msghdr msg; char address[MAX_SOCK_ADDR]; - int err, err2; + int err,err2; struct file *sock_file; int fput_needed; @@ -1629,22 +1638,23 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, if (!sock) goto out; - msg.msg_control = NULL; - msg.msg_controllen = 0; - msg.msg_iovlen = 1; - msg.msg_iov = &iov; - iov.iov_len = size; - iov.iov_base = ubuf; - msg.msg_name = address; - msg.msg_namelen = MAX_SOCK_ADDR; + msg.msg_control=NULL; + msg.msg_controllen=0; + msg.msg_iovlen=1; + msg.msg_iov=&iov; + iov.iov_len=size; + iov.iov_base=ubuf; + msg.msg_name=address; + msg.msg_namelen=MAX_SOCK_ADDR; if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; - err = sock_recvmsg(sock, &msg, size, flags); + err=sock_recvmsg(sock, &msg, size, flags); - if (err >= 0 && addr != NULL) { - err2 = move_addr_to_user(address, msg.msg_namelen, addr, addr_len); - if (err2 
< 0) - err = err2; + if(err >= 0 && addr != NULL) + { + err2=move_addr_to_user(address, msg.msg_namelen, addr, addr_len); + if(err2<0) + err=err2; } out: fput_light(sock_file, fput_needed); @@ -1652,11 +1662,10 @@ asmlinkage long sys_recvfrom(int fd, void __user *ubuf, size_t size, } /* - * Receive a datagram from a socket. + * Receive a datagram from a socket. */ -asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, - unsigned flags) +asmlinkage long sys_recv(int fd, void __user * ubuf, size_t size, unsigned flags) { return sys_recvfrom(fd, ubuf, size, flags, NULL, NULL); } @@ -1666,29 +1675,24 @@ asmlinkage long sys_recv(int fd, void __user *ubuf, size_t size, * to pass the user mode parameter for the protocols to sort out. */ -asmlinkage long sys_setsockopt(int fd, int level, int optname, - char __user *optval, int optlen) +asmlinkage long sys_setsockopt(int fd, int level, int optname, char __user *optval, int optlen) { int err, fput_needed; struct socket *sock; if (optlen < 0) return -EINVAL; - - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { - err = security_socket_setsockopt(sock, level, optname); + + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) + { + err = security_socket_setsockopt(sock,level,optname); if (err) goto out_put; if (level == SOL_SOCKET) - err = - sock_setsockopt(sock, level, optname, optval, - optlen); + err=sock_setsockopt(sock,level,optname,optval,optlen); else - err = - sock->ops->setsockopt(sock, level, optname, optval, - optlen); + err=sock->ops->setsockopt(sock, level, optname, optval, optlen); out_put: fput_light(sock->file, fput_needed); } @@ -1700,32 +1704,27 @@ asmlinkage long sys_setsockopt(int fd, int level, int optname, * to pass a user mode parameter for the protocols to sort out. 
*/ -asmlinkage long sys_getsockopt(int fd, int level, int optname, - char __user *optval, int __user *optlen) +asmlinkage long sys_getsockopt(int fd, int level, int optname, char __user *optval, int __user *optlen) { int err, fput_needed; struct socket *sock; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed)) != NULL) { err = security_socket_getsockopt(sock, level, optname); if (err) goto out_put; if (level == SOL_SOCKET) - err = - sock_getsockopt(sock, level, optname, optval, - optlen); + err=sock_getsockopt(sock,level,optname,optval,optlen); else - err = - sock->ops->getsockopt(sock, level, optname, optval, - optlen); + err=sock->ops->getsockopt(sock, level, optname, optval, optlen); out_put: fput_light(sock->file, fput_needed); } return err; } + /* * Shutdown a socket. */ @@ -1735,8 +1734,8 @@ asmlinkage long sys_shutdown(int fd, int how) int err, fput_needed; struct socket *sock; - sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (sock != NULL) { + if ((sock = sockfd_lookup_light(fd, &err, &fput_needed))!=NULL) + { err = security_socket_shutdown(sock, how); if (!err) err = sock->ops->shutdown(sock, how); @@ -1745,42 +1744,41 @@ asmlinkage long sys_shutdown(int fd, int how) return err; } -/* A couple of helpful macros for getting the address of the 32/64 bit +/* A couple of helpful macros for getting the address of the 32/64 bit * fields which are the same type (int / unsigned) on our platforms. */ #define COMPAT_MSG(msg, member) ((MSG_CMSG_COMPAT & flags) ? 
&msg##_compat->member : &msg->member) #define COMPAT_NAMELEN(msg) COMPAT_MSG(msg, msg_namelen) #define COMPAT_FLAGS(msg) COMPAT_MSG(msg, msg_flags) + /* * BSD sendmsg interface */ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) { - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; char address[MAX_SOCK_ADDR]; struct iovec iovstack[UIO_FASTIOV], *iov = iovstack; unsigned char ctl[sizeof(struct cmsghdr) + 20] - __attribute__ ((aligned(sizeof(__kernel_size_t)))); - /* 20 is size of ipv6_pktinfo */ + __attribute__ ((aligned (sizeof(__kernel_size_t)))); + /* 20 is size of ipv6_pktinfo */ unsigned char *ctl_buf = ctl; struct msghdr msg_sys; int err, ctl_len, iov_size, total_len; int fput_needed; - + err = -EFAULT; if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) + } else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); - if (!sock) + if (!sock) goto out; /* do not move before msg_sys is valid */ @@ -1788,7 +1786,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - /* Check whether to allocate the iovec area */ + /* Check whether to allocate the iovec area*/ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1802,7 +1800,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = verify_compat_iovec(&msg_sys, iov, address, VERIFY_READ); } else err = verify_iovec(&msg_sys, iov, address, VERIFY_READ); - if (err < 0) + if (err < 0) goto out_freeiov; total_len = err; @@ -1810,19 +1808,18 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, 
unsigned flags) if (msg_sys.msg_controllen > INT_MAX) goto out_freeiov; - ctl_len = msg_sys.msg_controllen; + ctl_len = msg_sys.msg_controllen; if ((MSG_CMSG_COMPAT & flags) && ctl_len) { - err = - cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, - sizeof(ctl)); + err = cmsghdr_from_user_compat_to_kern(&msg_sys, sock->sk, ctl, sizeof(ctl)); if (err) goto out_freeiov; ctl_buf = msg_sys.msg_control; ctl_len = msg_sys.msg_controllen; } else if (ctl_len) { - if (ctl_len > sizeof(ctl)) { + if (ctl_len > sizeof(ctl)) + { ctl_buf = sock_kmalloc(sock->sk, ctl_len, GFP_KERNEL); - if (ctl_buf == NULL) + if (ctl_buf == NULL) goto out_freeiov; } err = -EFAULT; @@ -1831,8 +1828,7 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ - if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, - ctl_len)) + if (copy_from_user(ctl_buf, (void __user *) msg_sys.msg_control, ctl_len)) goto out_freectl; msg_sys.msg_control = ctl_buf; } @@ -1843,14 +1839,14 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) err = sock_sendmsg(sock, &msg_sys, total_len); out_freectl: - if (ctl_buf != ctl) + if (ctl_buf != ctl) sock_kfree_s(sock->sk, ctl_buf, ctl_len); out_freeiov: if (iov != iovstack) sock_kfree_s(sock->sk, iov, iov_size); out_put: fput_light(sock->file, fput_needed); -out: +out: return err; } @@ -1858,14 +1854,12 @@ asmlinkage long sys_sendmsg(int fd, struct msghdr __user *msg, unsigned flags) * BSD recvmsg interface */ -asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, - unsigned int flags) +asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flags) { - struct compat_msghdr __user *msg_compat = - (struct compat_msghdr __user *)msg; + struct compat_msghdr __user *msg_compat = (struct compat_msghdr __user *)msg; struct socket *sock; struct iovec iovstack[UIO_FASTIOV]; - 
struct iovec *iov = iovstack; + struct iovec *iov=iovstack; struct msghdr msg_sys; unsigned long cmsg_ptr; int err, iov_size, total_len, len; @@ -1877,13 +1871,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, /* user mode address pointers */ struct sockaddr __user *uaddr; int __user *uaddr_len; - + if (MSG_CMSG_COMPAT & flags) { if (get_compat_msghdr(&msg_sys, msg_compat)) return -EFAULT; - } - else if (copy_from_user(&msg_sys, msg, sizeof(struct msghdr))) - return -EFAULT; + } else + if (copy_from_user(&msg_sys,msg,sizeof(struct msghdr))) + return -EFAULT; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) @@ -1892,8 +1886,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, err = -EMSGSIZE; if (msg_sys.msg_iovlen > UIO_MAXIOV) goto out_put; - - /* Check whether to allocate the iovec area */ + + /* Check whether to allocate the iovec area*/ err = -ENOMEM; iov_size = msg_sys.msg_iovlen * sizeof(struct iovec); if (msg_sys.msg_iovlen > UIO_FASTIOV) { @@ -1903,11 +1897,11 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, } /* - * Save the user-mode address (verify_iovec will change the - * kernel msghdr to use the kernel address space) + * Save the user-mode address (verify_iovec will change the + * kernel msghdr to use the kernel address space) */ - - uaddr = (void __user *)msg_sys.msg_name; + + uaddr = (void __user *) msg_sys.msg_name; uaddr_len = COMPAT_NAMELEN(msg); if (MSG_CMSG_COMPAT & flags) { err = verify_compat_iovec(&msg_sys, iov, addr, VERIFY_WRITE); @@ -1915,13 +1909,13 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, err = verify_iovec(&msg_sys, iov, addr, VERIFY_WRITE); if (err < 0) goto out_freeiov; - total_len = err; + total_len=err; cmsg_ptr = (unsigned long)msg_sys.msg_control; msg_sys.msg_flags = 0; if (MSG_CMSG_COMPAT & flags) msg_sys.msg_flags = MSG_CMSG_COMPAT; - + if (sock->file->f_flags & O_NONBLOCK) flags |= MSG_DONTWAIT; err = sock_recvmsg(sock, &msg_sys, 
total_len, flags); @@ -1930,8 +1924,7 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, len = err; if (uaddr != NULL) { - err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, - uaddr_len); + err = move_addr_to_user(addr, msg_sys.msg_namelen, uaddr, uaddr_len); if (err < 0) goto out_freeiov; } @@ -1940,10 +1933,10 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, if (err) goto out_freeiov; if (MSG_CMSG_COMPAT & flags) - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, &msg_compat->msg_controllen); else - err = __put_user((unsigned long)msg_sys.msg_control - cmsg_ptr, + err = __put_user((unsigned long)msg_sys.msg_control-cmsg_ptr, &msg->msg_controllen); if (err) goto out_freeiov; @@ -1962,187 +1955,163 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, /* Argument list sizes for sys_socketcall */ #define AL(x) ((x) * sizeof(unsigned long)) -static const unsigned char nargs[18]={ - AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), - AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), - AL(6),AL(2),AL(5),AL(5),AL(3),AL(3) -}; - +static unsigned char nargs[18]={AL(0),AL(3),AL(3),AL(3),AL(2),AL(3), + AL(3),AL(3),AL(4),AL(4),AL(4),AL(6), + AL(6),AL(2),AL(5),AL(5),AL(3),AL(3)}; #undef AL /* - * System call vectors. + * System call vectors. * * Argument checking cleaned up. Saved 20% in size. * This function doesn't need to set the kernel lock because - * it is set by the callees. + * it is set by the callees. */ asmlinkage long sys_socketcall(int call, unsigned long __user *args) { unsigned long a[6]; - unsigned long a0, a1; + unsigned long a0,a1; int err; - if (call < 1 || call > SYS_RECVMSG) + if(call<1||call>SYS_RECVMSG) return -EINVAL; /* copy_from_user should be SMP safe. 
*/ if (copy_from_user(a, args, nargs[call])) return -EFAULT; - err = audit_socketcall(nargs[call] / sizeof(unsigned long), a); + err = audit_socketcall(nargs[call]/sizeof(unsigned long), a); if (err) return err; - a0 = a[0]; - a1 = a[1]; - - switch (call) { - case SYS_SOCKET: - err = sys_socket(a0, a1, a[2]); - break; - case SYS_BIND: - err = sys_bind(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_CONNECT: - err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); - break; - case SYS_LISTEN: - err = sys_listen(a0, a1); - break; - case SYS_ACCEPT: - err = - sys_accept(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_GETSOCKNAME: - err = - sys_getsockname(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_GETPEERNAME: - err = - sys_getpeername(a0, (struct sockaddr __user *)a1, - (int __user *)a[2]); - break; - case SYS_SOCKETPAIR: - err = sys_socketpair(a0, a1, a[2], (int __user *)a[3]); - break; - case SYS_SEND: - err = sys_send(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_SENDTO: - err = sys_sendto(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], a[5]); - break; - case SYS_RECV: - err = sys_recv(a0, (void __user *)a1, a[2], a[3]); - break; - case SYS_RECVFROM: - err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], - (struct sockaddr __user *)a[4], - (int __user *)a[5]); - break; - case SYS_SHUTDOWN: - err = sys_shutdown(a0, a1); - break; - case SYS_SETSOCKOPT: - err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); - break; - case SYS_GETSOCKOPT: - err = - sys_getsockopt(a0, a1, a[2], (char __user *)a[3], - (int __user *)a[4]); - break; - case SYS_SENDMSG: - err = sys_sendmsg(a0, (struct msghdr __user *)a1, a[2]); - break; - case SYS_RECVMSG: - err = sys_recvmsg(a0, (struct msghdr __user *)a1, a[2]); - break; - default: - err = -EINVAL; - break; + a0=a[0]; + a1=a[1]; + + switch(call) + { + case SYS_SOCKET: + err = sys_socket(a0,a1,a[2]); + break; + case 
SYS_BIND: + err = sys_bind(a0,(struct sockaddr __user *)a1, a[2]); + break; + case SYS_CONNECT: + err = sys_connect(a0, (struct sockaddr __user *)a1, a[2]); + break; + case SYS_LISTEN: + err = sys_listen(a0,a1); + break; + case SYS_ACCEPT: + err = sys_accept(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); + break; + case SYS_GETSOCKNAME: + err = sys_getsockname(a0,(struct sockaddr __user *)a1, (int __user *)a[2]); + break; + case SYS_GETPEERNAME: + err = sys_getpeername(a0, (struct sockaddr __user *)a1, (int __user *)a[2]); + break; + case SYS_SOCKETPAIR: + err = sys_socketpair(a0,a1, a[2], (int __user *)a[3]); + break; + case SYS_SEND: + err = sys_send(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_SENDTO: + err = sys_sendto(a0,(void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], a[5]); + break; + case SYS_RECV: + err = sys_recv(a0, (void __user *)a1, a[2], a[3]); + break; + case SYS_RECVFROM: + err = sys_recvfrom(a0, (void __user *)a1, a[2], a[3], + (struct sockaddr __user *)a[4], (int __user *)a[5]); + break; + case SYS_SHUTDOWN: + err = sys_shutdown(a0,a1); + break; + case SYS_SETSOCKOPT: + err = sys_setsockopt(a0, a1, a[2], (char __user *)a[3], a[4]); + break; + case SYS_GETSOCKOPT: + err = sys_getsockopt(a0, a1, a[2], (char __user *)a[3], (int __user *)a[4]); + break; + case SYS_SENDMSG: + err = sys_sendmsg(a0, (struct msghdr __user *) a1, a[2]); + break; + case SYS_RECVMSG: + err = sys_recvmsg(a0, (struct msghdr __user *) a1, a[2]); + break; + default: + err = -EINVAL; + break; } return err; } -#endif /* __ARCH_WANT_SYS_SOCKETCALL */ +#endif /* __ARCH_WANT_SYS_SOCKETCALL */ -/** - * sock_register - add a socket protocol handler - * @ops: description of protocol - * +/* * This function is called by a protocol handler that wants to * advertise its address family, and have it linked into the - * socket interface. The value ops->family coresponds to the - * socket system call protocol family. + * SOCKET module. 
*/ -int sock_register(const struct net_proto_family *ops) + +int sock_register(struct net_proto_family *ops) { int err; if (ops->family >= NPROTO) { - printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, - NPROTO); + printk(KERN_CRIT "protocol %d >= NPROTO(%d)\n", ops->family, NPROTO); return -ENOBUFS; } - - spin_lock(&net_family_lock); - if (net_families[ops->family]) - err = -EEXIST; - else { - net_families[ops->family] = ops; + net_family_write_lock(); + err = -EEXIST; + if (net_families[ops->family] == NULL) { + net_families[ops->family]=ops; err = 0; } - spin_unlock(&net_family_lock); - - printk(KERN_INFO "NET: Registered protocol family %d\n", ops->family); + net_family_write_unlock(); + printk(KERN_INFO "NET: Registered protocol family %d\n", + ops->family); return err; } -/** - * sock_unregister - remove a protocol handler - * @family: protocol family to remove - * +/* * This function is called by a protocol handler that wants to * remove its address family, and have it unlinked from the - * new socket creation. - * - * If protocol handler is a module, then it can use module reference - * counts to protect against new references. If protocol handler is not - * a module then it needs to provide its own protection in - * the ops->create routine. + * SOCKET module. */ -void sock_unregister(int family) -{ - BUG_ON(family < 0 || family >= NPROTO); - spin_lock(&net_family_lock); - net_families[family] = NULL; - spin_unlock(&net_family_lock); - - synchronize_rcu(); +int sock_unregister(int family) +{ + if (family < 0 || family >= NPROTO) + return -1; - printk(KERN_INFO "NET: Unregistered protocol family %d\n", family); + net_family_write_lock(); + net_families[family]=NULL; + net_family_write_unlock(); + printk(KERN_INFO "NET: Unregistered protocol family %d\n", + family); + return 0; } static int __init sock_init(void) { /* - * Initialize sock SLAB cache. + * Initialize sock SLAB cache. 
*/ - + sk_init(); /* - * Initialize skbuff SLAB cache + * Initialize skbuff SLAB cache */ skb_init(); /* - * Initialize the protocols module. + * Initialize the protocols module. */ init_inodecache(); @@ -2168,7 +2137,7 @@ void socket_seq_show(struct seq_file *seq) int counter = 0; for_each_possible_cpu(cpu) - counter += per_cpu(sockets_in_use, cpu); + counter += per_cpu(sockets_in_use, cpu); /* It can be negative, by the way. 8) */ if (counter < 0) @@ -2176,11 +2145,11 @@ void socket_seq_show(struct seq_file *seq) seq_printf(seq, "sockets: used %d\n", counter); } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_PROC_FS */ #ifdef CONFIG_COMPAT static long compat_sock_ioctl(struct file *file, unsigned cmd, - unsigned long arg) + unsigned long arg) { struct socket *sock = file->private_data; int ret = -ENOIOCTLCMD; @@ -2192,109 +2161,6 @@ static long compat_sock_ioctl(struct file *file, unsigned cmd, } #endif -int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen) -{ - return sock->ops->bind(sock, addr, addrlen); -} - -int kernel_listen(struct socket *sock, int backlog) -{ - return sock->ops->listen(sock, backlog); -} - -int kernel_accept(struct socket *sock, struct socket **newsock, int flags) -{ - struct sock *sk = sock->sk; - int err; - - err = sock_create_lite(sk->sk_family, sk->sk_type, sk->sk_protocol, - newsock); - if (err < 0) - goto done; - - err = sock->ops->accept(sock, *newsock, flags); - if (err < 0) { - sock_release(*newsock); - goto done; - } - - (*newsock)->ops = sock->ops; - -done: - return err; -} - -int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen, - int flags) -{ - return sock->ops->connect(sock, addr, addrlen, flags); -} - -int kernel_getsockname(struct socket *sock, struct sockaddr *addr, - int *addrlen) -{ - return sock->ops->getname(sock, addr, addrlen, 0); -} - -int kernel_getpeername(struct socket *sock, struct sockaddr *addr, - int *addrlen) -{ - return sock->ops->getname(sock, addr, addrlen, 1); -} 
- -int kernel_getsockopt(struct socket *sock, int level, int optname, - char *optval, int *optlen) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); - else - err = sock->ops->getsockopt(sock, level, optname, optval, - optlen); - set_fs(oldfs); - return err; -} - -int kernel_setsockopt(struct socket *sock, int level, int optname, - char *optval, int optlen) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); - else - err = sock->ops->setsockopt(sock, level, optname, optval, - optlen); - set_fs(oldfs); - return err; -} - -int kernel_sendpage(struct socket *sock, struct page *page, int offset, - size_t size, int flags) -{ - if (sock->ops->sendpage) - return sock->ops->sendpage(sock, page, offset, size, flags); - - return sock_no_sendpage(sock, page, offset, size, flags); -} - -int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) -{ - mm_segment_t oldfs = get_fs(); - int err; - - set_fs(KERNEL_DS); - err = sock->ops->ioctl(sock, cmd, arg); - set_fs(oldfs); - - return err; -} - /* ABI emulation layers need these two */ EXPORT_SYMBOL(move_addr_to_kernel); EXPORT_SYMBOL(move_addr_to_user); @@ -2311,13 +2177,3 @@ EXPORT_SYMBOL(sock_wake_async); EXPORT_SYMBOL(sockfd_lookup); EXPORT_SYMBOL(kernel_sendmsg); EXPORT_SYMBOL(kernel_recvmsg); -EXPORT_SYMBOL(kernel_bind); -EXPORT_SYMBOL(kernel_listen); -EXPORT_SYMBOL(kernel_accept); -EXPORT_SYMBOL(kernel_connect); -EXPORT_SYMBOL(kernel_getsockname); -EXPORT_SYMBOL(kernel_getpeername); -EXPORT_SYMBOL(kernel_getsockopt); -EXPORT_SYMBOL(kernel_setsockopt); -EXPORT_SYMBOL(kernel_sendpage); -EXPORT_SYMBOL(kernel_sock_ioctl); diff --git a/trunk/net/sunrpc/socklib.c b/trunk/net/sunrpc/socklib.c index 6f17527b9e69..eb330d4f66d6 100644 --- a/trunk/net/sunrpc/socklib.c +++ b/trunk/net/sunrpc/socklib.c @@ 
-168,7 +168,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) return -1; if ((unsigned short)csum_fold(desc.csum)) return -1; - if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE)) + if (unlikely(skb->ip_summed == CHECKSUM_HW)) netdev_rx_csum_fault(skb->dev); return 0; no_checksum: diff --git a/trunk/net/sunrpc/svcsock.c b/trunk/net/sunrpc/svcsock.c index 953aff89bcac..d9a95732df46 100644 --- a/trunk/net/sunrpc/svcsock.c +++ b/trunk/net/sunrpc/svcsock.c @@ -388,7 +388,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) /* send head */ if (slen == xdr->head[0].iov_len) flags = 0; - len = kernel_sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); + len = sock->ops->sendpage(sock, rqstp->rq_respages[0], 0, xdr->head[0].iov_len, flags); if (len != xdr->head[0].iov_len) goto out; slen -= xdr->head[0].iov_len; @@ -400,7 +400,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) while (pglen > 0) { if (slen == size) flags = 0; - result = kernel_sendpage(sock, *ppage, base, size, flags); + result = sock->ops->sendpage(sock, *ppage, base, size, flags); if (result > 0) len += result; if (result != size) @@ -413,7 +413,7 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) } /* send tail */ if (xdr->tail[0].iov_len) { - result = kernel_sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], + result = sock->ops->sendpage(sock, rqstp->rq_respages[rqstp->rq_restailpage], ((unsigned long)xdr->tail[0].iov_base)& (PAGE_SIZE-1), xdr->tail[0].iov_len, 0); @@ -434,10 +434,13 @@ svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr) static int svc_recv_available(struct svc_sock *svsk) { + mm_segment_t oldfs; struct socket *sock = svsk->sk_sock; int avail, err; - err = kernel_sock_ioctl(sock, TIOCINQ, (unsigned long) &avail); + oldfs = get_fs(); set_fs(KERNEL_DS); + err = sock->ops->ioctl(sock, TIOCINQ, (unsigned long) &avail); + set_fs(oldfs); return (err >= 0)? 
avail : err; } @@ -469,7 +472,7 @@ svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr, int buflen) * at accept time. FIXME */ alen = sizeof(rqstp->rq_addr); - kernel_getpeername(sock, (struct sockaddr *)&rqstp->rq_addr, &alen); + sock->ops->getname(sock, (struct sockaddr *)&rqstp->rq_addr, &alen, 1); dprintk("svc: socket %p recvfrom(%p, %Zu) = %d\n", rqstp->rq_sock, iov[0].iov_base, iov[0].iov_len, len); @@ -755,6 +758,7 @@ svc_tcp_accept(struct svc_sock *svsk) struct svc_serv *serv = svsk->sk_server; struct socket *sock = svsk->sk_sock; struct socket *newsock; + const struct proto_ops *ops; struct svc_sock *newsvsk; int err, slen; @@ -762,23 +766,29 @@ svc_tcp_accept(struct svc_sock *svsk) if (!sock) return; - clear_bit(SK_CONN, &svsk->sk_flags); - err = kernel_accept(sock, &newsock, O_NONBLOCK); - if (err < 0) { + err = sock_create_lite(PF_INET, SOCK_STREAM, IPPROTO_TCP, &newsock); + if (err) { if (err == -ENOMEM) printk(KERN_WARNING "%s: no more sockets!\n", serv->sv_name); - else if (err != -EAGAIN && net_ratelimit()) - printk(KERN_WARNING "%s: accept failed (err %d)!\n", - serv->sv_name, -err); return; } + dprintk("svc: tcp_accept %p allocated\n", newsock); + newsock->ops = ops = sock->ops; + + clear_bit(SK_CONN, &svsk->sk_flags); + if ((err = ops->accept(sock, newsock, O_NONBLOCK)) < 0) { + if (err != -EAGAIN && net_ratelimit()) + printk(KERN_WARNING "%s: accept failed (err %d)!\n", + serv->sv_name, -err); + goto failed; /* aborted connection or whatever */ + } set_bit(SK_CONN, &svsk->sk_flags); svc_sock_enqueue(svsk); slen = sizeof(sin); - err = kernel_getpeername(newsock, (struct sockaddr *) &sin, &slen); + err = ops->getname(newsock, (struct sockaddr *) &sin, &slen, 1); if (err < 0) { if (net_ratelimit()) printk(KERN_WARNING "%s: peername failed (err %d)!\n", @@ -1396,14 +1406,14 @@ svc_create_socket(struct svc_serv *serv, int protocol, struct sockaddr_in *sin) if (sin != NULL) { if (type == SOCK_STREAM) sock->sk->sk_reuse = 1; /* allow address 
reuse */ - error = kernel_bind(sock, (struct sockaddr *) sin, + error = sock->ops->bind(sock, (struct sockaddr *) sin, sizeof(*sin)); if (error < 0) goto bummer; } if (protocol == IPPROTO_TCP) { - if ((error = kernel_listen(sock, 64)) < 0) + if ((error = sock->ops->listen(sock, 64)) < 0) goto bummer; } diff --git a/trunk/net/sunrpc/xprtsock.c b/trunk/net/sunrpc/xprtsock.c index 897bdd982315..441bd53f5eca 100644 --- a/trunk/net/sunrpc/xprtsock.c +++ b/trunk/net/sunrpc/xprtsock.c @@ -174,6 +174,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a struct page **ppage = xdr->pages; unsigned int len, pglen = xdr->page_len; int err, ret = 0; + ssize_t (*sendpage)(struct socket *, struct page *, int, size_t, int); if (unlikely(!sock)) return -ENOTCONN; @@ -206,6 +207,7 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a base &= ~PAGE_CACHE_MASK; } + sendpage = sock->ops->sendpage ? : sock_no_sendpage; do { int flags = XS_SENDMSG_FLAGS; @@ -218,7 +220,10 @@ static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int a if (pglen != len || xdr->tail[0].iov_len != 0) flags |= MSG_MORE; - err = kernel_sendpage(sock, *ppage, base, len, flags); + /* Hmm... 
We might be dealing with highmem pages */ + if (PageHighMem(*ppage)) + sendpage = sock_no_sendpage; + err = sendpage(sock, *ppage, base, len, flags); if (ret == 0) ret = err; else if (err > 0) @@ -981,7 +986,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock) do { myaddr.sin_port = htons(port); - err = kernel_bind(sock, (struct sockaddr *) &myaddr, + err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, sizeof(myaddr)); if (err == 0) { xprt->port = port; @@ -1076,7 +1081,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt) */ memset(&any, 0, sizeof(any)); any.sa_family = AF_UNSPEC; - result = kernel_connect(sock, &any, sizeof(any), 0); + result = sock->ops->connect(sock, &any, sizeof(any), 0); if (result) dprintk("RPC: AF_UNSPEC connect return code %d\n", result); @@ -1146,7 +1151,7 @@ static void xs_tcp_connect_worker(void *args) /* Tell the socket layer to start connecting... */ xprt->stat.connect_count++; xprt->stat.connect_start = jiffies; - status = kernel_connect(sock, (struct sockaddr *) &xprt->addr, + status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, sizeof(xprt->addr), O_NONBLOCK); dprintk("RPC: %p connect status %d connected %d sock state %d\n", xprt, -status, xprt_connected(xprt), sock->sk->sk_state); diff --git a/trunk/net/unix/af_unix.c b/trunk/net/unix/af_unix.c index b43a27828df5..de6ec519272e 100644 --- a/trunk/net/unix/af_unix.c +++ b/trunk/net/unix/af_unix.c @@ -117,7 +117,7 @@ #include #include -int sysctl_unix_max_dgram_qlen __read_mostly = 10; +int sysctl_unix_max_dgram_qlen = 10; struct hlist_head unix_socket_table[UNIX_HASH_SIZE + 1]; DEFINE_SPINLOCK(unix_table_lock); @@ -2060,7 +2060,10 @@ static int __init af_unix_init(void) int rc = -1; struct sk_buff *dummy_skb; - BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)); + if (sizeof(struct unix_skb_parms) > sizeof(dummy_skb->cb)) { + printk(KERN_CRIT "%s: panic\n", __FUNCTION__); + goto out; + } rc = 
proto_register(&unix_proto, 1); if (rc != 0) { diff --git a/trunk/net/xfrm/Kconfig b/trunk/net/xfrm/Kconfig index 0faab6332586..0c1c04322baf 100644 --- a/trunk/net/xfrm/Kconfig +++ b/trunk/net/xfrm/Kconfig @@ -6,24 +6,14 @@ config XFRM depends on NET config XFRM_USER - tristate "Transformation user configuration interface" + tristate "IPsec user configuration interface" depends on INET && XFRM ---help--- - Support for Transformation(XFRM) user configuration interface - like IPsec used by native Linux tools. + Support for IPsec user configuration interface used + by native Linux tools. If unsure, say Y. -config XFRM_SUB_POLICY - bool "Transformation sub policy support (EXPERIMENTAL)" - depends on XFRM && EXPERIMENTAL - ---help--- - Support sub policy for developers. By using sub policy with main - one, two policies can be applied to the same packet at once. - Policy which lives shorter time in kernel should be a sub. - - If unsure, say N. - config NET_KEY tristate "PF_KEY sockets" select XFRM diff --git a/trunk/net/xfrm/Makefile b/trunk/net/xfrm/Makefile index de3c1a625a46..693aac1aa833 100644 --- a/trunk/net/xfrm/Makefile +++ b/trunk/net/xfrm/Makefile @@ -2,7 +2,6 @@ # Makefile for the XFRM subsystem. # -obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_hash.o \ - xfrm_input.o xfrm_algo.o +obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o obj-$(CONFIG_XFRM_USER) += xfrm_user.o diff --git a/trunk/net/xfrm/xfrm_hash.c b/trunk/net/xfrm/xfrm_hash.c deleted file mode 100644 index 37643bb8768a..000000000000 --- a/trunk/net/xfrm/xfrm_hash.c +++ /dev/null @@ -1,41 +0,0 @@ -/* xfrm_hash.c: Common hash table code. - * - * Copyright (C) 2006 David S. 
Miller (davem@davemloft.net) - */ - -#include -#include -#include -#include -#include -#include - -#include "xfrm_hash.h" - -struct hlist_head *xfrm_hash_alloc(unsigned int sz) -{ - struct hlist_head *n; - - if (sz <= PAGE_SIZE) - n = kmalloc(sz, GFP_KERNEL); - else if (hashdist) - n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); - else - n = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(sz)); - - if (n) - memset(n, 0, sz); - - return n; -} - -void xfrm_hash_free(struct hlist_head *n, unsigned int sz) -{ - if (sz <= PAGE_SIZE) - kfree(n); - else if (hashdist) - vfree(n); - else - free_pages((unsigned long)n, get_order(sz)); -} diff --git a/trunk/net/xfrm/xfrm_hash.h b/trunk/net/xfrm/xfrm_hash.h deleted file mode 100644 index d3abb0b7dc62..000000000000 --- a/trunk/net/xfrm/xfrm_hash.h +++ /dev/null @@ -1,128 +0,0 @@ -#ifndef _XFRM_HASH_H -#define _XFRM_HASH_H - -#include -#include - -static inline unsigned int __xfrm4_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a4); -} - -static inline unsigned int __xfrm6_addr_hash(xfrm_address_t *addr) -{ - return ntohl(addr->a6[2] ^ addr->a6[3]); -} - -static inline unsigned int __xfrm4_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a4 ^ saddr->a4); -} - -static inline unsigned int __xfrm6_daddr_saddr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr) -{ - return ntohl(daddr->a6[2] ^ daddr->a6[3] ^ - saddr->a6[2] ^ saddr->a6[3]); -} - -static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, - u32 reqid, unsigned short family, - unsigned int hmask) -{ - unsigned int h = family ^ reqid; - switch (family) { - case AF_INET: - h ^= __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - case AF_INET6: - h ^= __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - } - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr, - unsigned short family, - unsigned int hmask) -{ - unsigned int h = family; - 
switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(saddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(saddr); - break; - }; - return (h ^ (h >> 16)) & hmask; -} - -static inline unsigned int -__xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family, - unsigned int hmask) -{ - unsigned int h = spi ^ proto; - switch (family) { - case AF_INET: - h ^= __xfrm4_addr_hash(daddr); - break; - case AF_INET6: - h ^= __xfrm6_addr_hash(daddr); - break; - } - return (h ^ (h >> 10) ^ (h >> 20)) & hmask; -} - -static inline unsigned int __idx_hash(u32 index, unsigned int hmask) -{ - return (index ^ (index >> 8)) & hmask; -} - -static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) -{ - xfrm_address_t *daddr = &sel->daddr; - xfrm_address_t *saddr = &sel->saddr; - unsigned int h = 0; - - switch (family) { - case AF_INET: - if (sel->prefixlen_d != 32 || - sel->prefixlen_s != 32) - return hmask + 1; - - h = __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - - case AF_INET6: - if (sel->prefixlen_d != 128 || - sel->prefixlen_s != 128) - return hmask + 1; - - h = __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - -static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) -{ - unsigned int h = 0; - - switch (family) { - case AF_INET: - h = __xfrm4_daddr_saddr_hash(daddr, saddr); - break; - - case AF_INET6: - h = __xfrm6_daddr_saddr_hash(daddr, saddr); - break; - }; - h ^= (h >> 16); - return h & hmask; -} - -extern struct hlist_head *xfrm_hash_alloc(unsigned int sz); -extern void xfrm_hash_free(struct hlist_head *n, unsigned int sz); - -#endif /* _XFRM_HASH_H */ diff --git a/trunk/net/xfrm/xfrm_input.c b/trunk/net/xfrm/xfrm_input.c index dfc90bb1cf1f..891a6090cc09 100644 --- a/trunk/net/xfrm/xfrm_input.c +++ b/trunk/net/xfrm/xfrm_input.c @@ -82,6 +82,8 @@ void __init 
xfrm_input_init(void) { secpath_cachep = kmem_cache_create("secpath_cache", sizeof(struct sec_path), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); + if (!secpath_cachep) + panic("XFRM: failed to allocate secpath_cache\n"); } diff --git a/trunk/net/xfrm/xfrm_policy.c b/trunk/net/xfrm/xfrm_policy.c index b6e2e79d7261..3da67ca2c3ce 100644 --- a/trunk/net/xfrm/xfrm_policy.c +++ b/trunk/net/xfrm/xfrm_policy.c @@ -22,19 +22,16 @@ #include #include #include -#include #include #include -#include "xfrm_hash.h" - DEFINE_MUTEX(xfrm_cfg_mutex); EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_count); +struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -42,7 +39,8 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static HLIST_HEAD(xfrm_policy_gc_list); +static struct list_head xfrm_policy_gc_list = + LIST_HEAD_INIT(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); @@ -312,10 +310,8 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - INIT_HLIST_NODE(&policy->bydst); - INIT_HLIST_NODE(&policy->byidx); - rwlock_init(&policy->lock); atomic_set(&policy->refcnt, 1); + rwlock_init(&policy->lock); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -361,16 +357,17 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) static void xfrm_policy_gc_task(void *data) { struct xfrm_policy *policy; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; + struct 
list_head *entry, *tmp; + struct list_head gc_list = LIST_HEAD_INIT(gc_list); spin_lock_bh(&xfrm_policy_gc_lock); - gc_list.first = xfrm_policy_gc_list.first; - INIT_HLIST_HEAD(&xfrm_policy_gc_list); + list_splice_init(&xfrm_policy_gc_list, &gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) + list_for_each_safe(entry, tmp, &gc_list) { + policy = list_entry(entry, struct xfrm_policy, list); xfrm_policy_gc_kill(policy); + } } /* Rule must be locked. Release descentant resources, announce @@ -392,275 +389,70 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); + list_add(&policy->list, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } -struct xfrm_policy_hash { - struct hlist_head *table; - unsigned int hmask; -}; - -static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; -static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; -static struct hlist_head *xfrm_policy_byidx __read_mostly; -static unsigned int xfrm_idx_hmask __read_mostly; -static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; - -static inline unsigned int idx_hash(u32 index) -{ - return __idx_hash(index, xfrm_idx_hmask); -} - -static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) -{ - unsigned int hmask = xfrm_policy_bydst[dir].hmask; - unsigned int hash = __sel_hash(sel, family, hmask); - - return (hash == hmask + 1 ? 
- &xfrm_policy_inexact[dir] : - xfrm_policy_bydst[dir].table + hash); -} - -static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) -{ - unsigned int hmask = xfrm_policy_bydst[dir].hmask; - unsigned int hash = __addr_hash(daddr, saddr, family, hmask); - - return xfrm_policy_bydst[dir].table + hash; -} - -static void xfrm_dst_hash_transfer(struct hlist_head *list, - struct hlist_head *ndsttable, - unsigned int nhashmask) -{ - struct hlist_node *entry, *tmp; - struct xfrm_policy *pol; - - hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { - unsigned int h; - - h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, - pol->family, nhashmask); - hlist_add_head(&pol->bydst, ndsttable+h); - } -} - -static void xfrm_idx_hash_transfer(struct hlist_head *list, - struct hlist_head *nidxtable, - unsigned int nhashmask) -{ - struct hlist_node *entry, *tmp; - struct xfrm_policy *pol; - - hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { - unsigned int h; - - h = __idx_hash(pol->index, nhashmask); - hlist_add_head(&pol->byidx, nidxtable+h); - } -} - -static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) -{ - return ((old_hmask + 1) << 1) - 1; -} - -static void xfrm_bydst_resize(int dir) -{ - unsigned int hmask = xfrm_policy_bydst[dir].hmask; - unsigned int nhashmask = xfrm_new_hash_mask(hmask); - unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *odst = xfrm_policy_bydst[dir].table; - struct hlist_head *ndst = xfrm_hash_alloc(nsize); - int i; - - if (!ndst) - return; - - write_lock_bh(&xfrm_policy_lock); - - for (i = hmask; i >= 0; i--) - xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); - - xfrm_policy_bydst[dir].table = ndst; - xfrm_policy_bydst[dir].hmask = nhashmask; - - write_unlock_bh(&xfrm_policy_lock); - - xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); -} - -static void xfrm_byidx_resize(int total) -{ - unsigned int hmask = 
xfrm_idx_hmask; - unsigned int nhashmask = xfrm_new_hash_mask(hmask); - unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); - struct hlist_head *oidx = xfrm_policy_byidx; - struct hlist_head *nidx = xfrm_hash_alloc(nsize); - int i; - - if (!nidx) - return; - - write_lock_bh(&xfrm_policy_lock); - - for (i = hmask; i >= 0; i--) - xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); - - xfrm_policy_byidx = nidx; - xfrm_idx_hmask = nhashmask; - - write_unlock_bh(&xfrm_policy_lock); - - xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); -} - -static inline int xfrm_bydst_should_resize(int dir, int *total) -{ - unsigned int cnt = xfrm_policy_count[dir]; - unsigned int hmask = xfrm_policy_bydst[dir].hmask; - - if (total) - *total += cnt; - - if ((hmask + 1) < xfrm_policy_hashmax && - cnt > hmask) - return 1; - - return 0; -} - -static inline int xfrm_byidx_should_resize(int total) -{ - unsigned int hmask = xfrm_idx_hmask; - - if ((hmask + 1) < xfrm_policy_hashmax && - total > hmask) - return 1; - - return 0; -} - -static DEFINE_MUTEX(hash_resize_mutex); - -static void xfrm_hash_resize(void *__unused) -{ - int dir, total; - - mutex_lock(&hash_resize_mutex); - - total = 0; - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - if (xfrm_bydst_should_resize(dir, &total)) - xfrm_bydst_resize(dir); - } - if (xfrm_byidx_should_resize(total)) - xfrm_byidx_resize(total); - - mutex_unlock(&hash_resize_mutex); -} - -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); - /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. 
*/ -static u32 xfrm_gen_index(u8 type, int dir) +static u32 xfrm_gen_index(int dir) { + u32 idx; + struct xfrm_policy *p; static u32 idx_generator; for (;;) { - struct hlist_node *entry; - struct hlist_head *list; - struct xfrm_policy *p; - u32 idx; - int found; - idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - list = xfrm_policy_byidx + idx_hash(idx); - found = 0; - hlist_for_each_entry(p, entry, list, byidx) { - if (p->index == idx) { - found = 1; + for (p = xfrm_policy_list[dir]; p; p = p->next) { + if (p->index == idx) break; - } } - if (!found) + if (!p) return idx; } } -static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) -{ - u32 *p1 = (u32 *) s1; - u32 *p2 = (u32 *) s2; - int len = sizeof(struct xfrm_selector) / sizeof(u32); - int i; - - for (i = 0; i < len; i++) { - if (p1[i] != p2[i]) - return 1; - } - - return 0; -} - int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol; - struct xfrm_policy *delpol; - struct hlist_head *chain; - struct hlist_node *entry, *newpos, *last; + struct xfrm_policy *pol, **p; + struct xfrm_policy *delpol = NULL; + struct xfrm_policy **newpos = NULL; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(&policy->selector, policy->family, dir); - delpol = NULL; - newpos = NULL; - last = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (!delpol && - pol->type == policy->type && - !selector_cmp(&pol->selector, &policy->selector) && + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } + *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - last = &pol->bydst; + p = &pol->next; continue; } if (!newpos) - 
newpos = &pol->bydst; + newpos = p; if (delpol) break; - last = &pol->bydst; + p = &pol->next; } - if (!newpos) - newpos = last; if (newpos) - hlist_add_after(newpos, &policy->bydst); - else - hlist_add_head(&policy->bydst, chain); + p = newpos; xfrm_pol_hold(policy); - xfrm_policy_count[dir]++; + policy->next = *p; + *p = policy; atomic_inc(&flow_cache_genid); - if (delpol) { - hlist_del(&delpol->bydst); - hlist_del(&delpol->byidx); - xfrm_policy_count[dir]--; - } - policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); - hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); + policy->index = delpol ? delpol->index : xfrm_gen_index(dir); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -669,13 +461,10 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); - else if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - entry = &policy->bydst; - hlist_for_each_entry_continue(policy, entry, bydst) { + for (policy = policy->next; policy; policy = policy->next) { struct dst_entry *dst; write_lock(&policy->lock); @@ -704,146 +493,87 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, *ret; - struct hlist_head *chain; - struct hlist_node *entry; + struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - chain = policy_hash_bysel(sel, sel->family, dir); - ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (pol->type == type && - !selector_cmp(sel, &pol->selector) && - xfrm_sec_ctx_match(ctx, pol->security)) { + 
for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && + (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); - if (delete) { - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; - } - ret = pol; + if (delete) + *p = pol->next; break; } } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { + if (pol && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(ret); + xfrm_policy_kill(pol); } - return ret; + return pol; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) { - struct xfrm_policy *pol, *ret; - struct hlist_head *chain; - struct hlist_node *entry; + struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - chain = xfrm_policy_byidx + idx_hash(id); - ret = NULL; - hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + if (pol->index == id) { xfrm_pol_hold(pol); - if (delete) { - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; - } - ret = pol; + if (delete) + *p = pol->next; break; } } write_unlock_bh(&xfrm_policy_lock); - if (ret && delete) { + if (pol && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(ret); + xfrm_policy_kill(pol); } - return ret; + return pol; } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(u8 type) +void xfrm_policy_flush(void) { + struct xfrm_policy *xp; int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - struct xfrm_policy *pol; - struct hlist_node *entry; - int i; - - again1: - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) - continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); + while ((xp = xfrm_policy_list[dir]) != NULL) 
{ + xfrm_policy_list[dir] = xp->next; write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(pol); + xfrm_policy_kill(xp); write_lock_bh(&xfrm_policy_lock); - goto again1; } - - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - again2: - hlist_for_each_entry(pol, entry, - xfrm_policy_bydst[dir].table + i, - bydst) { - if (pol->type != type) - continue; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - write_unlock_bh(&xfrm_policy_lock); - - xfrm_policy_kill(pol); - - write_lock_bh(&xfrm_policy_lock); - goto again2; - } - } - - xfrm_policy_count[dir] = 0; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *pol; - struct hlist_node *entry; - int dir, count, error; + struct xfrm_policy *xp; + int dir; + int count = 0; + int error = 0; read_lock_bh(&xfrm_policy_lock); - count = 0; for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type == type) - count++; - } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type == type) - count++; - } - } + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + count++; } if (count == 0) { @@ -852,28 +582,13 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - struct hlist_head *table = xfrm_policy_bydst[dir].table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) { - if (pol->type != type) - continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); + for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + error = func(xp, 
dir%XFRM_POLICY_MAX, --count, data); if (error) goto out; } - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) { - if (pol->type != type) - continue; - error = func(pol, dir % XFRM_POLICY_MAX, --count, data); - if (error) - goto out; - } - } } - error = 0; + out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -882,79 +597,29 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, - u8 type, u16 family, int dir) +static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) { - struct xfrm_selector *sel = &pol->selector; - int match; - - if (pol->family != family || - pol->type != type) - return 0; + struct xfrm_policy *pol; - match = xfrm_selector_match(sel, fl, family); - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) - return 1; - } + read_lock_bh(&xfrm_policy_lock); + for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + struct xfrm_selector *sel = &pol->selector; + int match; - return 0; -} + if (pol->family != family) + continue; -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, - u16 family, u8 dir) -{ - struct xfrm_policy *pol, *ret; - xfrm_address_t *daddr, *saddr; - struct hlist_node *entry; - struct hlist_head *chain; - u32 priority = ~0U; - - daddr = xfrm_flowi_daddr(fl, family); - saddr = xfrm_flowi_saddr(fl, family); - if (unlikely(!daddr || !saddr)) - return NULL; + match = xfrm_selector_match(sel, fl, family); - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(daddr, saddr, family, dir); - ret = NULL; - hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { - ret = pol; - priority = ret->priority; - break; - } - } - chain = &xfrm_policy_inexact[dir]; - hlist_for_each_entry(pol, entry, chain, bydst) { - if 
(xfrm_policy_match(pol, fl, type, family, dir) && - pol->priority < priority) { - ret = pol; - break; + if (match) { + if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + xfrm_pol_hold(pol); + break; + } } } - if (ret) - xfrm_pol_hold(ret); read_unlock_bh(&xfrm_policy_lock); - - return ret; -} - -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) -{ - struct xfrm_policy *pol; - -#ifdef CONFIG_XFRM_SUB_POLICY - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (pol) - goto end; -#endif - pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - -#ifdef CONFIG_XFRM_SUB_POLICY -end: -#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -976,7 +641,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) { struct xfrm_policy *pol; @@ -987,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -1000,29 +665,24 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct hlist_head *chain = policy_hash_bysel(&pol->selector, - pol->family, dir); - - hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); - xfrm_policy_count[dir]++; + pol->next = xfrm_policy_list[dir]; + xfrm_policy_list[dir] = pol; xfrm_pol_hold(pol); - - if (xfrm_bydst_should_resize(dir, NULL)) - schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct 
xfrm_policy *pol, int dir) { - if (hlist_unhashed(&pol->bydst)) - return NULL; - - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - xfrm_policy_count[dir]--; + struct xfrm_policy **polp; - return pol; + for (polp = &xfrm_policy_list[dir]; + *polp != NULL; polp = &(*polp)->next) { + if (*polp == pol) { + *polp = pol->next; + return pol; + } + } + return NULL; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -1044,17 +704,12 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; -#ifdef CONFIG_XFRM_SUB_POLICY - if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) - return -EINVAL; -#endif - write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -1083,7 +738,6 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; - newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -1107,32 +761,17 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } -static int -xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, - unsigned short family) -{ - int err; - struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - - if (unlikely(afinfo == NULL)) - return -EINVAL; - err = afinfo->get_saddr(local, remote); - xfrm_policy_put_afinfo(afinfo); - return err; -} - /* Resolve list of templates for the flow, given policy. 
*/ static int -xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); - xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -1140,15 +779,9 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode == XFRM_MODE_TUNNEL) { + if (tmpl->mode) { remote = &tmpl->id.daddr; local = &tmpl->saddr; - if (xfrm_addr_any(local, family)) { - error = xfrm_get_saddr(&tmp, remote, family); - if (error) - goto fail; - local = &tmp; - } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); @@ -1176,45 +809,6 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, return error; } -static int -xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) -{ - struct xfrm_state *tp[XFRM_MAX_DEPTH]; - struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; - int cnx = 0; - int error; - int ret; - int i; - - for (i = 0; i < npols; i++) { - if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { - error = -ENOBUFS; - goto fail; - } - - ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); - if (ret < 0) { - error = ret; - goto fail; - } else - cnx += ret; - } - - /* found states are sorted for outbound processing */ - if (npols > 1) - xfrm_state_sort(xfrm, tpp, cnx, family); - - return cnx; - - fail: - for (cnx--; cnx>=0; cnx--) - xfrm_state_put(tpp[cnx]); - return error; - -} - /* Check that the bundle accepts the flow and its components are * still valid. 
*/ @@ -1261,11 +855,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; - struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - int npols; - int pol_dead; - int xfrm_nr; - int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -1273,26 +862,19 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - + u32 sk_sid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; - for (pi = 0; pi < ARRAY_SIZE(pols); pi++) - pols[pi] = NULL; - npols = 0; - pol_dead = 0; - xfrm_nr = 0; - if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || - !xfrm_policy_count[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, dst_orig->ops->family, + policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -1301,9 +883,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; - pols[0] = policy; - npols ++; - xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -1312,13 +891,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, goto error; case XFRM_POLICY_ALLOW: -#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } -#endif /* Try to find matching bundle. 
* @@ -1334,36 +911,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, if (dst) break; -#ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, - fl, family, - XFRM_POLICY_OUT); - if (pols[1]) { - if (pols[1]->action == XFRM_POLICY_BLOCK) { - err = -EPERM; - goto error; - } - npols ++; - xfrm_nr += pols[1]->xfrm_nr; - } - } - - /* - * Because neither flowi nor bundle information knows about - * transformation template size. On more than one policy usage - * we can realize whether all of them is bypass or not after - * they are searched. See above not-transformed bypass - * is surrounded by non-sub policy configuration, too. - */ - if (xfrm_nr == 0) { - /* Flow passes not transformed. */ - xfrm_pols_put(pols, npols); - return 0; - } - -#endif - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); + nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -1376,7 +924,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); + nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -1384,7 +932,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pols_put(pols, npols); + xfrm_pol_put(policy); goto restart; } err = nx; @@ -1394,7 +942,7 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, } if (nx == 0) { /* Flow passes not transformed. 
*/ - xfrm_pols_put(pols, npols); + xfrm_pol_put(policy); return 0; } @@ -1408,14 +956,8 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, goto error; } - for (pi = 0; pi < npols; pi++) { - read_lock_bh(&pols[pi]->lock); - pol_dead |= pols[pi]->dead; - read_unlock_bh(&pols[pi]->lock); - } - write_lock_bh(&policy->lock); - if (unlikely(pol_dead || stale_bundle(dst))) { + if (unlikely(policy->dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -1435,34 +977,17 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, } *dst_p = dst; dst_release(dst_orig); - xfrm_pols_put(pols, npols); + xfrm_pol_put(policy); return 0; error: dst_release(dst_orig); - xfrm_pols_put(pols, npols); + xfrm_pol_put(policy); *dst_p = NULL; return err; } EXPORT_SYMBOL(xfrm_lookup); -static inline int -xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) -{ - struct xfrm_state *x; - int err; - - if (!skb->sp || idx < 0 || idx >= skb->sp->len) - return 0; - x = skb->sp->xvec[idx]; - if (!x->type->reject) - return 0; - xfrm_state_hold(x); - err = x->type->reject(x, skb, fl); - xfrm_state_put(x); - return err; -} - /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. 
:-) Of course, connected sockets must @@ -1479,19 +1004,10 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->id.spi == tmpl->id.spi || !tmpl->id.spi) && (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && - ((tmpl->aalgos & (1<props.aalgo)) || - !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) && - !(x->props.mode != XFRM_MODE_TRANSPORT && - xfrm_state_addr_cmp(tmpl, x, family)); + (tmpl->aalgos & (1<props.aalgo)) && + !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); } -/* - * 0 or more than 0 is returned when validation is succeeded (either bypass - * because of optional transport mode, or next index of the mathced secpath - * state with the template. - * -1 is returned when no matching template is found. - * Otherwise "-2 - errored_index" is returned. - */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -1499,18 +1015,15 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (tmpl->mode == XFRM_MODE_TRANSPORT) + if (!tmpl->mode) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { - if (start == -1) - start = -2-idx; + if (sp->xvec[idx]->props.mode) break; - } } return start; } @@ -1519,25 +1032,21 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); - int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); - err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); - return err; + return 0; } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) +static inline int secpath_has_tunnel(struct sec_path *sp, int k) { 
for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { - *idxp = k; + if (sp->xvec[k]->props.mode) return 1; - } } return 0; @@ -1547,18 +1056,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; - struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - int npols = 0; - int xfrm_nr; - int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - int xerr_idx = -1; + u32 sk_sid; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); + sk_sid = security_sk_sid(sk, &fl, fl_dir); + /* First, check used SA against their selectors. */ if (skb->sp) { int i; @@ -1572,90 +1079,46 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); if (!pol) - pol = flow_cache_lookup(&fl, family, fl_dir, + pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, xfrm_policy_lookup); - if (!pol) { - if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { - xfrm_secpath_reject(xerr_idx, skb, &fl); - return 0; - } - return 1; - } + if (!pol) + return !skb->sp || !secpath_has_tunnel(skb->sp, 0); pol->curlft.use_time = (unsigned long)xtime.tv_sec; - pols[0] = pol; - npols ++; -#ifdef CONFIG_XFRM_SUB_POLICY - if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { - pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, - &fl, family, - XFRM_POLICY_IN); - if (pols[1]) { - pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; - npols ++; - } - } -#endif - if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; - struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; - struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; - struct xfrm_tmpl **tpp = tp; - int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; - for (pi = 0; pi < npols; pi++) { - if (pols[pi] != pol && - pols[pi]->action != 
XFRM_POLICY_ALLOW) - goto reject; - if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) - goto reject_error; - for (i = 0; i < pols[pi]->xfrm_nr; i++) - tpp[ti++] = &pols[pi]->xfrm_vec[i]; - } - xfrm_nr = ti; - if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); - tpp = stp; - } - /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(tpp[i], sp, k, family); - if (k < 0) { - if (k < -1) - /* "-2 - errored_index" returned */ - xerr_idx = -(2+k); + for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + if (k < 0) goto reject; - } } - if (secpath_has_nontransport(sp, k, &xerr_idx)) + if (secpath_has_tunnel(sp, k)) goto reject; - xfrm_pols_put(pols, npols); + xfrm_pol_put(pol); return 1; } reject: - xfrm_secpath_reject(xerr_idx, skb, &fl); -reject_error: - xfrm_pols_put(pols, npols); + xfrm_pol_put(pol); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1703,7 +1166,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1733,50 +1196,33 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) -{ - struct dst_entry *dst, **dstp; - - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = *gc_list_p; - *gc_list_p = dst; - } else { - dstp = &dst->next; - } - } - 
write_unlock(&pol->lock); -} - static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - struct dst_entry *gc_list = NULL; - int dir; + int i; + struct xfrm_policy *pol; + struct dst_entry *dst, **dstp, *gc_list = NULL; read_lock_bh(&xfrm_policy_lock); - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - struct xfrm_policy *pol; - struct hlist_node *entry; - struct hlist_head *table; - int i; - - hlist_for_each_entry(pol, entry, - &xfrm_policy_inexact[dir], bydst) - prune_one_bundle(pol, func, &gc_list); - - table = xfrm_policy_bydst[dir].table; - for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { - hlist_for_each_entry(pol, entry, table + i, bydst) - prune_one_bundle(pol, func, &gc_list); + for (i=0; i<2*XFRM_POLICY_MAX; i++) { + for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - struct dst_entry *dst = gc_list; + dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1792,12 +1238,22 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -static int xfrm_flush_bundles(void) +int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; } +static int always_true(struct dst_entry *dst) +{ + return 1; +} + +void xfrm_flush_all_bundles(void) +{ + xfrm_prune_bundles(always_true); +} + void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -1825,7 +1281,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. 
*/ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1842,16 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; - if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) - return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; - if (xdst->genid != dst->xfrm->genid) - return 0; - - if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && - !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) - return 0; mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { @@ -2000,33 +1448,12 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { - unsigned int hmask, sz; - int dir; - xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), - 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, + 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - - hmask = 8 - 1; - sz = (hmask+1) * sizeof(struct hlist_head); - - xfrm_policy_byidx = xfrm_hash_alloc(sz); - xfrm_idx_hmask = hmask; - if (!xfrm_policy_byidx) - panic("XFRM: failed to allocate byidx hash\n"); - - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { - struct xfrm_policy_hash *htab; - - INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); - - htab = &xfrm_policy_bydst[dir]; - htab->table = xfrm_hash_alloc(sz); - htab->hmask = hmask; - if (!htab->table) - panic("XFRM: failed to allocate bydst hash\n"); - } + if (!xfrm_dst_cache) + panic("XFRM: failed to allocate xfrm_dst_cache\n"); INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); register_netdevice_notifier(&xfrm_dev_notifier); diff --git a/trunk/net/xfrm/xfrm_state.c b/trunk/net/xfrm/xfrm_state.c index 9f63edd39346..0021aad5db43 100644 --- a/trunk/net/xfrm/xfrm_state.c +++ b/trunk/net/xfrm/xfrm_state.c @@ -18,11 +18,8 @@ #include 
#include #include -#include #include -#include "xfrm_hash.h" - struct sock *xfrm_nl; EXPORT_SYMBOL(xfrm_nl); @@ -35,7 +32,7 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); /* Each xfrm_state may be linked to two tables: 1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl) - 2. Hash table by (daddr,family,reqid) to find what SAs exist for given + 2. Hash table by daddr to find what SAs exist for given destination/tunnel endpoint. (output) */ @@ -47,123 +44,8 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct hlist_head *xfrm_state_bydst __read_mostly; -static struct hlist_head *xfrm_state_bysrc __read_mostly; -static struct hlist_head *xfrm_state_byspi __read_mostly; -static unsigned int xfrm_state_hmask __read_mostly; -static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; -static unsigned int xfrm_state_num; -static unsigned int xfrm_state_genid; - -static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr, - xfrm_address_t *saddr, - u32 reqid, - unsigned short family) -{ - return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask); -} - -static inline unsigned int xfrm_src_hash(xfrm_address_t *addr, - unsigned short family) -{ - return __xfrm_src_hash(addr, family, xfrm_state_hmask); -} - -static inline unsigned int -xfrm_spi_hash(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) -{ - return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask); -} - -static void xfrm_hash_transfer(struct hlist_head *list, - struct hlist_head *ndsttable, - struct hlist_head *nsrctable, - struct hlist_head *nspitable, - unsigned int nhashmask) -{ - struct hlist_node *entry, *tmp; - struct xfrm_state *x; - - hlist_for_each_entry_safe(x, entry, tmp, list, bydst) { - unsigned int h; - - h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr, - x->props.reqid, 
x->props.family, - nhashmask); - hlist_add_head(&x->bydst, ndsttable+h); - - h = __xfrm_src_hash(&x->props.saddr, x->props.family, - nhashmask); - hlist_add_head(&x->bysrc, nsrctable+h); - - h = __xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, - x->props.family, nhashmask); - hlist_add_head(&x->byspi, nspitable+h); - } -} - -static unsigned long xfrm_hash_new_size(void) -{ - return ((xfrm_state_hmask + 1) << 1) * - sizeof(struct hlist_head); -} - -static DEFINE_MUTEX(hash_resize_mutex); - -static void xfrm_hash_resize(void *__unused) -{ - struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi; - unsigned long nsize, osize; - unsigned int nhashmask, ohashmask; - int i; - - mutex_lock(&hash_resize_mutex); - - nsize = xfrm_hash_new_size(); - ndst = xfrm_hash_alloc(nsize); - if (!ndst) - goto out_unlock; - nsrc = xfrm_hash_alloc(nsize); - if (!nsrc) { - xfrm_hash_free(ndst, nsize); - goto out_unlock; - } - nspi = xfrm_hash_alloc(nsize); - if (!nspi) { - xfrm_hash_free(ndst, nsize); - xfrm_hash_free(nsrc, nsize); - goto out_unlock; - } - - spin_lock_bh(&xfrm_state_lock); - - nhashmask = (nsize / sizeof(struct hlist_head)) - 1U; - for (i = xfrm_state_hmask; i >= 0; i--) - xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi, - nhashmask); - - odst = xfrm_state_bydst; - osrc = xfrm_state_bysrc; - ospi = xfrm_state_byspi; - ohashmask = xfrm_state_hmask; - - xfrm_state_bydst = ndst; - xfrm_state_bysrc = nsrc; - xfrm_state_byspi = nspi; - xfrm_state_hmask = nhashmask; - - spin_unlock_bh(&xfrm_state_lock); - - osize = (ohashmask + 1) * sizeof(struct hlist_head); - xfrm_hash_free(odst, osize); - xfrm_hash_free(osrc, osize); - xfrm_hash_free(ospi, osize); - -out_unlock: - mutex_unlock(&hash_resize_mutex); -} - -static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); +static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -172,9 +54,11 @@ 
static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static HLIST_HEAD(xfrm_state_gc_list); +static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); +static int xfrm_state_gc_flush_bundles; + int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -185,13 +69,14 @@ void km_state_expired(struct xfrm_state *x, int hard, u32 pid); static void xfrm_state_gc_destroy(struct xfrm_state *x) { - del_timer_sync(&x->timer); - del_timer_sync(&x->rtimer); + if (del_timer(&x->timer)) + BUG(); + if (del_timer(&x->rtimer)) + BUG(); kfree(x->aalg); kfree(x->ealg); kfree(x->calg); kfree(x->encap); - kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -205,17 +90,22 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct hlist_node *entry, *tmp; - struct hlist_head gc_list; + struct list_head *entry, *tmp; + struct list_head gc_list = LIST_HEAD_INIT(gc_list); + + if (xfrm_state_gc_flush_bundles) { + xfrm_state_gc_flush_bundles = 0; + xfrm_flush_bundles(); + } spin_lock_bh(&xfrm_state_gc_lock); - gc_list.first = xfrm_state_gc_list.first; - INIT_HLIST_HEAD(&xfrm_state_gc_list); + list_splice_init(&xfrm_state_gc_list, &gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) + list_for_each_safe(entry, tmp, &gc_list) { + x = list_entry(entry, struct xfrm_state, bydst); xfrm_state_gc_destroy(x); - + } wake_up(&km_waitq); } @@ -278,9 +168,9 @@ static void xfrm_timer_handler(unsigned long data) if (warn) km_state_expired(x, 0, 0); resched: - if (next != LONG_MAX) - mod_timer(&x->timer, jiffies + make_jiffies(next)); - + if (next != LONG_MAX && + !mod_timer(&x->timer, jiffies + make_jiffies(next))) + 
xfrm_state_hold(x); goto out; expired: @@ -295,6 +185,7 @@ static void xfrm_timer_handler(unsigned long data) out: spin_unlock(&x->lock); + xfrm_state_put(x); } static void xfrm_replay_timer_handler(unsigned long data); @@ -308,9 +199,8 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_HLIST_NODE(&x->bydst); - INIT_HLIST_NODE(&x->bysrc); - INIT_HLIST_NODE(&x->byspi); + INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -335,7 +225,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - hlist_add_head(&x->bydst, &xfrm_state_gc_list); + list_add(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -348,12 +238,27 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - hlist_del(&x->bydst); - hlist_del(&x->bysrc); - if (x->id.spi) - hlist_del(&x->byspi); - xfrm_state_num--; + list_del(&x->bydst); + __xfrm_state_put(x); + if (x->id.spi) { + list_del(&x->byspi); + __xfrm_state_put(x); + } spin_unlock(&xfrm_state_lock); + if (del_timer(&x->timer)) + __xfrm_state_put(x); + if (del_timer(&x->rtimer)) + __xfrm_state_put(x); + + /* The number two in this test is the reference + * mentioned in the comment below plus the reference + * our caller holds. A larger value means that + * there are DSTs attached to this xfrm_state. + */ + if (atomic_read(&x->refcnt) > 2) { + xfrm_state_gc_flush_bundles = 1; + schedule_work(&xfrm_state_gc_work); + } /* All xfrm_state objects are created by xfrm_state_alloc. 
* The xfrm_state_alloc call gives a reference, and that @@ -382,15 +287,14 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; + struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i <= xfrm_state_hmask; i++) { - struct hlist_node *entry; - struct xfrm_state *x; + for (i = 0; i < XFRM_DST_HSIZE; i++) { restart: - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - xfrm_id_proto_match(x->id.proto, proto)) { + (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); @@ -421,103 +325,29 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) -{ - unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); - struct xfrm_state *x; - struct hlist_node *entry; - - hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { - if (x->props.family != family || - x->id.spi != spi || - x->id.proto != proto) - continue; - - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct in6_addr *) - x->id.daddr.a6)) - continue; - break; - }; - - xfrm_state_hold(x); - return x; - } - - return NULL; -} - -static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) -{ - unsigned int h = xfrm_src_hash(saddr, family); - struct xfrm_state *x; - struct hlist_node *entry; - - hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { - if (x->props.family != family || - x->id.proto != proto) - continue; - - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)daddr, - (struct 
in6_addr *) - x->id.daddr.a6) || - !ipv6_addr_equal((struct in6_addr *)saddr, - (struct in6_addr *) - x->props.saddr.a6)) - continue; - break; - }; - - xfrm_state_hold(x); - return x; - } - - return NULL; -} - -static inline struct xfrm_state * -__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) -{ - if (use_spi) - return __xfrm_state_lookup(&x->id.daddr, x->id.spi, - x->id.proto, family); - else - return __xfrm_state_lookup_byaddr(&x->id.daddr, - &x->props.saddr, - x->id.proto, family); -} - struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family); - struct hlist_node *entry; + unsigned h = xfrm_dst_hash(daddr, family); struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + struct xfrm_state_afinfo *afinfo; + afinfo = xfrm_state_get_afinfo(family); + if (afinfo == NULL) { + *err = -EAFNOSUPPORT; + return NULL; + } + spin_lock_bh(&xfrm_state_lock); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && - !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && @@ -537,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !security_xfrm_state_pol_flow_match(x, pol, fl)) + !xfrm_sec_ctx_match(pol->security, x->security)) continue; if (!best || best->km.dying > x->km.dying || @@ -549,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, 
family) && - security_xfrm_state_pol_flow_match(x, pol, fl)) + xfrm_sec_ctx_match(pol->security, x->security)) error = -ESRCH; } } @@ -558,8 +388,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto, family)) != NULL) { + (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -573,24 +403,17 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); - error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); - if (error) { - x->km.state = XFRM_STATE_DEAD; - xfrm_state_put(x); - x = NULL; - goto out; - } - if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add_tail(&x->bydst, xfrm_state_bydst+h); + xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); } else { @@ -606,167 +429,59 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, else *err = acquire_in_progress ? 
-EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); return x; } static void __xfrm_state_insert(struct xfrm_state *x) { - unsigned int h; + unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - x->genid = ++xfrm_state_genid; + list_add(&x->bydst, xfrm_state_bydst+h); + xfrm_state_hold(x); - h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, - x->props.reqid, x->props.family); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - h = xfrm_src_hash(&x->props.saddr, x->props.family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); - if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, - x->props.family); - - hlist_add_head(&x->byspi, xfrm_state_byspi+h); - } + if (!mod_timer(&x->timer, jiffies + HZ)) + xfrm_state_hold(x); - mod_timer(&x->timer, jiffies + HZ); - if (x->replay_maxage) - mod_timer(&x->rtimer, jiffies + x->replay_maxage); + if (x->replay_maxage && + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + xfrm_state_hold(x); wake_up(&km_waitq); - - xfrm_state_num++; - - if (x->bydst.next != NULL && - (xfrm_state_hmask + 1) < xfrm_state_hashmax && - xfrm_state_num > xfrm_state_hmask) - schedule_work(&xfrm_hash_work); -} - -/* xfrm_state_lock is held */ -static void __xfrm_state_bump_genids(struct xfrm_state *xnew) -{ - unsigned short family = xnew->props.family; - u32 reqid = xnew->props.reqid; - struct xfrm_state *x; - struct hlist_node *entry; - unsigned int h; - - h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family); - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { - if (x->props.family == family && - x->props.reqid == reqid && - !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && - !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) - x->genid = xfrm_state_genid; - } } void 
xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); - __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); -} -EXPORT_SYMBOL(xfrm_state_insert); - -/* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) -{ - unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family); - struct hlist_node *entry; - struct xfrm_state *x; - - hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { - if (x->props.reqid != reqid || - x->props.mode != mode || - x->props.family != family || - x->km.state != XFRM_STATE_ACQ || - x->id.spi != 0) - continue; - - switch (family) { - case AF_INET: - if (x->id.daddr.a4 != daddr->a4 || - x->props.saddr.a4 != saddr->a4) - continue; - break; - case AF_INET6: - if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr) || - !ipv6_addr_equal((struct in6_addr *) - x->props.saddr.a6, - (struct in6_addr *)saddr)) - continue; - break; - }; - xfrm_state_hold(x); - return x; - } - - if (!create) - return NULL; - - x = xfrm_state_alloc(); - if (likely(x)) { - switch (family) { - case AF_INET: - x->sel.daddr.a4 = daddr->a4; - x->sel.saddr.a4 = saddr->a4; - x->sel.prefixlen_d = 32; - x->sel.prefixlen_s = 32; - x->props.saddr.a4 = saddr->a4; - x->id.daddr.a4 = daddr->a4; - break; - - case AF_INET6: - ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, - (struct in6_addr *)saddr); - x->sel.prefixlen_d = 128; - x->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, - (struct in6_addr *)saddr); - ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, - (struct in6_addr *)daddr); - break; - }; - - x->km.state = XFRM_STATE_ACQ; - x->id.proto = proto; - x->props.family = family; - x->props.mode = mode; - x->props.reqid = reqid; - 
x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x); - x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x->timer); - hlist_add_head(&x->bydst, xfrm_state_bydst+h); - h = xfrm_src_hash(saddr, family); - hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); - wake_up(&km_waitq); - } - - return x; + xfrm_flush_all_bundles(); } +EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { + struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; - int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; + afinfo = xfrm_state_get_afinfo(family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(x, use_spi, family); + x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -774,7 +489,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (use_spi && x->km.seq) { + if (x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -782,17 +497,20 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (use_spi && !x1) - x1 = __find_acq_core(family, x->props.mode, x->props.reqid, - x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + if (!x1) + x1 = afinfo->find_acq( + x->props.mode, x->props.reqid, x->id.proto, + &x->id.daddr, &x->props.saddr, 0); - __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + + if (!err) + xfrm_flush_all_bundles(); if (x1) { xfrm_state_delete(x1); @@ -805,12 +523,16 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { + struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; - int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); + + afinfo = 
xfrm_state_get_afinfo(x->props.family); + if (unlikely(afinfo == NULL)) + return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(x, use_spi, x->props.family); + x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); err = -ESRCH; if (!x1) @@ -830,6 +552,7 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -845,15 +568,11 @@ int xfrm_state_update(struct xfrm_state *x) if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); - if (x->coaddr && x1->coaddr) { - memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); - } - if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) - memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; - mod_timer(&x1->timer, jiffies + HZ); + if (!mod_timer(&x1->timer, jiffies + HZ)) + xfrm_state_hold(x1); if (x1->curlft.use_time) xfrm_state_check_expire(x1); @@ -878,7 +597,8 @@ int xfrm_state_check_expire(struct xfrm_state *x) if (x->curlft.bytes >= x->lft.hard_byte_limit || x->curlft.packets >= x->lft.hard_packet_limit) { x->km.state = XFRM_STATE_EXPIRED; - mod_timer(&x->timer, jiffies); + if (!mod_timer(&x->timer, jiffies)) + xfrm_state_hold(x); return -EINVAL; } @@ -920,93 +640,46 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(daddr, spi, proto, family); + x = afinfo->state_lookup(daddr, spi, proto); spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); -struct xfrm_state * -xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto, unsigned short family) -{ - struct xfrm_state *x; - - 
spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); - spin_unlock_bh(&xfrm_state_lock); - return x; -} -EXPORT_SYMBOL(xfrm_state_lookup_byaddr); - struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; - - spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); - spin_unlock_bh(&xfrm_state_lock); - - return x; -} -EXPORT_SYMBOL(xfrm_find_acq); - -#ifdef CONFIG_XFRM_SUB_POLICY -int -xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, - unsigned short family) -{ - int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) - return -EAFNOSUPPORT; - - spin_lock_bh(&xfrm_state_lock); - if (afinfo->tmpl_sort) - err = afinfo->tmpl_sort(dst, src, n); - spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); - return err; -} -EXPORT_SYMBOL(xfrm_tmpl_sort); - -int -xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, - unsigned short family) -{ - int err = 0; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return -EAFNOSUPPORT; + return NULL; spin_lock_bh(&xfrm_state_lock); - if (afinfo->state_sort) - err = afinfo->state_sort(dst, src, n); + x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); xfrm_state_put_afinfo(afinfo); - return err; + return x; } -EXPORT_SYMBOL(xfrm_state_sort); -#endif +EXPORT_SYMBOL(xfrm_find_acq); /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; + struct xfrm_state *x; - for (i = 0; i <= xfrm_state_hmask; i++) { - struct hlist_node *entry; - struct xfrm_state *x; - - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && - x->km.state == XFRM_STATE_ACQ) { + for (i = 0; i < 
XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -1042,7 +715,7 @@ EXPORT_SYMBOL(xfrm_get_acqseq); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) { - unsigned int h; + u32 h; struct xfrm_state *x0; if (x->id.spi) @@ -1072,7 +745,8 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - hlist_add_head(&x->byspi, xfrm_state_byspi+h); + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); } @@ -1084,14 +758,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; - struct hlist_node *entry; int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (xfrm_id_proto_match(x->id.proto, proto)) + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) count++; } } @@ -1100,9 +773,9 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), goto out; } - for (i = 0; i <= xfrm_state_hmask; i++) { - hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { - if (!xfrm_id_proto_match(x->id.proto, proto)) + for (i = 0; i < XFRM_DST_HSIZE; i++) { + list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) continue; err = func(x, --count, data); if (err) @@ -1159,8 +832,10 @@ void xfrm_replay_notify(struct xfrm_state *x, int event) km_state_notify(x, &c); if (x->replay_maxage && - !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) + !mod_timer(&x->rtimer, jiffies + x->replay_maxage)) { + xfrm_state_hold(x); x->xflags &= 
~XFRM_TIME_DEFER; + } } EXPORT_SYMBOL(xfrm_replay_notify); @@ -1178,6 +853,7 @@ static void xfrm_replay_timer_handler(unsigned long data) } spin_unlock(&x->lock); + xfrm_state_put(x); } int xfrm_replay_check(struct xfrm_state *x, u32 seq) @@ -1321,25 +997,6 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); -int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) -{ - int err = -EINVAL; - int ret; - struct xfrm_mgr *km; - - read_lock(&xfrm_km_lock); - list_for_each_entry(km, &xfrm_km_list, list) { - if (km->report) { - ret = km->report(proto, sel, addr); - if (!ret) - err = ret; - } - } - read_unlock(&xfrm_km_lock); - return err; -} -EXPORT_SYMBOL(km_report); - int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; @@ -1361,7 +1018,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk, optname, data, + pol = km->compile_policy(sk->sk_family, optname, data, optlen, &err); if (err >= 0) break; @@ -1408,8 +1065,11 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else + else { + afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; + } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1426,8 +1086,11 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else + else { xfrm_state_afinfo[afinfo->family] = NULL; + afinfo->state_byspi = NULL; + afinfo->state_bydst = NULL; + } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; @@ -1543,17 +1206,12 
@@ EXPORT_SYMBOL(xfrm_init_state); void __init xfrm_state_init(void) { - unsigned int sz; - - sz = sizeof(struct hlist_head) * 8; - - xfrm_state_bydst = xfrm_hash_alloc(sz); - xfrm_state_bysrc = xfrm_hash_alloc(sz); - xfrm_state_byspi = xfrm_hash_alloc(sz); - if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi) - panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes."); - xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1); + int i; + for (i=0; i #include #include -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#include -#endif static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type) { @@ -90,22 +87,6 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } -static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, - xfrm_address_t **addrp) -{ - struct rtattr *rt = xfrma[type - 1]; - - if (!rt) - return 0; - - if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) - return -EINVAL; - - if (addrp) - *addrp = RTA_DATA(rt); - - return 0; -} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -176,19 +157,6 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; break; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - case IPPROTO_DSTOPTS: - case IPPROTO_ROUTING: - if (xfrma[XFRMA_ALG_COMP-1] || - xfrma[XFRMA_ALG_AUTH-1] || - xfrma[XFRMA_ALG_CRYPT-1] || - xfrma[XFRMA_ENCAP-1] || - xfrma[XFRMA_SEC_CTX-1] || - !xfrma[XFRMA_COADDR-1]) - goto out; - break; -#endif - default: goto out; }; @@ -203,14 +171,11 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; - if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) - goto out; err = -EINVAL; switch (p->mode) { - case XFRM_MODE_TRANSPORT: - case XFRM_MODE_TUNNEL: - case XFRM_MODE_ROUTEOPTIMIZATION: + case 0: + case 1: break; default: @@ -295,24 +260,6 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return 
security_xfrm_state_alloc(x, uctx); } -static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) -{ - struct rtattr *rta = u_arg; - xfrm_address_t *p, *uaddrp; - - if (!rta) - return 0; - - uaddrp = RTA_DATA(rta); - p = kmalloc(sizeof(*p), GFP_KERNEL); - if (!p) - return -ENOMEM; - - memcpy(p, uaddrp, sizeof(*p)); - *addrpp = p; - return 0; -} - static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -402,8 +349,7 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) - goto error; + err = xfrm_init_state(x); if (err) goto error; @@ -472,48 +418,16 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) return err; } -static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, - struct rtattr **xfrma, - int *errp) -{ - struct xfrm_state *x = NULL; - int err; - - if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { - err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - } else { - xfrm_address_t *saddr = NULL; - - err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); - if (err) - goto out; - - if (!saddr) { - err = -EINVAL; - goto out; - } - - x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, - p->family); - } - - out: - if (!x && errp) - *errp = err; - return x; -} - static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err = -ESRCH; + int err; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); if (x == NULL) - return err; + return -ESRCH; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -607,13 +521,6 @@ static int 
dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } - - if (x->coaddr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); - - if (x->lastused) - RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); - nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; @@ -635,7 +542,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_state_walk(0, dump_one_state, &info); + (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); cb->args[0] = info.this_idx; return skb->len; @@ -671,9 +578,10 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err = -ESRCH; + int err; - x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + err = -ESRCH; if (x == NULL) goto out_noput; @@ -786,22 +694,6 @@ static int verify_policy_dir(__u8 dir) return 0; } -static int verify_policy_type(__u8 type) -{ - switch (type) { - case XFRM_POLICY_TYPE_MAIN: -#ifdef CONFIG_XFRM_SUB_POLICY - case XFRM_POLICY_TYPE_SUB: -#endif - break; - - default: - return -EINVAL; - }; - - return 0; -} - static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { switch (p->share) { @@ -895,29 +787,6 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) return 0; } -static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) -{ - struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; - struct xfrm_userpolicy_type *upt; - __u8 type = XFRM_POLICY_TYPE_MAIN; - int err; - - if (rt) { - if (rt->rta_len < sizeof(*upt)) - return -EINVAL; - - upt = RTA_DATA(rt); - type = upt->type; - } - - err = verify_policy_type(type); - if (err) - return 
err; - - *tp = type; - return 0; -} - static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -956,20 +825,16 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); - err = copy_from_user_policy_type(&xp->type, xfrma); - if (err) - goto error; - if (!(err = copy_from_user_tmpl(xp, xfrma))) err = copy_from_user_sec_ctx(xp, xfrma); - if (err) - goto error; + + if (err) { + *errp = err; + kfree(xp); + xp = NULL; + } return xp; - error: - *errp = err; - kfree(xp); - return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) @@ -1046,63 +911,27 @@ static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb) return -1; } -static int copy_sec_ctx(struct xfrm_sec_ctx *s, struct sk_buff *skb) -{ - int ctx_size = sizeof(struct xfrm_sec_ctx) + s->ctx_len; - struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); - struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); - - uctx->exttype = XFRMA_SEC_CTX; - uctx->len = ctx_size; - uctx->ctx_doi = s->ctx_doi; - uctx->ctx_alg = s->ctx_alg; - uctx->ctx_len = s->ctx_len; - memcpy(uctx + 1, s->ctx_str, s->ctx_len); - return 0; - - rtattr_failure: - return -1; -} - -static inline int copy_to_user_state_sec_ctx(struct xfrm_state *x, struct sk_buff *skb) -{ - if (x->security) { - return copy_sec_ctx(x->security, skb); - } - return 0; -} - -static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) +static int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *skb) { if (xp->security) { - return copy_sec_ctx(xp->security, skb); - } - return 0; -} - -#ifdef CONFIG_XFRM_SUB_POLICY -static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) -{ - struct xfrm_userpolicy_type upt; - - memset(&upt, 0, sizeof(upt)); - upt.type = xp->type; - - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + int ctx_size = 
sizeof(struct xfrm_sec_ctx) + + xp->security->ctx_len; + struct rtattr *rt = __RTA_PUT(skb, XFRMA_SEC_CTX, ctx_size); + struct xfrm_user_sec_ctx *uctx = RTA_DATA(rt); + uctx->exttype = XFRMA_SEC_CTX; + uctx->len = ctx_size; + uctx->ctx_doi = xp->security->ctx_doi; + uctx->ctx_alg = xp->security->ctx_alg; + uctx->ctx_len = xp->security->ctx_len; + memcpy(uctx + 1, xp->security->ctx_str, xp->security->ctx_len); + } return 0; -rtattr_failure: + rtattr_failure: return -1; } -#else -static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) -{ - return 0; -} -#endif - static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -1126,8 +955,6 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) - goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -1149,10 +976,7 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); -#ifdef CONFIG_XFRM_SUB_POLICY - (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); -#endif + (void) xfrm_policy_walk(dump_one_policy, &info); cb->args[0] = info.this_idx; return skb->len; @@ -1188,7 +1012,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; - __u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1196,16 +1019,12 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); - if (err) - return err; - err = verify_policy_dir(p->dir); 
if (err) return err; if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, delete); + xp = xfrm_policy_byid(p->dir, p->index, delete); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1222,7 +1041,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); + xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } if (xp == NULL) @@ -1405,16 +1224,9 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { - struct km_event c; - __u8 type = XFRM_POLICY_TYPE_MAIN; - int err; - - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); - if (err) - return err; +struct km_event c; - xfrm_policy_flush(type); - c.data.type = type; + xfrm_policy_flush(); c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; @@ -1427,15 +1239,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; - __u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; - err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); - if (err) - return err; - if (p->index) - xp = xfrm_policy_byid(type, p->dir, p->index, 0); + xp = xfrm_policy_byid(p->dir, p->index, 0); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1452,7 +1259,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); + xp = xfrm_policy_bysel_ctx(p->dir, 
&p->sel, tmp.security, 0); security_xfrm_policy_free(&tmp); } @@ -1579,7 +1386,6 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), - [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -1904,9 +1710,7 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; - if (copy_to_user_state_sec_ctx(x, skb)) - goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) + if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -1940,7 +1744,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, +static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1948,7 +1752,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, struct xfrm_policy *xp; int nr; - switch (sk->sk_family) { + switch (family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1988,18 +1792,8 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, } copy_from_user_policy(xp, p); - xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); - if (!xp->security) { - int err = security_xfrm_sock_policy_alloc(xp, sk); - if (err) { - kfree(xp); - *dir = err; - return NULL; - } - } - *dir = p->dir; return xp; @@ -2022,8 +1816,6 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) - goto 
nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -2095,8 +1887,6 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; - if (copy_to_user_policy_type(xp, skb) < 0) - goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2114,9 +1904,6 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; -#ifdef CONFIG_XFRM_SUB_POLICY - struct xfrm_userpolicy_type upt; -#endif int len = NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); @@ -2126,13 +1913,6 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); - nlh->nlmsg_flags = 0; - -#ifdef CONFIG_XFRM_SUB_POLICY - memset(&upt, 0, sizeof(upt)); - upt.type = c->data.type; - RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); -#endif nlh->nlmsg_len = skb->tail - b; @@ -2140,9 +1920,6 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: -#ifdef CONFIG_XFRM_SUB_POLICY -rtattr_failure: -#endif kfree_skb(skb); return -1; } @@ -2167,64 +1944,19 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } -static int build_report(struct sk_buff *skb, u8 proto, - struct xfrm_selector *sel, xfrm_address_t *addr) -{ - struct xfrm_user_report *ur; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); - ur = NLMSG_DATA(nlh); - nlh->nlmsg_flags = 0; - - ur->proto = proto; - memcpy(&ur->sel, sel, sizeof(ur->sel)); - - if (addr) - RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, - 
xfrm_address_t *addr) -{ - struct sk_buff *skb; - size_t len; - - len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); - skb = alloc_skb(len, GFP_ATOMIC); - if (skb == NULL) - return -ENOMEM; - - if (build_report(skb, proto, sel, addr) < 0) - BUG(); - - NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; - return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); -} - static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, - .report = xfrm_send_report, }; static int __init xfrm_user_init(void) { struct sock *nlsk; - printk(KERN_INFO "Initializing XFRM netlink socket\n"); + printk(KERN_INFO "Initializing IPsec netlink socket\n"); nlsk = netlink_kernel_create(NETLINK_XFRM, XFRMNLGRP_MAX, xfrm_netlink_rcv, THIS_MODULE); diff --git a/trunk/security/dummy.c b/trunk/security/dummy.c index aeee70565509..58c6d399c844 100644 --- a/trunk/security/dummy.c +++ b/trunk/security/dummy.c @@ -709,10 +709,10 @@ static int dummy_socket_create (int family, int type, return 0; } -static int dummy_socket_post_create (struct socket *sock, int family, int type, - int protocol, int kern) +static void dummy_socket_post_create (struct socket *sock, int family, int type, + int protocol, int kern) { - return 0; + return; } static int dummy_socket_bind (struct socket *sock, struct sockaddr *address, @@ -805,38 +805,14 @@ static inline void dummy_sk_free_security (struct sock *sk) { } -static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk) -{ -} - -static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) -{ -} - -static inline void dummy_sock_graft(struct sock* sk, struct socket *parent) -{ -} - -static inline int dummy_inet_conn_request(struct sock *sk, - struct sk_buff *skb, struct request_sock *req) +static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) { return 
0; } - -static inline void dummy_inet_csk_clone(struct sock *newsk, - const struct request_sock *req) -{ -} - -static inline void dummy_req_classify_flow(const struct request_sock *req, - struct flowi *fl) -{ -} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk) +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { return 0; } @@ -855,8 +831,7 @@ static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp) return 0; } -static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid) +static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) { return 0; } @@ -874,23 +849,6 @@ static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; } - -static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl) -{ - return 1; -} - -static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) -{ - return 1; -} - -static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) -{ - return 0; -} - #endif /* CONFIG_SECURITY_NETWORK_XFRM */ static int dummy_register_security (const char *name, struct security_operations *ops) { @@ -1102,12 +1060,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, socket_getpeersec_dgram); set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); - set_to_dummy_if_null(ops, sk_clone_security); - set_to_dummy_if_null(ops, sk_getsecid); - set_to_dummy_if_null(ops, sock_graft); - set_to_dummy_if_null(ops, inet_conn_request); - set_to_dummy_if_null(ops, inet_csk_clone); - set_to_dummy_if_null(ops, req_classify_flow); + set_to_dummy_if_null(ops, sk_getsid); 
#endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); @@ -1118,9 +1071,6 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, xfrm_state_free_security); set_to_dummy_if_null(ops, xfrm_state_delete_security); set_to_dummy_if_null(ops, xfrm_policy_lookup); - set_to_dummy_if_null(ops, xfrm_state_pol_flow_match); - set_to_dummy_if_null(ops, xfrm_flow_state_match); - set_to_dummy_if_null(ops, xfrm_decode_session); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS set_to_dummy_if_null(ops, key_alloc); diff --git a/trunk/security/selinux/hooks.c b/trunk/security/selinux/hooks.c index 5a66c4c09f7a..5d1b8c733199 100644 --- a/trunk/security/selinux/hooks.c +++ b/trunk/security/selinux/hooks.c @@ -12,8 +12,6 @@ * Copyright (C) 2003 Red Hat, Inc., James Morris * Copyright (C) 2004-2005 Trusted Computer Solutions, Inc. * - * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. 
- * Paul Moore, * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -76,7 +74,6 @@ #include "objsec.h" #include "netif.h" #include "xfrm.h" -#include "selinux_netlabel.h" #define XATTR_SELINUX_SUFFIX "selinux" #define XATTR_NAME_SELINUX XATTR_SECURITY_PREFIX XATTR_SELINUX_SUFFIX @@ -272,17 +269,17 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) { struct sk_security_struct *ssec; + if (family != PF_UNIX) + return 0; + ssec = kzalloc(sizeof(*ssec), priority); if (!ssec) return -ENOMEM; ssec->sk = sk; ssec->peer_sid = SECINITSID_UNLABELED; - ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; - selinux_netlbl_sk_security_init(ssec, family); - return 0; } @@ -290,6 +287,9 @@ static void sk_free_security(struct sock *sk) { struct sk_security_struct *ssec = sk->sk_security; + if (sk->sk_family != PF_UNIX) + return; + sk->sk_security = NULL; kfree(ssec); } @@ -2400,7 +2400,6 @@ static int selinux_inode_listsecurity(struct inode *inode, char *buffer, size_t static int selinux_file_permission(struct file *file, int mask) { - int rc; struct inode *inode = file->f_dentry->d_inode; if (!mask) { @@ -2412,12 +2411,8 @@ static int selinux_file_permission(struct file *file, int mask) if ((file->f_flags & O_APPEND) && (mask & MAY_WRITE)) mask |= MAY_APPEND; - rc = file_has_perm(current, file, - file_mask_to_av(inode->i_mode, mask)); - if (rc) - return rc; - - return selinux_netlbl_inode_permission(inode, mask); + return file_has_perm(current, file, + file_mask_to_av(inode->i_mode, mask)); } static int selinux_file_alloc_security(struct file *file) @@ -3068,13 +3063,11 @@ static int selinux_socket_create(int family, int type, return err; } -static int selinux_socket_post_create(struct socket *sock, int family, - int type, int protocol, int kern) +static void selinux_socket_post_create(struct socket *sock, int family, + int type, int protocol, int kern) { - int 
err = 0; struct inode_security_struct *isec; struct task_security_struct *tsec; - struct sk_security_struct *sksec; u32 newsid; isec = SOCK_INODE(sock)->i_security; @@ -3085,15 +3078,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, isec->sid = kern ? SECINITSID_KERNEL : newsid; isec->initialized = 1; - if (sock->sk) { - sksec = sock->sk->sk_security; - sksec->sid = isec->sid; - err = selinux_netlbl_socket_post_create(sock, - family, - isec->sid); - } - - return err; + return; } /* Range of port numbers used to automatically bind. @@ -3274,13 +3259,7 @@ static int selinux_socket_accept(struct socket *sock, struct socket *newsock) static int selinux_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { - int rc; - - rc = socket_has_perm(current, sock, SOCKET__WRITE); - if (rc) - return rc; - - return selinux_netlbl_inode_permission(SOCK_INODE(sock), MAY_WRITE); + return socket_has_perm(current, sock, SOCKET__WRITE); } static int selinux_socket_recvmsg(struct socket *sock, struct msghdr *msg, @@ -3348,9 +3327,8 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, /* server child socket */ ssec = newsk->sk_security; ssec->peer_sid = isec->sid; - err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid); - - return err; + + return 0; } static int selinux_socket_unix_may_send(struct socket *sock, @@ -3376,29 +3354,11 @@ static int selinux_socket_unix_may_send(struct socket *sock, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, u16 family, char *addrp, int len) + struct avc_audit_data *ad, u32 sock_sid, u16 sock_class, + u16 family, char *addrp, int len) { int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0; - struct socket *sock; - u16 sock_class = 0; - u32 sock_sid = 0; - - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (sock) { - struct inode *inode; - inode = SOCK_INODE(sock); - if (inode) { - 
struct inode_security_struct *isec; - isec = inode->i_security; - sock_sid = isec->sid; - sock_class = isec->sclass; - } - } - read_unlock_bh(&sk->sk_callback_lock); - if (!sock_sid) - goto out; if (!skb->dev) goto out; @@ -3458,10 +3418,12 @@ static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { u16 family; + u16 sock_class = 0; char *addrp; int len, err = 0; + u32 sock_sid = 0; + struct socket *sock; struct avc_audit_data ad; - struct sk_security_struct *sksec = sk->sk_security; family = sk->sk_family; if (family != PF_INET && family != PF_INET6) @@ -3471,6 +3433,22 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP)) family = PF_INET; + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (sock) { + struct inode *inode; + inode = SOCK_INODE(sock); + if (inode) { + struct inode_security_struct *isec; + isec = inode->i_security; + sock_sid = isec->sid; + sock_class = isec->sclass; + } + } + read_unlock_bh(&sk->sk_callback_lock); + if (!sock_sid) + goto out; + AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->dev ? 
skb->dev->name : "[unknown]"; ad.u.net.family = family; @@ -3480,19 +3458,16 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; if (selinux_compat_net) - err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family, + err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid, + sock_class, family, addrp, len); else - err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) goto out; - err = selinux_netlbl_sock_rcv_skb(sksec, skb, &ad); - if (err) - goto out; - - err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); + err = selinux_xfrm_sock_rcv_skb(sock_sid, skb); out: return err; } @@ -3515,9 +3490,8 @@ static int selinux_socket_getpeersec_stream(struct socket *sock, char __user *op peer_sid = ssec->peer_sid; } else if (isec->sclass == SECCLASS_TCP_SOCKET) { - peer_sid = selinux_netlbl_socket_getpeersec_stream(sock); - if (peer_sid == SECSID_NULL) - peer_sid = selinux_socket_getpeer_stream(sock->sk); + peer_sid = selinux_socket_getpeer_stream(sock->sk); + if (peer_sid == SECSID_NULL) { err = -ENOPROTOOPT; goto out; @@ -3557,11 +3531,8 @@ static int selinux_socket_getpeersec_dgram(struct socket *sock, struct sk_buff * if (sock && (sock->sk->sk_family == PF_UNIX)) selinux_get_inode_sid(SOCK_INODE(sock), &peer_secid); - else if (skb) { - peer_secid = selinux_netlbl_socket_getpeersec_dgram(skb); - if (peer_secid == SECSID_NULL) - peer_secid = selinux_socket_getpeer_dgram(skb); - } + else if (skb) + peer_secid = selinux_socket_getpeer_dgram(skb); if (peer_secid == SECSID_NULL) err = -EINVAL; @@ -3580,86 +3551,22 @@ static void selinux_sk_free_security(struct sock *sk) sk_free_security(sk); } -static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) +static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) { - struct sk_security_struct *ssec = sk->sk_security; - struct sk_security_struct 
*newssec = newsk->sk_security; - - newssec->sid = ssec->sid; - newssec->peer_sid = ssec->peer_sid; - - selinux_netlbl_sk_clone_security(ssec, newssec); -} + struct inode_security_struct *isec; + u32 sock_sid = SECINITSID_ANY_SOCKET; -static void selinux_sk_getsecid(struct sock *sk, u32 *secid) -{ if (!sk) - *secid = SECINITSID_ANY_SOCKET; - else { - struct sk_security_struct *sksec = sk->sk_security; - - *secid = sksec->sid; - } -} - -static void selinux_sock_graft(struct sock* sk, struct socket *parent) -{ - struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; - struct sk_security_struct *sksec = sk->sk_security; - - isec->sid = sksec->sid; - - selinux_netlbl_sock_graft(sk, parent); -} - -static int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, - struct request_sock *req) -{ - struct sk_security_struct *sksec = sk->sk_security; - int err; - u32 newsid; - u32 peersid; - - newsid = selinux_netlbl_inet_conn_request(skb, sksec->sid); - if (newsid != SECSID_NULL) { - req->secid = newsid; - return 0; - } - - err = selinux_xfrm_decode_session(skb, &peersid, 0); - BUG_ON(err); - - if (peersid == SECSID_NULL) { - req->secid = sksec->sid; - return 0; - } - - err = security_sid_mls_copy(sksec->sid, peersid, &newsid); - if (err) - return err; - - req->secid = newsid; - return 0; -} - -static void selinux_inet_csk_clone(struct sock *newsk, - const struct request_sock *req) -{ - struct sk_security_struct *newsksec = newsk->sk_security; + return selinux_no_sk_sid(fl); - newsksec->sid = req->secid; - /* NOTE: Ideally, we should also get the isec->sid for the - new socket in sync, but we don't have the isec available yet. - So we will wait until sock_graft to do it, by which - time it will have been created and available. 
*/ + read_lock_bh(&sk->sk_callback_lock); + isec = get_sock_isec(sk); - selinux_netlbl_sk_security_init(newsksec, req->rsk_ops->family); -} + if (isec) + sock_sid = isec->sid; -static void selinux_req_classify_flow(const struct request_sock *req, - struct flowi *fl) -{ - fl->secid = req->secid; + read_unlock_bh(&sk->sk_callback_lock); + return sock_sid; } static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) @@ -3701,24 +3608,12 @@ static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_NETFILTER static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev, + struct inode_security_struct *isec, struct avc_audit_data *ad, u16 family, char *addrp, int len) { - int err = 0; + int err; u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0; - struct socket *sock; - struct inode *inode; - struct inode_security_struct *isec; - - sock = sk->sk_socket; - if (!sock) - goto out; - - inode = SOCK_INODE(sock); - if (!inode) - goto out; - - isec = inode->i_security; err = sel_netif_sids(dev, &if_sid, NULL); if (err) @@ -3783,16 +3678,26 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; + struct socket *sock; + struct inode *inode; struct sk_buff *skb = *pskb; + struct inode_security_struct *isec; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; - struct sk_security_struct *sksec; sk = skb->sk; if (!sk) goto out; - sksec = sk->sk_security; + sock = sk->sk_socket; + if (!sock) + goto out; + + inode = SOCK_INODE(sock); + if (!inode) + goto out; + + isec = inode->i_security; AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = dev->name; @@ -3803,16 +3708,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, goto out; if (selinux_compat_net) - err = selinux_ip_postroute_last_compat(sk, dev, &ad, + err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad, family, addrp, len); else - err = 
avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad); if (err) goto out; - err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad); + err = selinux_xfrm_postroute_last(isec->sid, skb); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4713,12 +4618,7 @@ static struct security_operations selinux_ops = { .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, - .sk_clone_security = selinux_sk_clone_security, - .sk_getsecid = selinux_sk_getsecid, - .sock_graft = selinux_sock_graft, - .inet_conn_request = selinux_inet_conn_request, - .inet_csk_clone = selinux_inet_csk_clone, - .req_classify_flow = selinux_req_classify_flow, + .sk_getsid = selinux_sk_getsid_security, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, @@ -4729,9 +4629,6 @@ static struct security_operations selinux_ops = { .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, - .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, - .xfrm_flow_state_match = selinux_xfrm_flow_state_match, - .xfrm_decode_session = selinux_xfrm_decode_session, #endif #ifdef CONFIG_KEYS diff --git a/trunk/security/selinux/include/av_perm_to_string.h b/trunk/security/selinux/include/av_perm_to_string.h index 09fc8a2345eb..7c9b58380833 100644 --- a/trunk/security/selinux/include/av_perm_to_string.h +++ b/trunk/security/selinux/include/av_perm_to_string.h @@ -241,7 +241,6 @@ S_(SECCLASS_ASSOCIATION, ASSOCIATION__SENDTO, "sendto") S_(SECCLASS_ASSOCIATION, ASSOCIATION__RECVFROM, "recvfrom") S_(SECCLASS_ASSOCIATION, ASSOCIATION__SETCONTEXT, "setcontext") - S_(SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, "polmatch") S_(SECCLASS_PACKET, PACKET__SEND, "send") S_(SECCLASS_PACKET, PACKET__RECV, "recv") 
S_(SECCLASS_PACKET, PACKET__RELABELTO, "relabelto") diff --git a/trunk/security/selinux/include/av_permissions.h b/trunk/security/selinux/include/av_permissions.h index 81f4f526c8b1..69fd4b48202c 100644 --- a/trunk/security/selinux/include/av_permissions.h +++ b/trunk/security/selinux/include/av_permissions.h @@ -911,7 +911,6 @@ #define ASSOCIATION__SENDTO 0x00000001UL #define ASSOCIATION__RECVFROM 0x00000002UL #define ASSOCIATION__SETCONTEXT 0x00000004UL -#define ASSOCIATION__POLMATCH 0x00000008UL #define NETLINK_KOBJECT_UEVENT_SOCKET__IOCTL 0x00000001UL #define NETLINK_KOBJECT_UEVENT_SOCKET__READ 0x00000002UL diff --git a/trunk/security/selinux/include/objsec.h b/trunk/security/selinux/include/objsec.h index 0a39bfd1319f..940178865fc7 100644 --- a/trunk/security/selinux/include/objsec.h +++ b/trunk/security/selinux/include/objsec.h @@ -99,16 +99,7 @@ struct netif_security_struct { struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ - u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ -#ifdef CONFIG_NETLABEL - u16 sclass; /* sock security class */ - enum { /* NetLabel state */ - NLBL_UNSET = 0, - NLBL_REQUIRE, - NLBL_LABELED, - } nlbl_state; -#endif }; struct key_security_struct { diff --git a/trunk/security/selinux/include/security.h b/trunk/security/selinux/include/security.h index 911954a692fa..063af47bb231 100644 --- a/trunk/security/selinux/include/security.h +++ b/trunk/security/selinux/include/security.h @@ -78,8 +78,6 @@ int security_node_sid(u16 domain, void *addr, u32 addrlen, int security_validate_transition(u32 oldsid, u32 newsid, u32 tasksid, u16 tclass); -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid); - #define SECURITY_FS_USE_XATTR 1 /* use xattr */ #define SECURITY_FS_USE_TRANS 2 /* use transition SIDs, e.g. devpts/tmpfs */ #define SECURITY_FS_USE_TASK 3 /* use task SIDs, e.g. 
pipefs/sockfs */ diff --git a/trunk/security/selinux/include/selinux_netlabel.h b/trunk/security/selinux/include/selinux_netlabel.h deleted file mode 100644 index ecab4bddaaf4..000000000000 --- a/trunk/security/selinux/include/selinux_netlabel.h +++ /dev/null @@ -1,119 +0,0 @@ -/* - * SELinux interface to the NetLabel subsystem - * - * Author : Paul Moore - * - */ - -/* - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See - * the GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - */ - -#ifndef _SELINUX_NETLABEL_H_ -#define _SELINUX_NETLABEL_H_ - -#include -#include -#include -#include -#include - -#include "avc.h" -#include "objsec.h" - -#ifdef CONFIG_NETLABEL -void selinux_netlbl_cache_invalidate(void); -int selinux_netlbl_socket_post_create(struct socket *sock, - int sock_family, - u32 sid); -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock); -u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid); -int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, - struct sk_buff *skb, - struct avc_audit_data *ad); -u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock); -u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb); -void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec, - int family); -void 
selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec, - struct sk_security_struct *newssec); -int selinux_netlbl_inode_permission(struct inode *inode, int mask); -#else -static inline void selinux_netlbl_cache_invalidate(void) -{ - return; -} - -static inline int selinux_netlbl_socket_post_create(struct socket *sock, - int sock_family, - u32 sid) -{ - return 0; -} - -static inline void selinux_netlbl_sock_graft(struct sock *sk, - struct socket *sock) -{ - return; -} - -static inline u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, - u32 sock_sid) -{ - return SECSID_NULL; -} - -static inline int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, - struct sk_buff *skb, - struct avc_audit_data *ad) -{ - return 0; -} - -static inline u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) -{ - return SECSID_NULL; -} - -static inline u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) -{ - return SECSID_NULL; -} - -static inline void selinux_netlbl_sk_security_init( - struct sk_security_struct *ssec, - int family) -{ - return; -} - -static inline void selinux_netlbl_sk_clone_security( - struct sk_security_struct *ssec, - struct sk_security_struct *newssec) -{ - return; -} - -static inline int selinux_netlbl_inode_permission(struct inode *inode, - int mask) -{ - return 0; -} -#endif /* CONFIG_NETLABEL */ - -#endif diff --git a/trunk/security/selinux/include/xfrm.h b/trunk/security/selinux/include/xfrm.h index 81eb59890162..c96498a10eb8 100644 --- a/trunk/security/selinux/include/xfrm.h +++ b/trunk/security/selinux/include/xfrm.h @@ -2,25 +2,18 @@ * SELinux support for the XFRM LSM hooks * * Author : Trent Jaeger, - * Updated : Venkat Yekkirala, */ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); int 
selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); -int selinux_xfrm_state_alloc(struct xfrm_state *x, - struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid); +int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); -int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, - struct xfrm_policy *xp, struct flowi *fl); -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); - +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir); /* * Extract the security blob from the sock (it's actually on the socket) @@ -33,23 +26,30 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk) return SOCK_INODE(sk->sk_socket)->i_security; } + +static inline u32 selinux_no_sk_sid(struct flowi *fl) +{ + /* NOTE: no sock occurs on ICMP reply, forwards, ... 
*/ + /* icmp_reply: authorize as kernel packet */ + if (fl && fl->proto == IPPROTO_ICMP) { + return SECINITSID_KERNEL; + } + + return SECINITSID_ANY_SOCKET; +} + #ifdef CONFIG_SECURITY_NETWORK_XFRM -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, - struct avc_audit_data *ad); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad); +int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb); +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb); u32 selinux_socket_getpeer_stream(struct sock *sk); u32 selinux_socket_getpeer_dgram(struct sk_buff *skb); -int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall); #else -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) +static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) +static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) { return 0; } @@ -63,11 +63,6 @@ static inline int selinux_socket_getpeer_dgram(struct sk_buff *skb) { return SECSID_NULL; } -static inline int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) -{ - *sid = SECSID_NULL; - return 0; -} #endif #endif /* _SELINUX_XFRM_H_ */ diff --git a/trunk/security/selinux/ss/ebitmap.c b/trunk/security/selinux/ss/ebitmap.c index cfed1d30fa6a..47024a6e1844 100644 --- a/trunk/security/selinux/ss/ebitmap.c +++ b/trunk/security/selinux/ss/ebitmap.c @@ -3,14 +3,6 @@ * * Author : Stephen Smalley, */ -/* - * Updated: Hewlett-Packard - * - * Added ebitmap_export() and ebitmap_import() - * - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - */ - #include #include #include @@ -67,138 +59,6 @@ int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src) return 0; } -/** - * ebitmap_export - Export an ebitmap to a unsigned char 
bitmap string - * @src: the ebitmap to export - * @dst: the resulting bitmap string - * @dst_len: length of dst in bytes - * - * Description: - * Allocate a buffer at least src->highbit bits long and export the extensible - * bitmap into the buffer. The bitmap string will be in little endian format, - * i.e. LSB first. The value returned in dst_len may not the true size of the - * buffer as the length of the buffer is rounded up to a multiple of MAPTYPE. - * The caller must free the buffer when finished. Returns zero on success, - * negative values on failure. - * - */ -int ebitmap_export(const struct ebitmap *src, - unsigned char **dst, - size_t *dst_len) -{ - size_t bitmap_len; - unsigned char *bitmap; - struct ebitmap_node *iter_node; - MAPTYPE node_val; - size_t bitmap_byte; - unsigned char bitmask; - - bitmap_len = src->highbit / 8; - if (src->highbit % 7) - bitmap_len += 1; - if (bitmap_len == 0) - return -EINVAL; - - bitmap = kzalloc((bitmap_len & ~(sizeof(MAPTYPE) - 1)) + - sizeof(MAPTYPE), - GFP_ATOMIC); - if (bitmap == NULL) - return -ENOMEM; - - iter_node = src->node; - do { - bitmap_byte = iter_node->startbit / 8; - bitmask = 0x80; - node_val = iter_node->map; - do { - if (bitmask == 0) { - bitmap_byte++; - bitmask = 0x80; - } - if (node_val & (MAPTYPE)0x01) - bitmap[bitmap_byte] |= bitmask; - node_val >>= 1; - bitmask >>= 1; - } while (node_val > 0); - iter_node = iter_node->next; - } while (iter_node); - - *dst = bitmap; - *dst_len = bitmap_len; - return 0; -} - -/** - * ebitmap_import - Import an unsigned char bitmap string into an ebitmap - * @src: the bitmap string - * @src_len: the bitmap length in bytes - * @dst: the empty ebitmap - * - * Description: - * This function takes a little endian bitmap string in src and imports it into - * the ebitmap pointed to by dst. Returns zero on success, negative values on - * failure. 
- * - */ -int ebitmap_import(const unsigned char *src, - size_t src_len, - struct ebitmap *dst) -{ - size_t src_off = 0; - size_t node_limit; - struct ebitmap_node *node_new; - struct ebitmap_node *node_last = NULL; - u32 i_byte; - u32 i_bit; - unsigned char src_byte; - - while (src_off < src_len) { - if (src_len - src_off >= sizeof(MAPTYPE)) { - if (*(MAPTYPE *)&src[src_off] == 0) { - src_off += sizeof(MAPTYPE); - continue; - } - node_limit = sizeof(MAPTYPE); - } else { - for (src_byte = 0, i_byte = src_off; - i_byte < src_len && src_byte == 0; - i_byte++) - src_byte |= src[i_byte]; - if (src_byte == 0) - break; - node_limit = src_len - src_off; - } - - node_new = kzalloc(sizeof(*node_new), GFP_ATOMIC); - if (unlikely(node_new == NULL)) { - ebitmap_destroy(dst); - return -ENOMEM; - } - node_new->startbit = src_off * 8; - for (i_byte = 0; i_byte < node_limit; i_byte++) { - src_byte = src[src_off++]; - for (i_bit = i_byte * 8; src_byte != 0; i_bit++) { - if (src_byte & 0x80) - node_new->map |= MAPBIT << i_bit; - src_byte <<= 1; - } - } - - if (node_last != NULL) - node_last->next = node_new; - else - dst->node = node_new; - node_last = node_new; - } - - if (likely(node_last != NULL)) - dst->highbit = node_last->startbit + MAPSIZE; - else - ebitmap_init(dst); - - return 0; -} - int ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2) { struct ebitmap_node *n1, *n2; diff --git a/trunk/security/selinux/ss/ebitmap.h b/trunk/security/selinux/ss/ebitmap.h index da2d4651b10d..8bf41055a6cb 100644 --- a/trunk/security/selinux/ss/ebitmap.h +++ b/trunk/security/selinux/ss/ebitmap.h @@ -69,12 +69,6 @@ static inline int ebitmap_node_get_bit(struct ebitmap_node * n, int ebitmap_cmp(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_cpy(struct ebitmap *dst, struct ebitmap *src); -int ebitmap_export(const struct ebitmap *src, - unsigned char **dst, - size_t *dst_len); -int ebitmap_import(const unsigned char *src, - size_t src_len, - struct ebitmap *dst); int 
ebitmap_contains(struct ebitmap *e1, struct ebitmap *e2); int ebitmap_get_bit(struct ebitmap *e, unsigned long bit); int ebitmap_set_bit(struct ebitmap *e, unsigned long bit, int value); diff --git a/trunk/security/selinux/ss/mls.c b/trunk/security/selinux/ss/mls.c index 119bd6078ba1..7bc5b6440f70 100644 --- a/trunk/security/selinux/ss/mls.c +++ b/trunk/security/selinux/ss/mls.c @@ -10,13 +10,6 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ -/* - * Updated: Hewlett-Packard - * - * Added support to import/export the MLS label - * - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - */ #include #include @@ -218,6 +211,26 @@ int mls_context_isvalid(struct policydb *p, struct context *c) return 1; } +/* + * Copies the MLS range from `src' into `dst'. + */ +static inline int mls_copy_context(struct context *dst, + struct context *src) +{ + int l, rc = 0; + + /* Copy the MLS range from the source context */ + for (l = 0; l < 2; l++) { + dst->range.level[l].sens = src->range.level[l].sens; + rc = ebitmap_cpy(&dst->range.level[l].cat, + &src->range.level[l].cat); + if (rc) + break; + } + + return rc; +} + /* * Set the MLS fields in the security context structure * `context' based on the string representation in @@ -572,152 +585,3 @@ int mls_compute_sid(struct context *scontext, return -EINVAL; } -/** - * mls_export_lvl - Export the MLS sensitivity levels - * @context: the security context - * @low: the low sensitivity level - * @high: the high sensitivity level - * - * Description: - * Given the security context copy the low MLS sensitivity level into lvl_low - * and the high sensitivity level in lvl_high. The MLS levels are only - * exported if the pointers are not NULL, if they are NULL then that level is - * not exported. 
- * - */ -void mls_export_lvl(const struct context *context, u32 *low, u32 *high) -{ - if (!selinux_mls_enabled) - return; - - if (low != NULL) - *low = context->range.level[0].sens - 1; - if (high != NULL) - *high = context->range.level[1].sens - 1; -} - -/** - * mls_import_lvl - Import the MLS sensitivity levels - * @context: the security context - * @low: the low sensitivity level - * @high: the high sensitivity level - * - * Description: - * Given the security context and the two sensitivty levels, set the MLS levels - * in the context according the two given as parameters. Returns zero on - * success, negative values on failure. - * - */ -void mls_import_lvl(struct context *context, u32 low, u32 high) -{ - if (!selinux_mls_enabled) - return; - - context->range.level[0].sens = low + 1; - context->range.level[1].sens = high + 1; -} - -/** - * mls_export_cat - Export the MLS categories - * @context: the security context - * @low: the low category - * @low_len: length of the cat_low bitmap in bytes - * @high: the high category - * @high_len: length of the cat_high bitmap in bytes - * - * Description: - * Given the security context export the low MLS category bitmap into cat_low - * and the high category bitmap into cat_high. The MLS categories are only - * exported if the pointers are not NULL, if they are NULL then that level is - * not exported. The caller is responsibile for freeing the memory when - * finished. Returns zero on success, negative values on failure. 
- * - */ -int mls_export_cat(const struct context *context, - unsigned char **low, - size_t *low_len, - unsigned char **high, - size_t *high_len) -{ - int rc = -EPERM; - - if (!selinux_mls_enabled) - return 0; - - if (low != NULL) { - rc = ebitmap_export(&context->range.level[0].cat, - low, - low_len); - if (rc != 0) - goto export_cat_failure; - } - if (high != NULL) { - rc = ebitmap_export(&context->range.level[1].cat, - high, - high_len); - if (rc != 0) - goto export_cat_failure; - } - - return 0; - -export_cat_failure: - if (low != NULL) - kfree(*low); - if (high != NULL) - kfree(*high); - return rc; -} - -/** - * mls_import_cat - Import the MLS categories - * @context: the security context - * @low: the low category - * @low_len: length of the cat_low bitmap in bytes - * @high: the high category - * @high_len: length of the cat_high bitmap in bytes - * - * Description: - * Given the security context and the two category bitmap strings import the - * categories into the security context. The MLS categories are only imported - * if the pointers are not NULL, if they are NULL they are skipped. Returns - * zero on success, negative values on failure. 
- * - */ -int mls_import_cat(struct context *context, - const unsigned char *low, - size_t low_len, - const unsigned char *high, - size_t high_len) -{ - int rc = -EPERM; - - if (!selinux_mls_enabled) - return 0; - - if (low != NULL) { - rc = ebitmap_import(low, - low_len, - &context->range.level[0].cat); - if (rc != 0) - goto import_cat_failure; - } - if (high != NULL) { - if (high == low) - rc = ebitmap_cpy(&context->range.level[1].cat, - &context->range.level[0].cat); - else - rc = ebitmap_import(high, - high_len, - &context->range.level[1].cat); - if (rc != 0) - goto import_cat_failure; - } - - return 0; - -import_cat_failure: - ebitmap_destroy(&context->range.level[0].cat); - ebitmap_destroy(&context->range.level[1].cat); - return rc; -} diff --git a/trunk/security/selinux/ss/mls.h b/trunk/security/selinux/ss/mls.h index df6032c6d492..fbb42f07dd7c 100644 --- a/trunk/security/selinux/ss/mls.h +++ b/trunk/security/selinux/ss/mls.h @@ -10,13 +10,6 @@ * * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. */ -/* - * Updated: Hewlett-Packard - * - * Added support to import/export the MLS label - * - * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 - */ #ifndef _SS_MLS_H_ #define _SS_MLS_H_ @@ -24,26 +17,6 @@ #include "context.h" #include "policydb.h" -/* - * Copies the MLS range from `src' into `dst'. 
- */ -static inline int mls_copy_context(struct context *dst, - struct context *src) -{ - int l, rc = 0; - - /* Copy the MLS range from the source context */ - for (l = 0; l < 2; l++) { - dst->range.level[l].sens = src->range.level[l].sens; - rc = ebitmap_cpy(&dst->range.level[l].cat, - &src->range.level[l].cat); - if (rc) - break; - } - - return rc; -} - int mls_compute_context_len(struct context *context); void mls_sid_to_context(struct context *context, char **scontext); int mls_context_isvalid(struct policydb *p, struct context *c); @@ -69,19 +42,5 @@ int mls_compute_sid(struct context *scontext, int mls_setup_user_range(struct context *fromcon, struct user_datum *user, struct context *usercon); -void mls_export_lvl(const struct context *context, u32 *low, u32 *high); -void mls_import_lvl(struct context *context, u32 low, u32 high); - -int mls_export_cat(const struct context *context, - unsigned char **low, - size_t *low_len, - unsigned char **high, - size_t *high_len); -int mls_import_cat(struct context *context, - const unsigned char *low, - size_t low_len, - const unsigned char *high, - size_t high_len); - #endif /* _SS_MLS_H */ diff --git a/trunk/security/selinux/ss/services.c b/trunk/security/selinux/ss/services.c index 7eb69a602d8f..85e429884393 100644 --- a/trunk/security/selinux/ss/services.c +++ b/trunk/security/selinux/ss/services.c @@ -13,11 +13,6 @@ * * Added conditional policy language extensions * - * Updated: Hewlett-Packard - * - * Added support for NetLabel - * - * Copyright (C) 2006 Hewlett-Packard Development Company, L.P. * Copyright (C) 2004-2006 Trusted Computer Solutions, Inc. 
* Copyright (C) 2003 - 2004 Tresys Technology, LLC * Copyright (C) 2003 Red Hat, Inc., James Morris @@ -34,8 +29,6 @@ #include #include #include -#include -#include #include "flask.h" #include "avc.h" @@ -47,8 +40,6 @@ #include "services.h" #include "conditional.h" #include "mls.h" -#include "objsec.h" -#include "selinux_netlabel.h" extern void selnl_notify_policyload(u32 seqno); unsigned int policydb_loaded_version; @@ -1250,7 +1241,6 @@ int security_load_policy(void *data, size_t len) selinux_complete_init(); avc_ss_reset(seqno); selnl_notify_policyload(seqno); - selinux_netlbl_cache_invalidate(); return 0; } @@ -1305,7 +1295,6 @@ int security_load_policy(void *data, size_t len) avc_ss_reset(seqno); selnl_notify_policyload(seqno); - selinux_netlbl_cache_invalidate(); return 0; @@ -1828,75 +1817,6 @@ int security_get_bool_value(int bool) return rc; } -/* - * security_sid_mls_copy() - computes a new sid based on the given - * sid and the mls portion of mls_sid. - */ -int security_sid_mls_copy(u32 sid, u32 mls_sid, u32 *new_sid) -{ - struct context *context1; - struct context *context2; - struct context newcon; - char *s; - u32 len; - int rc = 0; - - if (!ss_initialized || !selinux_mls_enabled) { - *new_sid = sid; - goto out; - } - - context_init(&newcon); - - POLICY_RDLOCK; - context1 = sidtab_search(&sidtab, sid); - if (!context1) { - printk(KERN_ERR "security_sid_mls_copy: unrecognized SID " - "%d\n", sid); - rc = -EINVAL; - goto out_unlock; - } - - context2 = sidtab_search(&sidtab, mls_sid); - if (!context2) { - printk(KERN_ERR "security_sid_mls_copy: unrecognized SID " - "%d\n", mls_sid); - rc = -EINVAL; - goto out_unlock; - } - - newcon.user = context1->user; - newcon.role = context1->role; - newcon.type = context1->type; - rc = mls_copy_context(&newcon, context2); - if (rc) - goto out_unlock; - - - /* Check the validity of the new context. 
*/ - if (!policydb_context_isvalid(&policydb, &newcon)) { - rc = convert_context_handle_invalid_context(&newcon); - if (rc) - goto bad; - } - - rc = sidtab_context_to_sid(&sidtab, &newcon, new_sid); - goto out_unlock; - -bad: - if (!context_struct_to_string(&newcon, &s, &len)) { - audit_log(current->audit_context, GFP_ATOMIC, AUDIT_SELINUX_ERR, - "security_sid_mls_copy: invalid context %s", s); - kfree(s); - } - -out_unlock: - POLICY_RDUNLOCK; - context_destroy(&newcon); -out: - return rc; -} - struct selinux_audit_rule { u32 au_seqno; struct context au_ctxt; @@ -2144,536 +2064,3 @@ void selinux_audit_set_callback(int (*callback)(void)) { aurule_callback = callback; } - -#ifdef CONFIG_NETLABEL -/* - * This is the structure we store inside the NetLabel cache block. - */ -#define NETLBL_CACHE(x) ((struct netlbl_cache *)(x)) -#define NETLBL_CACHE_T_NONE 0 -#define NETLBL_CACHE_T_SID 1 -#define NETLBL_CACHE_T_MLS 2 -struct netlbl_cache { - u32 type; - union { - u32 sid; - struct mls_range mls_label; - } data; -}; - -/** - * selinux_netlbl_cache_free - Free the NetLabel cached data - * @data: the data to free - * - * Description: - * This function is intended to be used as the free() callback inside the - * netlbl_lsm_cache structure. - * - */ -static void selinux_netlbl_cache_free(const void *data) -{ - struct netlbl_cache *cache = NETLBL_CACHE(data); - switch (cache->type) { - case NETLBL_CACHE_T_MLS: - ebitmap_destroy(&cache->data.mls_label.level[0].cat); - break; - } - kfree(data); -} - -/** - * selinux_netlbl_cache_add - Add an entry to the NetLabel cache - * @skb: the packet - * @ctx: the SELinux context - * - * Description: - * Attempt to cache the context in @ctx, which was derived from the packet in - * @skb, in the NetLabel subsystem cache. 
- * - */ -static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) -{ - struct netlbl_cache *cache = NULL; - struct netlbl_lsm_secattr secattr; - - netlbl_secattr_init(&secattr); - - cache = kzalloc(sizeof(*cache), GFP_ATOMIC); - if (cache == NULL) - goto netlbl_cache_add_failure; - secattr.cache.free = selinux_netlbl_cache_free; - secattr.cache.data = (void *)cache; - - cache->type = NETLBL_CACHE_T_MLS; - if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, - &ctx->range.level[0].cat) != 0) - goto netlbl_cache_add_failure; - cache->data.mls_label.level[1].cat.highbit = - cache->data.mls_label.level[0].cat.highbit; - cache->data.mls_label.level[1].cat.node = - cache->data.mls_label.level[0].cat.node; - cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; - cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; - - if (netlbl_cache_add(skb, &secattr) != 0) - goto netlbl_cache_add_failure; - - return; - -netlbl_cache_add_failure: - netlbl_secattr_destroy(&secattr, 1); -} - -/** - * selinux_netlbl_cache_invalidate - Invalidate the NetLabel cache - * - * Description: - * Invalidate the NetLabel security attribute mapping cache. - * - */ -void selinux_netlbl_cache_invalidate(void) -{ - netlbl_cache_invalidate(); -} - -/** - * selinux_netlbl_secattr_to_sid - Convert a NetLabel secattr to a SELinux SID - * @skb: the network packet - * @secattr: the NetLabel packet security attributes - * @base_sid: the SELinux SID to use as a context for MLS only attributes - * @sid: the SELinux SID - * - * Description: - * Convert the given NetLabel packet security attributes in @secattr into a - * SELinux SID. If the @secattr field does not contain a full SELinux - * SID/context then use the context in @base_sid as the foundation. If @skb - * is not NULL attempt to cache as much data as possibile. Returns zero on - * success, negative values on failure. 
- * - */ -static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, - struct netlbl_lsm_secattr *secattr, - u32 base_sid, - u32 *sid) -{ - int rc = -EIDRM; - struct context *ctx; - struct context ctx_new; - struct netlbl_cache *cache; - - POLICY_RDLOCK; - - if (secattr->cache.data) { - cache = NETLBL_CACHE(secattr->cache.data); - switch (cache->type) { - case NETLBL_CACHE_T_SID: - *sid = cache->data.sid; - rc = 0; - break; - case NETLBL_CACHE_T_MLS: - ctx = sidtab_search(&sidtab, base_sid); - if (ctx == NULL) - goto netlbl_secattr_to_sid_return; - - ctx_new.user = ctx->user; - ctx_new.role = ctx->role; - ctx_new.type = ctx->type; - ctx_new.range.level[0].sens = - cache->data.mls_label.level[0].sens; - ctx_new.range.level[0].cat.highbit = - cache->data.mls_label.level[0].cat.highbit; - ctx_new.range.level[0].cat.node = - cache->data.mls_label.level[0].cat.node; - ctx_new.range.level[1].sens = - cache->data.mls_label.level[1].sens; - ctx_new.range.level[1].cat.highbit = - cache->data.mls_label.level[1].cat.highbit; - ctx_new.range.level[1].cat.node = - cache->data.mls_label.level[1].cat.node; - - rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); - break; - default: - goto netlbl_secattr_to_sid_return; - } - } else if (secattr->mls_lvl_vld) { - ctx = sidtab_search(&sidtab, base_sid); - if (ctx == NULL) - goto netlbl_secattr_to_sid_return; - - ctx_new.user = ctx->user; - ctx_new.role = ctx->role; - ctx_new.type = ctx->type; - mls_import_lvl(&ctx_new, secattr->mls_lvl, secattr->mls_lvl); - if (secattr->mls_cat) { - if (mls_import_cat(&ctx_new, - secattr->mls_cat, - secattr->mls_cat_len, - NULL, - 0) != 0) - goto netlbl_secattr_to_sid_return; - ctx_new.range.level[1].cat.highbit = - ctx_new.range.level[0].cat.highbit; - ctx_new.range.level[1].cat.node = - ctx_new.range.level[0].cat.node; - } else { - ebitmap_init(&ctx_new.range.level[0].cat); - ebitmap_init(&ctx_new.range.level[1].cat); - } - if (mls_context_isvalid(&policydb, &ctx_new) != 1) - goto 
netlbl_secattr_to_sid_return_cleanup; - - rc = sidtab_context_to_sid(&sidtab, &ctx_new, sid); - if (rc != 0) - goto netlbl_secattr_to_sid_return_cleanup; - - if (skb != NULL) - selinux_netlbl_cache_add(skb, &ctx_new); - ebitmap_destroy(&ctx_new.range.level[0].cat); - } else { - *sid = SECINITSID_UNLABELED; - rc = 0; - } - -netlbl_secattr_to_sid_return: - POLICY_RDUNLOCK; - return rc; -netlbl_secattr_to_sid_return_cleanup: - ebitmap_destroy(&ctx_new.range.level[0].cat); - goto netlbl_secattr_to_sid_return; -} - -/** - * selinux_netlbl_skbuff_getsid - Get the sid of a packet using NetLabel - * @skb: the packet - * @base_sid: the SELinux SID to use as a context for MLS only attributes - * @sid: the SID - * - * Description: - * Call the NetLabel mechanism to get the security attributes of the given - * packet and use those attributes to determine the correct context/SID to - * assign to the packet. Returns zero on success, negative values on failure. - * - */ -static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, - u32 base_sid, - u32 *sid) -{ - int rc; - struct netlbl_lsm_secattr secattr; - - netlbl_secattr_init(&secattr); - rc = netlbl_skbuff_getattr(skb, &secattr); - if (rc == 0) - rc = selinux_netlbl_secattr_to_sid(skb, - &secattr, - base_sid, - sid); - netlbl_secattr_destroy(&secattr, 0); - - return rc; -} - -/** - * selinux_netlbl_socket_setsid - Label a socket using the NetLabel mechanism - * @sock: the socket to label - * @sid: the SID to use - * - * Description: - * Attempt to label a socket using the NetLabel mechanism using the given - * SID. Returns zero values on success, negative values on failure. 
- * - */ -static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) -{ - int rc = -ENOENT; - struct sk_security_struct *sksec = sock->sk->sk_security; - struct netlbl_lsm_secattr secattr; - struct context *ctx; - - if (!ss_initialized) - return 0; - - POLICY_RDLOCK; - - ctx = sidtab_search(&sidtab, sid); - if (ctx == NULL) - goto netlbl_socket_setsid_return; - - netlbl_secattr_init(&secattr); - secattr.domain = kstrdup(policydb.p_type_val_to_name[ctx->type - 1], - GFP_ATOMIC); - mls_export_lvl(ctx, &secattr.mls_lvl, NULL); - secattr.mls_lvl_vld = 1; - mls_export_cat(ctx, - &secattr.mls_cat, - &secattr.mls_cat_len, - NULL, - NULL); - - rc = netlbl_socket_setattr(sock, &secattr); - if (rc == 0) - sksec->nlbl_state = NLBL_LABELED; - - netlbl_secattr_destroy(&secattr, 0); - -netlbl_socket_setsid_return: - POLICY_RDUNLOCK; - return rc; -} - -/** - * selinux_netlbl_sk_security_init - Setup the NetLabel fields - * @ssec: the sk_security_struct - * @family: the socket family - * - * Description: - * Called when a new sk_security_struct is allocated to initialize the NetLabel - * fields. - * - */ -void selinux_netlbl_sk_security_init(struct sk_security_struct *ssec, - int family) -{ - if (family == PF_INET) - ssec->nlbl_state = NLBL_REQUIRE; - else - ssec->nlbl_state = NLBL_UNSET; -} - -/** - * selinux_netlbl_sk_clone_security - Copy the NetLabel fields - * @ssec: the original sk_security_struct - * @newssec: the cloned sk_security_struct - * - * Description: - * Clone the NetLabel specific sk_security_struct fields from @ssec to - * @newssec. 
- * - */ -void selinux_netlbl_sk_clone_security(struct sk_security_struct *ssec, - struct sk_security_struct *newssec) -{ - newssec->sclass = ssec->sclass; - if (ssec->nlbl_state != NLBL_UNSET) - newssec->nlbl_state = NLBL_REQUIRE; - else - newssec->nlbl_state = NLBL_UNSET; -} - -/** - * selinux_netlbl_socket_post_create - Label a socket using NetLabel - * @sock: the socket to label - * @sock_family: the socket family - * @sid: the SID to use - * - * Description: - * Attempt to label a socket using the NetLabel mechanism using the given - * SID. Returns zero values on success, negative values on failure. - * - */ -int selinux_netlbl_socket_post_create(struct socket *sock, - int sock_family, - u32 sid) -{ - struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; - struct sk_security_struct *sksec = sock->sk->sk_security; - - sksec->sclass = isec->sclass; - - if (sock_family != PF_INET) - return 0; - - sksec->nlbl_state = NLBL_REQUIRE; - return selinux_netlbl_socket_setsid(sock, sid); -} - -/** - * selinux_netlbl_sock_graft - Netlabel the new socket - * @sk: the new connection - * @sock: the new socket - * - * Description: - * The connection represented by @sk is being grafted onto @sock so set the - * socket's NetLabel to match the SID of @sk. - * - */ -void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) -{ - struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; - struct sk_security_struct *sksec = sk->sk_security; - - sksec->sclass = isec->sclass; - - if (sk->sk_family != PF_INET) - return; - - sksec->nlbl_state = NLBL_REQUIRE; - sksec->peer_sid = sksec->sid; - - /* Try to set the NetLabel on the socket to save time later, if we fail - * here we will pick up the pieces in later calls to - * selinux_netlbl_inode_permission(). 
*/ - selinux_netlbl_socket_setsid(sock, sksec->sid); -} - -/** - * selinux_netlbl_inet_conn_request - Handle a new connection request - * @skb: the packet - * @sock_sid: the SID of the parent socket - * - * Description: - * If present, use the security attributes of the packet in @skb and the - * parent sock's SID to arrive at a SID for the new child sock. Returns the - * SID of the connection or SECSID_NULL on failure. - * - */ -u32 selinux_netlbl_inet_conn_request(struct sk_buff *skb, u32 sock_sid) -{ - int rc; - u32 peer_sid; - - rc = selinux_netlbl_skbuff_getsid(skb, sock_sid, &peer_sid); - if (rc != 0) - return SECSID_NULL; - - if (peer_sid == SECINITSID_UNLABELED) - return SECSID_NULL; - - return peer_sid; -} - -/** - * selinux_netlbl_inode_permission - Verify the socket is NetLabel labeled - * @inode: the file descriptor's inode - * @mask: the permission mask - * - * Description: - * Looks at a file's inode and if it is marked as a socket protected by - * NetLabel then verify that the socket has been labeled, if not try to label - * the socket now with the inode's SID. Returns zero on success, negative - * values on failure. 
- * - */ -int selinux_netlbl_inode_permission(struct inode *inode, int mask) -{ - int rc; - struct inode_security_struct *isec; - struct sk_security_struct *sksec; - struct socket *sock; - - if (!S_ISSOCK(inode->i_mode)) - return 0; - - sock = SOCKET_I(inode); - isec = inode->i_security; - sksec = sock->sk->sk_security; - down(&isec->sem); - if (unlikely(sksec->nlbl_state == NLBL_REQUIRE && - (mask & (MAY_WRITE | MAY_APPEND)))) { - lock_sock(sock->sk); - rc = selinux_netlbl_socket_setsid(sock, sksec->sid); - release_sock(sock->sk); - } else - rc = 0; - up(&isec->sem); - - return rc; -} - -/** - * selinux_netlbl_sock_rcv_skb - Do an inbound access check using NetLabel - * @sksec: the sock's sk_security_struct - * @skb: the packet - * @ad: the audit data - * - * Description: - * Fetch the NetLabel security attributes from @skb and perform an access check - * against the receiving socket. Returns zero on success, negative values on - * error. - * - */ -int selinux_netlbl_sock_rcv_skb(struct sk_security_struct *sksec, - struct sk_buff *skb, - struct avc_audit_data *ad) -{ - int rc; - u32 netlbl_sid; - u32 recv_perm; - - rc = selinux_netlbl_skbuff_getsid(skb, sksec->sid, &netlbl_sid); - if (rc != 0) - return rc; - - if (netlbl_sid == SECINITSID_UNLABELED) - return 0; - - switch (sksec->sclass) { - case SECCLASS_UDP_SOCKET: - recv_perm = UDP_SOCKET__RECV_MSG; - break; - case SECCLASS_TCP_SOCKET: - recv_perm = TCP_SOCKET__RECV_MSG; - break; - default: - recv_perm = RAWIP_SOCKET__RECV_MSG; - } - - rc = avc_has_perm(sksec->sid, - netlbl_sid, - sksec->sclass, - recv_perm, - ad); - if (rc == 0) - return 0; - - netlbl_skbuff_err(skb, rc); - return rc; -} - -/** - * selinux_netlbl_socket_getpeersec_stream - Return the connected peer's SID - * @sock: the socket - * - * Description: - * Examine @sock to find the connected peer's SID. Returns the SID on success - * or SECSID_NULL on error. 
- * - */ -u32 selinux_netlbl_socket_getpeersec_stream(struct socket *sock) -{ - struct sk_security_struct *sksec = sock->sk->sk_security; - - if (sksec->peer_sid == SECINITSID_UNLABELED) - return SECSID_NULL; - - return sksec->peer_sid; -} - -/** - * selinux_netlbl_socket_getpeersec_dgram - Return the SID of a NetLabel packet - * @skb: the packet - * - * Description: - * Examine @skb to find the SID assigned to it by NetLabel. Returns the SID on - * success, SECSID_NULL on error. - * - */ -u32 selinux_netlbl_socket_getpeersec_dgram(struct sk_buff *skb) -{ - int peer_sid; - struct sock *sk = skb->sk; - struct inode_security_struct *isec; - - if (sk == NULL || sk->sk_socket == NULL) - return SECSID_NULL; - - isec = SOCK_INODE(sk->sk_socket)->i_security; - if (selinux_netlbl_skbuff_getsid(skb, isec->sid, &peer_sid) != 0) - return SECSID_NULL; - if (peer_sid == SECINITSID_UNLABELED) - return SECSID_NULL; - - return peer_sid; -} -#endif /* CONFIG_NETLABEL */ diff --git a/trunk/security/selinux/xfrm.c b/trunk/security/selinux/xfrm.c index 3e742b850af6..6c985ced8102 100644 --- a/trunk/security/selinux/xfrm.c +++ b/trunk/security/selinux/xfrm.c @@ -6,12 +6,7 @@ * Authors: Serge Hallyn * Trent Jaeger * - * Updated: Venkat Yekkirala - * - * Granular IPSec Associations for use in MLS environments. - * * Copyright (C) 2005 International Business Machines Corporation - * Copyright (C) 2006 Trusted Computer Solutions, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -72,10 +67,10 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a flow can use - * a xfrm policy rule. + * LSM hook implementation that authorizes that a socket can be used + * with the corresponding xfrm_sec_ctx and direction. 
*/ -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; @@ -89,130 +84,27 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) sel_sid = ctx->ctx_sid; } - rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__POLMATCH, + rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION, + ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM : + ((dir == FLOW_DIR_OUT) ? ASSOCIATION__SENDTO : + (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))), NULL); return rc; } -/* - * LSM hook implementation that authorizes that a state matches - * the given policy, flow combo. - */ - -int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, - struct flowi *fl) -{ - u32 state_sid; - u32 pol_sid; - int err; - - if (x->security) - state_sid = x->security->ctx_sid; - else - state_sid = SECINITSID_UNLABELED; - - if (xp->security) - pol_sid = xp->security->ctx_sid; - else - pol_sid = SECINITSID_UNLABELED; - - err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__POLMATCH, - NULL); - - if (err) - return 0; - - return selinux_xfrm_flow_state_match(fl, x); -} - -/* - * LSM hook implementation that authorizes that a particular outgoing flow - * can use a given security association. - */ - -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) -{ - int rc = 0; - u32 sel_sid = SECINITSID_UNLABELED; - struct xfrm_sec_ctx *ctx; - - /* Context sid is either set to label or ANY_ASSOC */ - if ((ctx = xfrm->security)) { - if (!selinux_authorizable_ctx(ctx)) - return 0; - - sel_sid = ctx->ctx_sid; - } - - rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, - NULL)? 0:1; - - return rc; -} - -/* - * LSM hook implementation that determines the sid for the session. 
- */ - -int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) -{ - struct sec_path *sp; - - *sid = SECSID_NULL; - - if (skb == NULL) - return 0; - - sp = skb->sp; - if (sp) { - int i, sid_set = 0; - - for (i = sp->len-1; i >= 0; i--) { - struct xfrm_state *x = sp->xvec[i]; - if (selinux_authorizable_xfrm(x)) { - struct xfrm_sec_ctx *ctx = x->security; - - if (!sid_set) { - *sid = ctx->ctx_sid; - sid_set = 1; - - if (!ckall) - break; - } - else if (*sid != ctx->ctx_sid) - return -EINVAL; - } - } - } - - return 0; -} - /* * Security blob allocation for xfrm_policy and xfrm_state * CTX does not have a meaningful value on input */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, - struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid) +static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) { int rc = 0; struct task_security_struct *tsec = current->security; - struct xfrm_sec_ctx *ctx = NULL; - char *ctx_str = NULL; - u32 str_len; - u32 ctx_sid; - - BUG_ON(uctx && pol); - - if (!uctx) - goto not_from_user; + struct xfrm_sec_ctx *ctx; - if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) - return -EINVAL; + BUG_ON(!uctx); + BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX); if (uctx->ctx_len >= PAGE_SIZE) return -ENOMEM; @@ -249,43 +141,9 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, return rc; -not_from_user: - if (pol) { - rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); - if (rc) - goto out; - } - else - ctx_sid = sid; - - rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); - if (rc) - goto out; - - *ctxp = ctx = kmalloc(sizeof(*ctx) + - str_len, - GFP_ATOMIC); - - if (!ctx) { - rc = -ENOMEM; - goto out; - } - - ctx->ctx_doi = XFRM_SC_DOI_LSM; - ctx->ctx_alg = XFRM_SC_ALG_SELINUX; - ctx->ctx_sid = ctx_sid; - ctx->ctx_len = str_len; - memcpy(ctx->ctx_str, - ctx_str, - str_len); - - goto out2; - out: *ctxp = NULL; kfree(ctx); -out2: - kfree(ctx_str); 
return rc; } @@ -293,23 +151,13 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. */ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, - struct xfrm_user_sec_ctx *uctx, struct sock *sk) +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx) { int err; - u32 sid; BUG_ON(!xp); - BUG_ON(uctx && sk); - if (sk) { - struct sk_security_struct *ssec = sk->sk_security; - sid = ssec->sid; - } - else - sid = SECSID_NULL; - - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx); return err; } @@ -369,14 +217,13 @@ int selinux_xfrm_policy_delete(struct xfrm_policy *xp) * LSM hook implementation that allocs and transfers sec_ctx spec to * xfrm_state. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, - struct xfrm_sec_ctx *pol, u32 secid) +int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) { int err; BUG_ON(!x); - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid); + err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx); return err; } @@ -482,30 +329,38 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) +int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) { int i, rc = 0; struct sec_path *sp; - u32 sel_sid = SECINITSID_UNLABELED; sp = skb->sp; if (sp) { + /* + * __xfrm_policy_check does not approve unless xfrm_policy_ok + * says that spi's match for policy and the socket. + * + * Only need to verify the existence of an authorizable sp. 
+ */ for (i = 0; i < sp->len; i++) { struct xfrm_state *x = sp->xvec[i]; - if (x && selinux_authorizable_xfrm(x)) { - struct xfrm_sec_ctx *ctx = x->security; - sel_sid = ctx->ctx_sid; - break; - } + if (x && selinux_authorizable_xfrm(x)) + goto accept; } } - rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, ad); + /* check SELinux sock for unlabelled access */ + rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, + ASSOCIATION__RECVFROM, NULL); + if (rc) + goto drop; + +accept: + return 0; +drop: return rc; } @@ -516,8 +371,7 @@ int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, * If we do have a authorizable security association, then it has already been * checked in xfrm_policy_lookup hook. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, - struct avc_audit_data *ad) +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) { struct dst_entry *dst; int rc = 0; @@ -537,7 +391,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, } rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, ad); + ASSOCIATION__SENDTO, NULL); out: return rc; }