339 files changed, 40476 insertions, 7425 deletions
diff --git a/Documentation/devicetree/bindings/gpio/gpio_keys.txt b/Documentation/devicetree/bindings/gpio/gpio_keys.txt new file mode 100644 index 000000000000..7190c99d7611 --- /dev/null +++ b/Documentation/devicetree/bindings/gpio/gpio_keys.txt @@ -0,0 +1,36 @@ +Device-Tree bindings for input/gpio_keys.c keyboard driver + +Required properties: + - compatible = "gpio-keys"; + +Optional properties: + - autorepeat: Boolean, enables the auto-repeat feature of the Linux input + subsystem. + +Each button (key) is represented as a sub-node of "gpio-keys": +Subnode properties: + + - gpios: OF device-tree GPIO specification. + - label: Descriptive name of the key. + - linux,code: Keycode to emit. + +Optional subnode-properties: + - linux,input-type: Specify event type this button/key generates. + If not specified defaults to <1> == EV_KEY. + - debounce-interval: Debouncing interval time in milliseconds. + If not specified defaults to 5. + - gpio-key,wakeup: Boolean, button can wake up the system. + +Example nodes: + + gpio_keys { + compatible = "gpio-keys"; + #address-cells = <1>; + #size-cells = <0>; + autorepeat; + button@21 { + label = "GPIO Key UP"; + linux,code = <103>; + gpios = <&gpio1 0 1>; + }; + ... diff --git a/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt new file mode 100644 index 000000000000..2144af1a5264 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/fsl-imx-wdt.txt @@ -0,0 +1,14 @@ +* Freescale i.MX Watchdog Timer (WDT) Controller + +Required properties: +- compatible : Should be "fsl,<soc>-wdt" +- reg : Should contain WDT registers location and length +- interrupts : Should contain WDT interrupt + +Examples: + +wdt@73f98000 { + compatible = "fsl,imx51-wdt", "fsl,imx21-wdt"; + reg = <0x73f98000 0x4000>; + interrupts = <58>; +}; diff --git a/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt new file mode 100644 index 000000000000..79ead8263ae4 --- /dev/null +++ b/Documentation/devicetree/bindings/watchdog/samsung-wdt.txt @@ -0,0 +1,11 @@ +* Samsung's Watchdog Timer Controller + +Samsung's watchdog controller is used for resuming system operation +after a preset amount of time during which the WDT reset event has not +occurred. + +Required properties: +- compatible : should be "samsung,s3c2410-wdt" +- reg : base physical address of the controller and length of memory mapped + region. +- interrupts : interrupt number to the cpu. diff --git a/Documentation/md.txt b/Documentation/md.txt index f0eee83ff78a..fc94770f44ab 100644 --- a/Documentation/md.txt +++ b/Documentation/md.txt @@ -360,18 +360,20 @@ Each directory contains: A file recording the current state of the device in the array which can be a comma separated list of faulty - device has been kicked from active use due to - a detected fault + a detected fault or it has unacknowledged bad + blocks in_sync - device is a fully in-sync member of the array writemostly - device will only be subject to read requests if there are no other options. This applies only to raid1 arrays. - blocked - device has failed, metadata is "external", - and the failure hasn't been acknowledged yet. + blocked - device has failed, and the failure hasn't been + acknowledged yet by the metadata handler. Writes that would write to this device if it were not faulty are blocked. spare - device is working, but not a full member.
This includes spares that are in the process of being recovered to + write_error - device has ever seen a write error. This list may grow in future. This can be written to. Writing "faulty" simulates a failure on the device. @@ -379,9 +381,11 @@ Each directory contains: Writing "writemostly" sets the writemostly flag. Writing "-writemostly" clears the writemostly flag. Writing "blocked" sets the "blocked" flag. - Writing "-blocked" clears the "blocked" flag and allows writes - to complete. + Writing "-blocked" clears the "blocked" flag and allows writes + to complete and possibly simulates an error. Writing "in_sync" sets the in_sync flag. + Writing "write_error" sets the writeerrorseen flag. + Writing "-write_error" clears the writeerrorseen flag. This file responds to select/poll. Any change to 'faulty' or 'blocked' causes an event. @@ -419,7 +423,6 @@ Each directory contains: written, it will be rejected. recovery_start - When the device is not 'in_sync', this records the number of sectors from the start of the device which are known to be correct. This is normally zero, but during a recovery @@ -435,6 +438,20 @@ Each directory contains: Setting this to 'none' is equivalent to setting 'in_sync'. Setting to any other value also clears the 'in_sync' flag. + bad_blocks + This gives the list of all known bad blocks in the form of + start address and length (both in sectors). If output + is too big to fit in a page, it will be truncated. Writing + "sector length" to this file adds new acknowledged (i.e. + recorded to disk safely) bad blocks. + + unacknowledged_bad_blocks + This gives the list of known-but-not-yet-saved-to-disk bad + blocks in the same form as 'bad_blocks'. If output is too big + to fit in a page, it will be truncated. Writing to this file + adds bad blocks without acknowledging them. This is largely + for testing. + An active md device will also contain an entry for each active device diff --git a/Documentation/security/keys-ecryptfs.txt b/Documentation/security/keys-ecryptfs.txt new file mode 100644 index 000000000000..c3bbeba63562 --- /dev/null +++ b/Documentation/security/keys-ecryptfs.txt @@ -0,0 +1,68 @@ + Encrypted keys for the eCryptfs filesystem + +ECryptfs is a stacked filesystem which transparently encrypts and decrypts each +file using a randomly generated File Encryption Key (FEK). + +Each FEK is in turn encrypted with a File Encryption Key Encryption Key (FEFEK) +either in kernel space or in user space with a daemon called 'ecryptfsd'. In +the former case the operation is performed directly by the kernel CryptoAPI +using a key, the FEFEK, derived from a user prompted passphrase; in the latter +the FEK is encrypted by 'ecryptfsd' with the help of external libraries in order +to support other mechanisms like public key cryptography, PKCS#11 and TPM based +operations. + +The data structure defined by eCryptfs to contain information required for the +FEK decryption is called authentication token and, currently, can be stored in a +kernel key of the 'user' type, inserted in the user's session specific keyring +by the userspace utility 'mount.ecryptfs' shipped with the package +'ecryptfs-utils'. + +The 'encrypted' key type has been extended with the introduction of the new +format 'ecryptfs' in order to be used in conjunction with the eCryptfs +filesystem. Encrypted keys of the newly introduced format store an +authentication token in their payload with a FEFEK randomly generated by the +kernel and protected by the parent master key.
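
For completeness, the same 'new ecryptfs' request that the keyctl examples later in this file issue from the shell can also be made from C through the add_key(2) system call. The sketch below is illustrative only and not part of the patch; it assumes libkeyutils is installed and that a 'user' type master key named "test" has already been loaded, matching the example further down:

#include <keyutils.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* payload format: "new ecryptfs <key-type>:<master-key-name> <keylen>" */
	const char *payload = "new ecryptfs user:test 64";

	/* equivalent of: keyctl add encrypted 1000100010001000 "new ..." @u */
	key_serial_t key = add_key("encrypted", "1000100010001000",
				   payload, strlen(payload),
				   KEY_SPEC_USER_KEYRING);
	if (key < 0) {
		perror("add_key");
		return 1;
	}
	printf("encrypted key serial: %d\n", key);
	return 0;
}

The resulting key can then be referenced via the ecryptfs_sig= mount option exactly as in the mount example below.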
+ +In order to avoid known-plaintext attacks, the datablob obtained through +commands 'keyctl print' or 'keyctl pipe' does not contain the overall +authentication token, whose content is well known, but only the FEFEK in +encrypted form. + +The eCryptfs filesystem may really benefit from using encrypted keys in that the +required key can be securely generated by an Administrator and provided at boot +time after the unsealing of a 'trusted' key in order to perform the mount in a +controlled environment. Another advantage is that the key is not exposed to +threats of malicious software, because it is available in clear form only at +kernel level. + +Usage: + keyctl add encrypted name "new ecryptfs key-type:master-key-name keylen" ring + keyctl add encrypted name "load hex_blob" ring + keyctl update keyid "update key-type:master-key-name" + +name:= '<16 hexadecimal characters>' +key-type:= 'trusted' | 'user' +keylen:= 64 + + +Example of encrypted key usage with the eCryptfs filesystem: + +Create an encrypted key "1000100010001000" of length 64 bytes with format +'ecryptfs' and save it using a previously loaded user key "test": + + $ keyctl add encrypted 1000100010001000 "new ecryptfs user:test 64" @u + 19184530 + + $ keyctl print 19184530 + ecryptfs user:test 64 490045d4bfe48c99f0d465fbbbb79e7500da954178e2de0697 + dd85091f5450a0511219e9f7cd70dcd498038181466f78ac8d4c19504fcc72402bfc41c2 + f253a41b7507ccaa4b2b03fff19a69d1cc0b16e71746473f023a95488b6edfd86f7fdd40 + 9d292e4bacded1258880122dd553a661 + + $ keyctl pipe 19184530 > ecryptfs.blob + +Mount an eCryptfs filesystem using the created encrypted key "1000100010001000" +into the '/secret' directory: + + $ mount -i -t ecryptfs -oecryptfs_sig=1000100010001000,\ + ecryptfs_cipher=aes,ecryptfs_key_bytes=32 /secret /secret diff --git a/Documentation/security/keys-trusted-encrypted.txt b/Documentation/security/keys-trusted-encrypted.txt index 8fb79bc1ac4b..5f50ccabfc8a 100644 --- a/Documentation/security/keys-trusted-encrypted.txt +++ b/Documentation/security/keys-trusted-encrypted.txt @@ -53,12 +53,19 @@ they are only as secure as the user key encrypting them. The master user key should therefore be loaded in as secure a way as possible, preferably early in boot. +The decrypted portion of encrypted keys can contain either a simple symmetric +key or a more complex structure. The format of the more complex structure is +application specific and is identified by 'format'. + Usage: - keyctl add encrypted name "new key-type:master-key-name keylen" ring - keyctl add encrypted name "load hex_blob" ring - keyctl update keyid "update key-type:master-key-name" + keyctl add encrypted name "new [format] key-type:master-key-name keylen" + ring + keyctl add encrypted name "load hex_blob" ring + keyctl update keyid "update key-type:master-key-name" + +format:= 'default | ecryptfs' +key-type:= 'trusted' | 'user' -where 'key-type' is either 'trusted' or 'user'. Examples of trusted and encrypted key usage: @@ -114,15 +121,25 @@ Reseal a trusted key under new pcr values: 7ef6a24defe4846104209bf0c3eced7fa1a672ed5b125fc9d8cd88b476a658a4434644ef df8ae9a178e9f83ba9f08d10fa47e4226b98b0702f06b3b8 -Create and save an encrypted key "evm" using the above trusted key "kmk": +The initial consumer of trusted keys is EVM, which at boot time needs a high +quality symmetric key for HMAC protection of file metadata.
The use of a +trusted key provides strong guarantees that the EVM key has not been +compromised by a user level problem, and when sealed to specific boot PCR +values, protects against boot and offline attacks. Create and save an +encrypted key "evm" using the above trusted key "kmk": +option 1: omitting 'format' $ keyctl add encrypted evm "new trusted:kmk 32" @u 159771175 +option 2: explicitly defining 'format' as 'default' + $ keyctl add encrypted evm "new default trusted:kmk 32" @u + 159771175 + $ keyctl print 159771175 - trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 - be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 - 5972dcb82ab2dde83376d82b2e3c09ffc + default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3 + 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0 + 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc $ keyctl pipe 159771175 > evm.blob @@ -132,14 +149,11 @@ Load an encrypted key "evm" from saved blob: 831684262 $ keyctl print 831684262 - trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b382dbbc55 - be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e024717c64 - 5972dcb82ab2dde83376d82b2e3c09ffc - - -The initial consumer of trusted keys is EVM, which at boot time needs a high -quality symmetric key for HMAC protection of file metadata. The use of a -trusted key provides strong guarantees that the EVM key has not been -compromised by a user level problem, and when sealed to specific boot PCR -values, protects against boot and offline attacks. Other uses for trusted and -encrypted keys, such as for disk and file encryption are anticipated. + default trusted:kmk 32 2375725ad57798846a9bbd240de8906f006e66c03af53b1b3 + 82dbbc55be2a44616e4959430436dc4f2a7a9659aa60bb4652aeb2120f149ed197c564e0 + 24717c64 5972dcb82ab2dde83376d82b2e3c09ffc + +Other uses for trusted and encrypted keys, such as for disk and file encryption, +are anticipated. In particular the new format 'ecryptfs' has been defined +in order to use encrypted keys to mount an eCryptfs filesystem. More details +about the usage can be found in the file 'Documentation/security/keys-ecryptfs.txt'. diff --git a/Documentation/watchdog/00-INDEX b/Documentation/watchdog/00-INDEX index ee994513a9b1..fc51128071c2 100644 --- a/Documentation/watchdog/00-INDEX +++ b/Documentation/watchdog/00-INDEX @@ -8,6 +8,8 @@ src/ - directory holding watchdog related example programs. watchdog-api.txt - description of the Linux Watchdog driver API. +watchdog-kernel-api.txt + - description of the Linux WatchDog Timer Driver Core kernel API. watchdog-parameters.txt - information on driver parameters (for drivers other than the ones that have driver-specific files here) diff --git a/Documentation/watchdog/watchdog-kernel-api.txt b/Documentation/watchdog/watchdog-kernel-api.txt new file mode 100644 index 000000000000..4f7c894244d2 --- /dev/null +++ b/Documentation/watchdog/watchdog-kernel-api.txt @@ -0,0 +1,162 @@ +The Linux WatchDog Timer Driver Core kernel API. +=============================================== +Last reviewed: 22-Jul-2011 + +Wim Van Sebroeck <wim@iguana.be> + +Introduction +------------ +This document does not describe what a WatchDog Timer (WDT) Driver or Device is. +It also does not describe the API which can be used by user space to communicate +with a WatchDog Timer. If you want to know this then please read the following +file: Documentation/watchdog/watchdog-api.txt. + +So what does this document describe?
It describes the API that can be used by +WatchDog Timer Drivers that want to use the WatchDog Timer Driver Core +Framework. This framework provides all interfacing towards user space so that +the same code does not have to be reproduced each time. This also means that +a watchdog timer driver then only needs to provide the different routines +(operations) that control the watchdog timer (WDT). + +The API +------- +Each watchdog timer driver that wants to use the WatchDog Timer Driver Core +must #include <linux/watchdog.h> (you would have to do this anyway when +writing a watchdog device driver). This include file contains the following +register/unregister routines: + +extern int watchdog_register_device(struct watchdog_device *); +extern void watchdog_unregister_device(struct watchdog_device *); + +The watchdog_register_device routine registers a watchdog timer device. +The parameter of this routine is a pointer to a watchdog_device structure. +This routine returns zero on success and a negative errno code for failure. + +The watchdog_unregister_device routine deregisters a registered watchdog timer +device. The parameter of this routine is the pointer to the registered +watchdog_device structure. + +The watchdog device structure looks like this: + +struct watchdog_device { + const struct watchdog_info *info; + const struct watchdog_ops *ops; + unsigned int bootstatus; + unsigned int timeout; + unsigned int min_timeout; + unsigned int max_timeout; + void *driver_data; + unsigned long status; +}; + +It contains the following fields: +* info: a pointer to a watchdog_info structure. This structure gives some + additional information about the watchdog timer itself. (Like its unique name) +* ops: a pointer to the list of watchdog operations that the watchdog supports. +* timeout: the watchdog timer's timeout value (in seconds). +* min_timeout: the watchdog timer's minimum timeout value (in seconds). +* max_timeout: the watchdog timer's maximum timeout value (in seconds). +* bootstatus: status of the device after booting (reported with watchdog + WDIOF_* status bits). +* driver_data: a pointer to the driver's private data of a watchdog device. + This data should only be accessed via the watchdog_set_drvdata and + watchdog_get_drvdata routines. +* status: this field contains a number of status bits that give extra + information about the status of the device (Like: is the watchdog timer + running/active, is the nowayout bit set, is the device opened via + the /dev/watchdog interface or not, ...). + +The list of watchdog operations is defined as: + +struct watchdog_ops { + struct module *owner; + /* mandatory operations */ + int (*start)(struct watchdog_device *); + int (*stop)(struct watchdog_device *); + /* optional operations */ + int (*ping)(struct watchdog_device *); + unsigned int (*status)(struct watchdog_device *); + int (*set_timeout)(struct watchdog_device *, unsigned int); + long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long); +}; + +It is important that you first define the module owner of the watchdog timer +driver's operations. This module owner will be used to lock the module when +the watchdog is active. (This is to avoid a system crash when you unload the +module and /dev/watchdog is still open). +Some operations are mandatory and some are optional; a minimal driver sketch +using these hooks follows the status-bit list below. The mandatory operations +are: +* start: this is a pointer to the routine that starts the watchdog timer + device. + The routine needs a pointer to the watchdog timer device structure as a + parameter.
It returns zero on success or a negative errno code for failure. +* stop: with this routine the watchdog timer device is being stopped. + The routine needs a pointer to the watchdog timer device structure as a + parameter. It returns zero on success or a negative errno code for failure. + Some watchdog timer hardware can only be started and not be stopped. The + driver supporting this hardware needs to make sure that both start and stop + routines are provided. This can be done by using a timer in the driver + that regularly sends a keepalive ping to the watchdog timer hardware. + +Not all watchdog timer hardware supports the same functionality. That's why +all other routines/operations are optional. They only need to be provided if +they are supported. These optional routines/operations are: +* ping: this is the routine that sends a keepalive ping to the watchdog timer + hardware. + The routine needs a pointer to the watchdog timer device structure as a + parameter. It returns zero on success or a negative errno code for failure. + Most hardware that does not support this as a separate function uses the + start function to restart the watchdog timer hardware. And that's also what + the watchdog timer driver core does: to send a keepalive ping to the watchdog + timer hardware it will either use the ping operation (when available) or the + start operation (when the ping operation is not available). + (Note: the WDIOC_KEEPALIVE ioctl call will only be active when the + WDIOF_KEEPALIVEPING bit has been set in the options field of the watchdog's + info structure). +* status: this routine checks the status of the watchdog timer device. The + status of the device is reported with watchdog WDIOF_* status flags/bits. +* set_timeout: this routine checks and changes the timeout of the watchdog + timer device. It returns 0 on success, -EINVAL for "parameter out of range" + and -EIO for "could not write value to the watchdog". On success the timeout + value of the watchdog_device will be changed to the value that was just used + to re-program the watchdog timer device. + (Note: the WDIOF_SETTIMEOUT option needs to be set in the options field of the + watchdog's info structure). +* ioctl: if this routine is present then it will be called first before we do + our own internal ioctl call handling. This routine should return -ENOIOCTLCMD + if a command is not supported. The parameters that are passed to the ioctl + call are: watchdog_device, cmd and arg. + +The status bits should (preferably) be set and cleared with set_bit and clear_bit style +bit-operations. The status bits that are defined are: +* WDOG_ACTIVE: this status bit indicates whether the watchdog timer device + is active or not. When the watchdog is active after booting, then you should + set this status bit (Note: when you register the watchdog timer device with + this bit set, then opening /dev/watchdog will skip the start operation) +* WDOG_DEV_OPEN: this status bit shows whether or not the watchdog device + was opened via /dev/watchdog. + (This bit should only be used by the WatchDog Timer Driver Core). +* WDOG_ALLOW_RELEASE: this bit stores whether or not the magic close character + has been sent (so that we can support the magic close feature). + (This bit should only be used by the WatchDog Timer Driver Core). +* WDOG_NO_WAY_OUT: this bit stores the nowayout setting for the watchdog. + If this bit is set then the watchdog timer will not be able to stop.
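
To make the registration flow concrete, here is a minimal driver sketch against the API described above. The "foo" device, its timeout limits and the elided register accesses are hypothetical placeholders; only the structures and the register/unregister calls come from this document:

#include <linux/module.h>
#include <linux/watchdog.h>

static int foo_wdt_start(struct watchdog_device *wdd)
{
	/* hypothetical: set the enable bit in the WDT control register */
	return 0;
}

static int foo_wdt_stop(struct watchdog_device *wdd)
{
	/* hypothetical: clear the enable bit in the WDT control register */
	return 0;
}

static const struct watchdog_info foo_wdt_info = {
	.identity = "foo watchdog",
	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE,
};

static const struct watchdog_ops foo_wdt_ops = {
	.owner = THIS_MODULE,
	.start = foo_wdt_start,
	.stop = foo_wdt_stop,
};

static struct watchdog_device foo_wdt_dev = {
	.info = &foo_wdt_info,
	.ops = &foo_wdt_ops,
	.timeout = 30,
	.min_timeout = 1,
	.max_timeout = 255,
};

static int __init foo_wdt_init(void)
{
	return watchdog_register_device(&foo_wdt_dev);
}

static void __exit foo_wdt_exit(void)
{
	watchdog_unregister_device(&foo_wdt_dev);
}

module_init(foo_wdt_init);
module_exit(foo_wdt_exit);
MODULE_LICENSE("GPL");

Because no ping operation is provided, the core will fall back to the start routine for keepalive pings, as described above.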
+ +Note: The WatchDog Timer Driver Core supports the magic close feature and +the nowayout feature. To use the magic close feature you must set the +WDIOF_MAGICCLOSE bit in the options field of the watchdog's info structure. +The nowayout feature will overrule the magic close feature. + +To get or set driver specific data the following two helper functions should be +used: + +static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) +static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) + +The watchdog_set_drvdata function allows you to add driver specific data. The +arguments of this function are the watchdog device where you want to add the +driver specific data to and a pointer to the data itself. + +The watchdog_get_drvdata function allows you to retrieve driver specific data. +The argument of this function is the watchdog device where you want to retrieve +data from. The function returns the pointer to the driver specific data. diff --git a/MAINTAINERS b/MAINTAINERS index 7b2e9e85e427..1d2e79db0f58 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6408,7 +6408,7 @@ L: tomoyo-users-en@lists.sourceforge.jp (subscribers-only, for users in English) L: tomoyo-dev@lists.sourceforge.jp (subscribers-only, for developers in Japanese) L: tomoyo-users@lists.sourceforge.jp (subscribers-only, for users in Japanese) W: http://tomoyo.sourceforge.jp/ -T: quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.3.x/tomoyo-lsm/patches/ +T: quilt http://svn.sourceforge.jp/svnroot/tomoyo/trunk/2.4.x/tomoyo-lsm/patches/ S: Maintained F: security/tomoyo/ diff --git a/arch/microblaze/include/asm/cpuinfo.h b/arch/microblaze/include/asm/cpuinfo.h index d8f013347a9e..7d6831ac8a46 100644 --- a/arch/microblaze/include/asm/cpuinfo.h +++ b/arch/microblaze/include/asm/cpuinfo.h @@ -38,6 +38,7 @@ struct cpuinfo { u32 use_exc; u32 ver_code; u32 mmu; + u32 mmu_privins; u32 endian; /* CPU caches */ diff --git a/arch/microblaze/include/asm/irqflags.h b/arch/microblaze/include/asm/irqflags.h index c4532f032b3b..c9a6262832c4 100644 --- a/arch/microblaze/include/asm/irqflags.h +++ b/arch/microblaze/include/asm/irqflags.h @@ -14,7 +14,7 @@ #if CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long flags; asm volatile(" msrclr %0, %1 \n" @@ -25,7 +25,7 @@ static inline unsigned long arch_local_irq_save(void) return flags; } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { /* this uses r0 without declaring it - is that correct? */ asm volatile(" msrclr r0, %0 \n" @@ -35,7 +35,7 @@ static inline void arch_local_irq_disable(void) : "memory"); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { /* this uses r0 without declaring it - is that correct?
*/ asm volatile(" msrset r0, %0 \n" @@ -47,7 +47,7 @@ static inline void arch_local_irq_enable(void) #else /* !CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */ -static inline unsigned long arch_local_irq_save(void) +static inline notrace unsigned long arch_local_irq_save(void) { unsigned long flags, tmp; asm volatile (" mfs %0, rmsr \n" @@ -61,7 +61,7 @@ static inline unsigned long arch_local_irq_save(void) return flags; } -static inline void arch_local_irq_disable(void) +static inline notrace void arch_local_irq_disable(void) { unsigned long tmp; asm volatile(" mfs %0, rmsr \n" @@ -74,7 +74,7 @@ static inline void arch_local_irq_disable(void) : "memory"); } -static inline void arch_local_irq_enable(void) +static inline notrace void arch_local_irq_enable(void) { unsigned long tmp; asm volatile(" mfs %0, rmsr \n" @@ -89,7 +89,7 @@ static inline void arch_local_irq_enable(void) #endif /* CONFIG_XILINX_MICROBLAZE0_USE_MSR_INSTR */ -static inline unsigned long arch_local_save_flags(void) +static inline notrace unsigned long arch_local_save_flags(void) { unsigned long flags; asm volatile(" mfs %0, rmsr \n" @@ -100,7 +100,7 @@ static inline unsigned long arch_local_save_flags(void) return flags; } -static inline void arch_local_irq_restore(unsigned long flags) +static inline notrace void arch_local_irq_restore(unsigned long flags) { asm volatile(" mts rmsr, %0 \n" " nop \n" @@ -109,12 +109,12 @@ static inline void arch_local_irq_restore(unsigned long flags) : "memory"); } -static inline bool arch_irqs_disabled_flags(unsigned long flags) +static inline notrace bool arch_irqs_disabled_flags(unsigned long flags) { return (flags & MSR_IE) == 0; } -static inline bool arch_irqs_disabled(void) +static inline notrace bool arch_irqs_disabled(void) { return arch_irqs_disabled_flags(arch_local_save_flags()); } diff --git a/arch/microblaze/include/asm/processor.h b/arch/microblaze/include/asm/processor.h index aed2a6be8e27..7283bfb2f7e4 100644 --- a/arch/microblaze/include/asm/processor.h +++ b/arch/microblaze/include/asm/processor.h @@ -125,9 +125,6 @@ struct thread_struct { .pgdir = swapper_pg_dir, \ } -/* Do necessary setup to start up a newly executed thread. */ -void start_thread(struct pt_regs *regs, - unsigned long pc, unsigned long usp); /* Free all resources held by a thread. 
*/ extern inline void release_thread(struct task_struct *dead_task) diff --git a/arch/microblaze/include/asm/prom.h b/arch/microblaze/include/asm/prom.h index 9ad567e2d425..20c5e8e5121b 100644 --- a/arch/microblaze/include/asm/prom.h +++ b/arch/microblaze/include/asm/prom.h @@ -26,8 +26,12 @@ #define HAVE_ARCH_DEVTREE_FIXUPS /* Other Prototypes */ -extern int early_uartlite_console(void); -extern int early_uart16550_console(void); +enum early_consoles { + UARTLITE = 1, + UART16550 = 2, +}; + +extern int of_early_console(void *version); /* * OF address retreival & translation diff --git a/arch/microblaze/include/asm/pvr.h b/arch/microblaze/include/asm/pvr.h index a10bec62e857..4bbdb4c03b57 100644 --- a/arch/microblaze/include/asm/pvr.h +++ b/arch/microblaze/include/asm/pvr.h @@ -111,16 +111,16 @@ struct pvr_s { /* Target family PVR mask */ #define PVR10_TARGET_FAMILY_MASK 0xFF000000 -/* MMU descrtiption */ +/* MMU description */ #define PVR11_USE_MMU 0xC0000000 #define PVR11_MMU_ITLB_SIZE 0x38000000 #define PVR11_MMU_DTLB_SIZE 0x07000000 #define PVR11_MMU_TLB_ACCESS 0x00C00000 #define PVR11_MMU_ZONES 0x003C0000 +#define PVR11_MMU_PRIVINS 0x00010000 /* MSR Reset value PVR mask */ #define PVR11_MSR_RESET_VALUE_MASK 0x000007FF - /* PVR access macros */ #define PVR_IS_FULL(_pvr) (_pvr.pvr[0] & PVR0_PVR_FULL_MASK) #define PVR_USE_BARREL(_pvr) (_pvr.pvr[0] & PVR0_USE_BARREL_MASK) @@ -216,6 +216,7 @@ struct pvr_s { #define PVR_MMU_DTLB_SIZE(_pvr) (_pvr.pvr[11] & PVR11_MMU_DTLB_SIZE) #define PVR_MMU_TLB_ACCESS(_pvr) (_pvr.pvr[11] & PVR11_MMU_TLB_ACCESS) #define PVR_MMU_ZONES(_pvr) (_pvr.pvr[11] & PVR11_MMU_ZONES) +#define PVR_MMU_PRIVINS(pvr) (pvr.pvr[11] & PVR11_MMU_PRIVINS) /* endian */ #define PVR_ENDIAN(_pvr) (_pvr.pvr[0] & PVR0_ENDI) diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h index 8f3968971e4e..904e5ef6a11b 100644 --- a/arch/microblaze/include/asm/setup.h +++ b/arch/microblaze/include/asm/setup.h @@ -23,6 +23,7 @@ extern char cmd_line[COMMAND_LINE_SIZE]; void early_printk(const char *fmt, ...); int setup_early_printk(char *opt); +void remap_early_printk(void); void disable_early_printk(void); #if defined(CONFIG_EARLY_PRINTK) diff --git a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c index f70a6047f08e..916aaedf1945 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c +++ b/arch/microblaze/kernel/cpu/cpuinfo-pvr-full.c @@ -72,6 +72,7 @@ void set_cpuinfo_pvr_full(struct cpuinfo *ci, struct device_node *cpu) CI(pvr_user2, USER2); CI(mmu, USE_MMU); + CI(mmu_privins, MMU_PRIVINS); CI(endian, ENDIAN); CI(use_icache, USE_ICACHE); diff --git a/arch/microblaze/kernel/cpu/cpuinfo-static.c b/arch/microblaze/kernel/cpu/cpuinfo-static.c index b16b994ca3d2..592bb2e838c4 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo-static.c +++ b/arch/microblaze/kernel/cpu/cpuinfo-static.c @@ -119,6 +119,7 @@ void __init set_cpuinfo_static(struct cpuinfo *ci, struct device_node *cpu) ci->pvr_user2 = fcpu(cpu, "xlnx,pvr-user2"); ci->mmu = fcpu(cpu, "xlnx,use-mmu"); + ci->mmu_privins = fcpu(cpu, "xlnx,mmu-privileged-instr"); ci->endian = fcpu(cpu, "xlnx,endianness"); ci->ver_code = 0; diff --git a/arch/microblaze/kernel/cpu/cpuinfo.c b/arch/microblaze/kernel/cpu/cpuinfo.c index c1640c52711f..44394d80a683 100644 --- a/arch/microblaze/kernel/cpu/cpuinfo.c +++ b/arch/microblaze/kernel/cpu/cpuinfo.c @@ -88,4 +88,8 @@ void __init setup_cpuinfo(void) printk(KERN_WARNING "%s: Unsupported PVR setting\n", __func__); 
set_cpuinfo_static(&cpuinfo, cpu); } + + if (cpuinfo.mmu_privins) + printk(KERN_WARNING "%s: Stream instructions enabled" + " - USERSPACE CAN LOCK THIS KERNEL!\n", __func__); } diff --git a/arch/microblaze/kernel/cpu/mb.c b/arch/microblaze/kernel/cpu/mb.c index b4048af02615..7b5dca7ed39d 100644 --- a/arch/microblaze/kernel/cpu/mb.c +++ b/arch/microblaze/kernel/cpu/mb.c @@ -97,6 +97,10 @@ static int show_cpuinfo(struct seq_file *m, void *v) (cpuinfo.use_exc & PVR2_FPU_EXC_MASK) ? "fpu " : "", (cpuinfo.use_exc & PVR2_USE_FSL_EXC) ? "fsl " : ""); + count += seq_printf(m, + "Stream-insns:\t%sprivileged\n", + cpuinfo.mmu_privins ? "un" : ""); + if (cpuinfo.use_icache) count += seq_printf(m, "Icache:\t\t%ukB\tline length:\t%dB\n", @@ -110,10 +114,11 @@ static int show_cpuinfo(struct seq_file *m, void *v) "Dcache:\t\t%ukB\tline length:\t%dB\n", cpuinfo.dcache_size >> 10, cpuinfo.dcache_line_length); + seq_printf(m, "Dcache-Policy:\t"); if (cpuinfo.dcache_wb) - count += seq_printf(m, "\t\twrite-back\n"); + count += seq_printf(m, "write-back\n"); else - count += seq_printf(m, "\t\twrite-through\n"); + count += seq_printf(m, "write-through\n"); } else count += seq_printf(m, "Dcache:\t\tno\n"); diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c index c3616a080ebf..d26d92d47754 100644 --- a/arch/microblaze/kernel/early_printk.c +++ b/arch/microblaze/kernel/early_printk.c @@ -35,7 +35,7 @@ static void early_printk_uartlite_putc(char c) * we'll never timeout on a working UART. */ - unsigned retries = 10000; + unsigned retries = 1000000; /* read status bit - 0x8 offset */ while (--retries && (in_be32(base_addr + 8) & (1 << 3))) ; @@ -60,7 +60,7 @@ static void early_printk_uartlite_write(struct console *unused, static struct console early_serial_uartlite_console = { .name = "earlyser", .write = early_printk_uartlite_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; #endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */ @@ -104,7 +104,7 @@ static void early_printk_uart16550_write(struct console *unused, static struct console early_serial_uart16550_console = { .name = "earlyser", .write = early_printk_uart16550_write, - .flags = CON_PRINTBUFFER, + .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; #endif /* CONFIG_SERIAL_8250_CONSOLE */ @@ -127,48 +127,56 @@ void early_printk(const char *fmt, ...) 
int __init setup_early_printk(char *opt) { + int version = 0; + if (early_console_initialized) return 1; -#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE - base_addr = early_uartlite_console(); + base_addr = of_early_console(&version); if (base_addr) { - early_console_initialized = 1; #ifdef CONFIG_MMU early_console_reg_tlb_alloc(base_addr); #endif - early_console = &early_serial_uartlite_console; - early_printk("early_printk_console is enabled at 0x%08x\n", - base_addr); - - /* register_console(early_console); */ - - return 0; - } -#endif /* CONFIG_SERIAL_UARTLITE_CONSOLE */ - + switch (version) { +#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE + case UARTLITE: + printk(KERN_INFO "Early console on uartlite " + "at 0x%08x\n", base_addr); + early_console = &early_serial_uartlite_console; + break; +#endif #ifdef CONFIG_SERIAL_8250_CONSOLE - base_addr = early_uart16550_console(); - base_addr &= ~3; /* clear register offset */ - if (base_addr) { - early_console_initialized = 1; -#ifdef CONFIG_MMU - early_console_reg_tlb_alloc(base_addr); + case UART16550: + printk(KERN_INFO "Early console on uart16550 " + "at 0x%08x\n", base_addr); + early_console = &early_serial_uart16550_console; + break; #endif - early_console = &early_serial_uart16550_console; - - early_printk("early_printk_console is enabled at 0x%08x\n", - base_addr); - - /* register_console(early_console); */ + default: + printk(KERN_INFO "Unsupported early console %d\n", + version); + return 1; + } + register_console(early_console); + early_console_initialized = 1; return 0; } -#endif /* CONFIG_SERIAL_8250_CONSOLE */ - return 1; } +/* Remap early console to virtual address and do not allocate one TLB + * only for early console because of performance degradation */ +void __init remap_early_printk(void) +{ + if (!early_console_initialized || !early_console) + return; + printk(KERN_INFO "early_printk_console remapping from 0x%x to ", + base_addr); + base_addr = (u32) ioremap(base_addr, PAGE_SIZE); + printk(KERN_CONT "0x%x\n", base_addr); +} + void __init disable_early_printk(void) { if (!early_console_initialized || !early_console) diff --git a/arch/microblaze/kernel/hw_exception_handler.S b/arch/microblaze/kernel/hw_exception_handler.S index 56572e923a83..e62be8379604 100644 --- a/arch/microblaze/kernel/hw_exception_handler.S +++ b/arch/microblaze/kernel/hw_exception_handler.S @@ -1113,23 +1113,23 @@ lw_r10_vm: R3_TO_LWREG_VM_V (10); lw_r11_vm: R3_TO_LWREG_VM_V (11); lw_r12_vm: R3_TO_LWREG_VM_V (12); lw_r13_vm: R3_TO_LWREG_VM_V (13); -lw_r14_vm: R3_TO_LWREG_VM (14); +lw_r14_vm: R3_TO_LWREG_VM_V (14); lw_r15_vm: R3_TO_LWREG_VM_V (15); -lw_r16_vm: R3_TO_LWREG_VM (16); +lw_r16_vm: R3_TO_LWREG_VM_V (16); lw_r17_vm: R3_TO_LWREG_VM_V (17); lw_r18_vm: R3_TO_LWREG_VM_V (18); -lw_r19_vm: R3_TO_LWREG_VM (19); -lw_r20_vm: R3_TO_LWREG_VM (20); -lw_r21_vm: R3_TO_LWREG_VM (21); -lw_r22_vm: R3_TO_LWREG_VM (22); -lw_r23_vm: R3_TO_LWREG_VM (23); -lw_r24_vm: R3_TO_LWREG_VM (24); -lw_r25_vm: R3_TO_LWREG_VM (25); -lw_r26_vm: R3_TO_LWREG_VM (26); -lw_r27_vm: R3_TO_LWREG_VM (27); -lw_r28_vm: R3_TO_LWREG_VM (28); -lw_r29_vm: R3_TO_LWREG_VM (29); -lw_r30_vm: R3_TO_LWREG_VM (30); +lw_r19_vm: R3_TO_LWREG_VM_V (19); +lw_r20_vm: R3_TO_LWREG_VM_V (20); +lw_r21_vm: R3_TO_LWREG_VM_V (21); +lw_r22_vm: R3_TO_LWREG_VM_V (22); +lw_r23_vm: R3_TO_LWREG_VM_V (23); +lw_r24_vm: R3_TO_LWREG_VM_V (24); +lw_r25_vm: R3_TO_LWREG_VM_V (25); +lw_r26_vm: R3_TO_LWREG_VM_V (26); +lw_r27_vm: R3_TO_LWREG_VM_V (27); +lw_r28_vm: R3_TO_LWREG_VM_V (28); +lw_r29_vm: R3_TO_LWREG_VM_V (29); +lw_r30_vm:
R3_TO_LWREG_VM_V (30); lw_r31_vm: R3_TO_LWREG_VM_V (31); sw_table_vm: @@ -1147,23 +1147,23 @@ sw_r10_vm: SWREG_TO_R3_VM_V (10); sw_r11_vm: SWREG_TO_R3_VM_V (11); sw_r12_vm: SWREG_TO_R3_VM_V (12); sw_r13_vm: SWREG_TO_R3_VM_V (13); -sw_r14_vm: SWREG_TO_R3_VM (14); +sw_r14_vm: SWREG_TO_R3_VM_V (14); sw_r15_vm: SWREG_TO_R3_VM_V (15); -sw_r16_vm: SWREG_TO_R3_VM (16); +sw_r16_vm: SWREG_TO_R3_VM_V (16); sw_r17_vm: SWREG_TO_R3_VM_V (17); sw_r18_vm: SWREG_TO_R3_VM_V (18); -sw_r19_vm: SWREG_TO_R3_VM (19); -sw_r20_vm: SWREG_TO_R3_VM (20); -sw_r21_vm: SWREG_TO_R3_VM (21); -sw_r22_vm: SWREG_TO_R3_VM (22); -sw_r23_vm: SWREG_TO_R3_VM (23); -sw_r24_vm: SWREG_TO_R3_VM (24); -sw_r25_vm: SWREG_TO_R3_VM (25); -sw_r26_vm: SWREG_TO_R3_VM (26); -sw_r27_vm: SWREG_TO_R3_VM (27); -sw_r28_vm: SWREG_TO_R3_VM (28); -sw_r29_vm: SWREG_TO_R3_VM (29); -sw_r30_vm: SWREG_TO_R3_VM (30); +sw_r19_vm: SWREG_TO_R3_VM_V (19); +sw_r20_vm: SWREG_TO_R3_VM_V (20); +sw_r21_vm: SWREG_TO_R3_VM_V (21); +sw_r22_vm: SWREG_TO_R3_VM_V (22); +sw_r23_vm: SWREG_TO_R3_VM_V (23); +sw_r24_vm: SWREG_TO_R3_VM_V (24); +sw_r25_vm: SWREG_TO_R3_VM_V (25); +sw_r26_vm: SWREG_TO_R3_VM_V (26); +sw_r27_vm: SWREG_TO_R3_VM_V (27); +sw_r28_vm: SWREG_TO_R3_VM_V (28); +sw_r29_vm: SWREG_TO_R3_VM_V (29); +sw_r30_vm: SWREG_TO_R3_VM_V (30); sw_r31_vm: SWREG_TO_R3_VM_V (31); #endif /* CONFIG_MMU */ diff --git a/arch/microblaze/kernel/intc.c b/arch/microblaze/kernel/intc.c index c88f066f41bd..eb41441c7fd0 100644 --- a/arch/microblaze/kernel/intc.c +++ b/arch/microblaze/kernel/intc.c @@ -134,7 +134,7 @@ void __init init_IRQ(void) intr_type = be32_to_cpup(of_get_property(intc, "xlnx,kind-of-intr", NULL)); - if (intr_type >= (1 << (nr_irq + 1))) + if (intr_type > (u32)((1ULL << nr_irq) - 1)) printk(KERN_INFO " ERROR: Mismatch in kind-of-intr param\n"); #ifdef CONFIG_SELFMOD_INTC diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 968648a81c1e..dbb812421d8a 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -237,7 +237,6 @@ unsigned long get_wchan(struct task_struct *p) /* Set up a thread for executing a new program */ void start_thread(struct pt_regs *regs, unsigned long pc, unsigned long usp) { - set_fs(USER_DS); regs->pc = pc; regs->r1 = usp; regs->pt_mode = 0; diff --git a/arch/microblaze/kernel/prom.c b/arch/microblaze/kernel/prom.c index b15cc219b1d9..977484add216 100644 --- a/arch/microblaze/kernel/prom.c +++ b/arch/microblaze/kernel/prom.c @@ -53,69 +53,58 @@ void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) } #ifdef CONFIG_EARLY_PRINTK -/* MS this is Microblaze specifig function */ -static int __init early_init_dt_scan_serial(unsigned long node, - const char *uname, int depth, void *data) -{ - unsigned long l; - char *p; - const __be32 *addr; - - pr_debug("search \"serial\", depth: %d, uname: %s\n", depth, uname); - -/* find all serial nodes */ - if (strncmp(uname, "serial", 6) != 0) - return 0; - -/* find compatible node with uartlite */ - p = of_get_flat_dt_prop(node, "compatible", &l); - if ((strncmp(p, "xlnx,xps-uartlite", 17) != 0) && - (strncmp(p, "xlnx,opb-uartlite", 17) != 0) && - (strncmp(p, "xlnx,axi-uartlite", 17) != 0)) - return 0; - - addr = of_get_flat_dt_prop(node, "reg", &l); - return be32_to_cpup(addr); /* return address */ -} +char *stdout; -/* this function is looking for early uartlite console - Microblaze specific */ -int __init early_uartlite_console(void) -{ - return of_scan_flat_dt(early_init_dt_scan_serial, NULL); -} - -/* MS this is Microblaze 
specifig function */ -static int __init early_init_dt_scan_serial_full(unsigned long node, +int __init early_init_dt_scan_chosen_serial(unsigned long node, const char *uname, int depth, void *data) { unsigned long l; char *p; - unsigned int addr; - - pr_debug("search \"chosen\", depth: %d, uname: %s\n", depth, uname); - -/* find all serial nodes */ - if (strncmp(uname, "serial", 6) != 0) - return 0; - early_init_dt_check_for_initrd(node); - -/* find compatible node with uartlite */ - p = of_get_flat_dt_prop(node, "compatible", &l); - - if ((strncmp(p, "xlnx,xps-uart16550", 18) != 0) && - (strncmp(p, "xlnx,axi-uart16550", 18) != 0)) - return 0; - - addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l); - addr += *(u32 *)of_get_flat_dt_prop(node, "reg-offset", &l); - return be32_to_cpu(addr); /* return address */ + pr_debug("%s: depth: %d, uname: %s\n", __func__, depth, uname); + + if (depth == 1 && (strcmp(uname, "chosen") == 0 || + strcmp(uname, "chosen@0") == 0)) { + p = of_get_flat_dt_prop(node, "linux,stdout-path", &l); + if (p != NULL && l > 0) + stdout = p; /* store pointer to stdout-path */ + } + + if (stdout && strstr(stdout, uname)) { + p = of_get_flat_dt_prop(node, "compatible", &l); + pr_debug("Compatible string: %s\n", p); + + if ((strncmp(p, "xlnx,xps-uart16550", 18) == 0) || + (strncmp(p, "xlnx,axi-uart16550", 18) == 0)) { + unsigned int addr; + + *(u32 *)data = UART16550; + + addr = *(u32 *)of_get_flat_dt_prop(node, "reg", &l); + addr += *(u32 *)of_get_flat_dt_prop(node, + "reg-offset", &l); + /* clear register offset */ + return be32_to_cpu(addr) & ~3; + } + if ((strncmp(p, "xlnx,xps-uartlite", 17) == 0) || + (strncmp(p, "xlnx,opb-uartlite", 17) == 0) || + (strncmp(p, "xlnx,axi-uartlite", 17) == 0) || + (strncmp(p, "xlnx,mdm", 8) == 0)) { + unsigned int *addrp; + + *(u32 *)data = UARTLITE; + + addrp = of_get_flat_dt_prop(node, "reg", &l); + return be32_to_cpup(addrp); /* return address */ + } + } + return 0; } -/* this function is looking for early uartlite console - Microblaze specific */ -int __init early_uart16550_console(void) +/* this function is looking for early console - Microblaze specific */ +int __init of_early_console(void *version) { - return of_scan_flat_dt(early_init_dt_scan_serial_full, NULL); + return of_scan_flat_dt(early_init_dt_scan_chosen_serial, version); } #endif diff --git a/arch/microblaze/kernel/setup.c b/arch/microblaze/kernel/setup.c index 8e2c09b7ff26..0e654a12d37e 100644 --- a/arch/microblaze/kernel/setup.c +++ b/arch/microblaze/kernel/setup.c @@ -59,6 +59,11 @@ void __init setup_arch(char **cmdline_p) setup_memory(); +#ifdef CONFIG_EARLY_PRINTK + /* remap early console to virtual address */ + remap_early_printk(); +#endif + xilinx_pci_init(); #if defined(CONFIG_SELFMOD_INTC) || defined(CONFIG_SELFMOD_TIMER) diff --git a/arch/sparc/include/asm/elf_64.h b/arch/sparc/include/asm/elf_64.h index cfa9cd2e5519..64f7a00b3747 100644 --- a/arch/sparc/include/asm/elf_64.h +++ b/arch/sparc/include/asm/elf_64.h @@ -177,9 +177,11 @@ static inline unsigned int sparc64_elf_hwcap(void) cap |= HWCAP_SPARC_ULTRA3; else if (tlb_type == hypervisor) { if (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || - sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3) cap |= HWCAP_SPARC_BLKINIT; - if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2) + if (sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || + sun4v_chip_type == SUN4V_CHIP_NIAGARA3) cap |= HWCAP_SPARC_N2; } diff --git a/arch/sparc/include/asm/hypervisor.h 
b/arch/sparc/include/asm/hypervisor.h index 75686409be24..7a5f80df15d0 100644 --- a/arch/sparc/include/asm/hypervisor.h +++ b/arch/sparc/include/asm/hypervisor.h @@ -2950,6 +2950,7 @@ extern unsigned long sun4v_ncs_request(unsigned long request, #define HV_GRP_N2_CPU 0x0202 #define HV_GRP_NIU 0x0204 #define HV_GRP_VF_CPU 0x0205 +#define HV_GRP_KT_CPU 0x0209 #define HV_GRP_DIAG 0x0300 #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/spitfire.h b/arch/sparc/include/asm/spitfire.h index f0d0c40c44da..55a17c6efeb8 100644 --- a/arch/sparc/include/asm/spitfire.h +++ b/arch/sparc/include/asm/spitfire.h @@ -42,6 +42,7 @@ #define SUN4V_CHIP_INVALID 0x00 #define SUN4V_CHIP_NIAGARA1 0x01 #define SUN4V_CHIP_NIAGARA2 0x02 +#define SUN4V_CHIP_NIAGARA3 0x03 #define SUN4V_CHIP_UNKNOWN 0xff #ifndef __ASSEMBLY__ diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index bee4bf4be3af..9ed6ff679ab7 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -65,6 +65,7 @@ static struct xor_block_template xor_block_niagara = { #define XOR_SELECT_TEMPLATE(FASTEST) \ ((tlb_type == hypervisor && \ (sun4v_chip_type == SUN4V_CHIP_NIAGARA1 || \ - sun4v_chip_type == SUN4V_CHIP_NIAGARA2)) ? \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA2 || \ + sun4v_chip_type == SUN4V_CHIP_NIAGARA3)) ? \ &xor_block_niagara : \ &xor_block_VIS) diff --git a/arch/sparc/kernel/cpu.c b/arch/sparc/kernel/cpu.c index 138dbbc8dc84..17cf290dc2bc 100644 --- a/arch/sparc/kernel/cpu.c +++ b/arch/sparc/kernel/cpu.c @@ -474,11 +474,18 @@ static void __init sun4v_cpu_probe(void) sparc_pmu_type = "niagara2"; break; + case SUN4V_CHIP_NIAGARA3: + sparc_cpu_type = "UltraSparc T3 (Niagara3)"; + sparc_fpu_type = "UltraSparc T3 integrated FPU"; + sparc_pmu_type = "niagara3"; + break; + default: printk(KERN_WARNING "CPU: Unknown sun4v cpu type [%s]\n", prom_cpu_compatible); sparc_cpu_type = "Unknown SUN4V CPU"; sparc_fpu_type = "Unknown SUN4V FPU"; + sparc_pmu_type = "Unknown SUN4V PMU"; break; } } diff --git a/arch/sparc/kernel/cpumap.c b/arch/sparc/kernel/cpumap.c index d91fd782743a..4197e8d62d4c 100644 --- a/arch/sparc/kernel/cpumap.c +++ b/arch/sparc/kernel/cpumap.c @@ -324,6 +324,7 @@ static int iterate_cpu(struct cpuinfo_tree *t, unsigned int root_index) switch (sun4v_chip_type) { case SUN4V_CHIP_NIAGARA1: case SUN4V_CHIP_NIAGARA2: + case SUN4V_CHIP_NIAGARA3: rover_inc_table = niagara_iterate_method; break; default: diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index aa594c792d19..c752603a7c0d 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -132,6 +132,8 @@ prom_sun4v_name: .asciz "sun4v" prom_niagara_prefix: .asciz "SUNW,UltraSPARC-T" +prom_sparc_prefix: + .asciz "SPARC-T" .align 4 prom_root_compatible: .skip 64 @@ -382,6 +384,22 @@ sun4v_chip_type: 90: ldub [%g7], %g2 ldub [%g1], %g4 cmp %g2, %g4 + bne,pn %icc, 89f + add %g7, 1, %g7 + subcc %g3, 1, %g3 + bne,pt %xcc, 90b + add %g1, 1, %g1 + ba,pt %xcc, 91f + nop + +89: sethi %hi(prom_cpu_compatible), %g1 + or %g1, %lo(prom_cpu_compatible), %g1 + sethi %hi(prom_sparc_prefix), %g7 + or %g7, %lo(prom_sparc_prefix), %g7 + mov 7, %g3 +90: ldub [%g7], %g2 + ldub [%g1], %g4 + cmp %g2, %g4 bne,pn %icc, 4f add %g7, 1, %g7 subcc %g3, 1, %g3 @@ -390,6 +408,15 @@ sun4v_chip_type: sethi %hi(prom_cpu_compatible), %g1 or %g1, %lo(prom_cpu_compatible), %g1 + ldub [%g1 + 7], %g2 + cmp %g2, '3' + be,pt %xcc, 5f + mov SUN4V_CHIP_NIAGARA3, %g4 + ba,pt %xcc, 4f + nop + +91: sethi %hi(prom_cpu_compatible), %g1 + 
or %g1, %lo(prom_cpu_compatible), %g1 ldub [%g1 + 17], %g2 cmp %g2, '1' be,pt %xcc, 5f @@ -397,6 +424,7 @@ sun4v_chip_type: cmp %g2, '2' be,pt %xcc, 5f mov SUN4V_CHIP_NIAGARA2, %g4 + 4: mov SUN4V_CHIP_UNKNOWN, %g4 5: sethi %hi(sun4v_chip_type), %g2 @@ -514,6 +542,9 @@ niagara_tlb_fixup: cmp %g1, SUN4V_CHIP_NIAGARA2 be,pt %xcc, niagara2_patch nop + cmp %g1, SUN4V_CHIP_NIAGARA3 + be,pt %xcc, niagara2_patch + nop call generic_patch_copyops nop diff --git a/arch/sparc/kernel/hvapi.c b/arch/sparc/kernel/hvapi.c index 7c60afb835b0..d306e648c33c 100644 --- a/arch/sparc/kernel/hvapi.c +++ b/arch/sparc/kernel/hvapi.c @@ -38,6 +38,7 @@ static struct api_info api_table[] = { { .group = HV_GRP_N2_CPU, }, { .group = HV_GRP_NIU, }, { .group = HV_GRP_VF_CPU, }, + { .group = HV_GRP_KT_CPU, }, { .group = HV_GRP_DIAG, .flags = FLAG_PRE_API }, }; diff --git a/arch/sparc/kernel/pcr.c b/arch/sparc/kernel/pcr.c index 8ac23e660080..343b0f9e2e7b 100644 --- a/arch/sparc/kernel/pcr.c +++ b/arch/sparc/kernel/pcr.c @@ -80,8 +80,11 @@ static void n2_pcr_write(u64 val) { unsigned long ret; - ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); - if (ret != HV_EOK) + if (val & PCR_N2_HTRACE) { + ret = sun4v_niagara2_setperf(HV_N2_PERF_SPARC_CTL, val); + if (ret != HV_EOK) + write_pcr(val); + } else write_pcr(val); } @@ -106,6 +109,10 @@ static int __init register_perf_hsvc(void) perf_hsvc_group = HV_GRP_N2_CPU; break; + case SUN4V_CHIP_NIAGARA3: + perf_hsvc_group = HV_GRP_KT_CPU; + break; + default: return -ENODEV; } diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 171e8d84dc3f..614da624330c 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1343,7 +1343,8 @@ static bool __init supported_pmu(void) sparc_pmu = &niagara1_pmu; return true; } - if (!strcmp(sparc_pmu_type, "niagara2")) { + if (!strcmp(sparc_pmu_type, "niagara2") || + !strcmp(sparc_pmu_type, "niagara3")) { sparc_pmu = &niagara2_pmu; return true; } diff --git a/arch/sparc/lib/atomic32.c b/arch/sparc/lib/atomic32.c index 1a371f8ae0b0..8600eb2461b5 100644 --- a/arch/sparc/lib/atomic32.c +++ b/arch/sparc/lib/atomic32.c @@ -55,7 +55,7 @@ int atomic_cmpxchg(atomic_t *v, int old, int new) } EXPORT_SYMBOL(atomic_cmpxchg); -int atomic_add_unless(atomic_t *v, int a, int u) +int __atomic_add_unless(atomic_t *v, int a, int u) { int ret; unsigned long flags; @@ -67,7 +67,7 @@ int atomic_add_unless(atomic_t *v, int a, int u) spin_unlock_irqrestore(ATOMIC_HASH(v), flags); return ret != u; } -EXPORT_SYMBOL(atomic_add_unless); +EXPORT_SYMBOL(__atomic_add_unless); /* Atomic operations are already serializing */ void atomic_set(atomic_t *v, int i) diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c index ac6739e085e3..c3de70de00d4 100644 --- a/drivers/char/hw_random/n2-drv.c +++ b/drivers/char/hw_random/n2-drv.c @@ -1,6 +1,6 @@ /* n2-drv.c: Niagara-2 RNG driver. * - * Copyright (C) 2008 David S. Miller <davem@davemloft.net> + * Copyright (C) 2008, 2011 David S. 
Miller <davem@davemloft.net> */ #include <linux/kernel.h> @@ -22,8 +22,8 @@ #define DRV_MODULE_NAME "n2rng" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "0.1" -#define DRV_MODULE_RELDATE "May 15, 2008" +#define DRV_MODULE_VERSION "0.2" +#define DRV_MODULE_RELDATE "July 27, 2011" static char version[] __devinitdata = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; @@ -623,14 +623,14 @@ static const struct of_device_id n2rng_match[]; static int __devinit n2rng_probe(struct platform_device *op) { const struct of_device_id *match; - int victoria_falls; + int multi_capable; int err = -ENOMEM; struct n2rng *np; match = of_match_device(n2rng_match, &op->dev); if (!match) return -EINVAL; - victoria_falls = (match->data != NULL); + multi_capable = (match->data != NULL); n2rng_driver_version(); np = kzalloc(sizeof(*np), GFP_KERNEL); @@ -640,8 +640,8 @@ static int __devinit n2rng_probe(struct platform_device *op) INIT_DELAYED_WORK(&np->work, n2rng_work); - if (victoria_falls) - np->flags |= N2RNG_FLAG_VF; + if (multi_capable) + np->flags |= N2RNG_FLAG_MULTI; err = -ENODEV; np->hvapi_major = 2; @@ -658,10 +658,10 @@ static int __devinit n2rng_probe(struct platform_device *op) } } - if (np->flags & N2RNG_FLAG_VF) { + if (np->flags & N2RNG_FLAG_MULTI) { if (np->hvapi_major < 2) { - dev_err(&op->dev, "VF RNG requires HVAPI major " - "version 2 or later, got %lu\n", + dev_err(&op->dev, "multi-unit-capable RNG requires " + "HVAPI major version 2 or later, got %lu\n", np->hvapi_major); goto out_hvapi_unregister; } @@ -688,8 +688,8 @@ static int __devinit n2rng_probe(struct platform_device *op) goto out_free_units; dev_info(&op->dev, "Found %s RNG, units: %d\n", - ((np->flags & N2RNG_FLAG_VF) ? - "Victoria Falls" : "Niagara2"), + ((np->flags & N2RNG_FLAG_MULTI) ? 
+ "multi-unit-capable" : "single-unit"), np->num_units); np->hwrng.name = "n2rng"; @@ -751,6 +751,11 @@ static const struct of_device_id n2rng_match[] = { .compatible = "SUNW,vf-rng", .data = (void *) 1, }, + { + .name = "random-number-generator", + .compatible = "SUNW,kt-rng", + .data = (void *) 1, + }, {}, }; MODULE_DEVICE_TABLE(of, n2rng_match); diff --git a/drivers/char/hw_random/n2rng.h b/drivers/char/hw_random/n2rng.h index 4bea07f30978..f244ac89087f 100644 --- a/drivers/char/hw_random/n2rng.h +++ b/drivers/char/hw_random/n2rng.h @@ -68,7 +68,7 @@ struct n2rng { struct platform_device *op; unsigned long flags; -#define N2RNG_FLAG_VF 0x00000001 /* Victoria Falls RNG, else N2 */ +#define N2RNG_FLAG_MULTI 0x00000001 /* Multi-unit capable RNG */ #define N2RNG_FLAG_CONTROL 0x00000002 /* Operating in control domain */ #define N2RNG_FLAG_READY 0x00000008 /* Ready for hw-rng layer */ #define N2RNG_FLAG_SHUTDOWN 0x00000010 /* Driver unregistering */ diff --git a/drivers/char/tpm/tpm.c b/drivers/char/tpm/tpm.c index 7beb0e25f1e1..caf8012ef47c 100644 --- a/drivers/char/tpm/tpm.c +++ b/drivers/char/tpm/tpm.c @@ -534,6 +534,7 @@ void tpm_get_timeouts(struct tpm_chip *chip) struct duration_t *duration_cap; ssize_t rc; u32 timeout; + unsigned int scale = 1; tpm_cmd.header.in = tpm_getcap_header; tpm_cmd.params.getcap_in.cap = TPM_CAP_PROP; @@ -545,24 +546,30 @@ void tpm_get_timeouts(struct tpm_chip *chip) if (rc) goto duration; - if (be32_to_cpu(tpm_cmd.header.out.length) - != 4 * sizeof(u32)) - goto duration; + if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) + != sizeof(tpm_cmd.header.out) + sizeof(u32) + 4 * sizeof(u32)) + return; timeout_cap = &tpm_cmd.params.getcap_out.cap.timeout; /* Don't overwrite default if value is 0 */ timeout = be32_to_cpu(timeout_cap->a); + if (timeout && timeout < 1000) { + /* timeouts in msec rather usec */ + scale = 1000; + chip->vendor.timeout_adjusted = true; + } if (timeout) - chip->vendor.timeout_a = usecs_to_jiffies(timeout); + chip->vendor.timeout_a = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->b); if (timeout) - chip->vendor.timeout_b = usecs_to_jiffies(timeout); + chip->vendor.timeout_b = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->c); if (timeout) - chip->vendor.timeout_c = usecs_to_jiffies(timeout); + chip->vendor.timeout_c = usecs_to_jiffies(timeout * scale); timeout = be32_to_cpu(timeout_cap->d); if (timeout) - chip->vendor.timeout_d = usecs_to_jiffies(timeout); + chip->vendor.timeout_d = usecs_to_jiffies(timeout * scale); duration: tpm_cmd.header.in = tpm_getcap_header; @@ -575,23 +582,31 @@ duration: if (rc) return; - if (be32_to_cpu(tpm_cmd.header.out.return_code) - != 3 * sizeof(u32)) + if (be32_to_cpu(tpm_cmd.header.out.return_code) != 0 || + be32_to_cpu(tpm_cmd.header.out.length) + != sizeof(tpm_cmd.header.out) + sizeof(u32) + 3 * sizeof(u32)) return; + duration_cap = &tpm_cmd.params.getcap_out.cap.duration; chip->vendor.duration[TPM_SHORT] = usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_short)); + chip->vendor.duration[TPM_MEDIUM] = + usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium)); + chip->vendor.duration[TPM_LONG] = + usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long)); + /* The Broadcom BCM0102 chipset in a Dell Latitude D820 gets the above * value wrong and apparently reports msecs rather than usecs. So we * fix up the resulting too-small TPM_SHORT value to make things work. + * We also scale the TPM_MEDIUM and -_LONG values by 1000. 
*/ - if (chip->vendor.duration[TPM_SHORT] < (HZ/100)) + if (chip->vendor.duration[TPM_SHORT] < (HZ / 100)) { chip->vendor.duration[TPM_SHORT] = HZ; - - chip->vendor.duration[TPM_MEDIUM] = - usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_medium)); - chip->vendor.duration[TPM_LONG] = - usecs_to_jiffies(be32_to_cpu(duration_cap->tpm_long)); + chip->vendor.duration[TPM_MEDIUM] *= 1000; + chip->vendor.duration[TPM_LONG] *= 1000; + chip->vendor.duration_adjusted = true; + dev_info(chip->dev, "Adjusting TPM timeout parameters."); + } } EXPORT_SYMBOL_GPL(tpm_get_timeouts); @@ -600,7 +615,7 @@ void tpm_continue_selftest(struct tpm_chip *chip) u8 data[] = { 0, 193, /* TPM_TAG_RQU_COMMAND */ 0, 0, 0, 10, /* length */ - 0, 0, 0, 83, /* TPM_ORD_GetCapability */ + 0, 0, 0, 83, /* TPM_ORD_ContinueSelfTest */ }; tpm_transmit(chip, data, sizeof(data)); @@ -863,18 +878,24 @@ ssize_t tpm_show_pubek(struct device *dev, struct device_attribute *attr, data = tpm_cmd.params.readpubek_out_buffer; str += sprintf(str, - "Algorithm: %02X %02X %02X %02X\nEncscheme: %02X %02X\n" - "Sigscheme: %02X %02X\nParameters: %02X %02X %02X %02X" - " %02X %02X %02X %02X %02X %02X %02X %02X\n" - "Modulus length: %d\nModulus: \n", - data[10], data[11], data[12], data[13], data[14], - data[15], data[16], data[17], data[22], data[23], - data[24], data[25], data[26], data[27], data[28], - data[29], data[30], data[31], data[32], data[33], - be32_to_cpu(*((__be32 *) (data + 34)))); + "Algorithm: %02X %02X %02X %02X\n" + "Encscheme: %02X %02X\n" + "Sigscheme: %02X %02X\n" + "Parameters: %02X %02X %02X %02X " + "%02X %02X %02X %02X " + "%02X %02X %02X %02X\n" + "Modulus length: %d\n" + "Modulus:\n", + data[0], data[1], data[2], data[3], + data[4], data[5], + data[6], data[7], + data[12], data[13], data[14], data[15], + data[16], data[17], data[18], data[19], + data[20], data[21], data[22], data[23], + be32_to_cpu(*((__be32 *) (data + 24)))); for (i = 0; i < 256; i++) { - str += sprintf(str, "%02X ", data[i + 38]); + str += sprintf(str, "%02X ", data[i + 28]); if ((i + 1) % 16 == 0) str += sprintf(str, "\n"); } @@ -937,6 +958,35 @@ ssize_t tpm_show_caps_1_2(struct device * dev, } EXPORT_SYMBOL_GPL(tpm_show_caps_1_2); +ssize_t tpm_show_durations(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d %d %d [%s]\n", + jiffies_to_usecs(chip->vendor.duration[TPM_SHORT]), + jiffies_to_usecs(chip->vendor.duration[TPM_MEDIUM]), + jiffies_to_usecs(chip->vendor.duration[TPM_LONG]), + chip->vendor.duration_adjusted + ? "adjusted" : "original"); +} +EXPORT_SYMBOL_GPL(tpm_show_durations); + +ssize_t tpm_show_timeouts(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct tpm_chip *chip = dev_get_drvdata(dev); + + return sprintf(buf, "%d %d %d %d [%s]\n", + jiffies_to_usecs(chip->vendor.timeout_a), + jiffies_to_usecs(chip->vendor.timeout_b), + jiffies_to_usecs(chip->vendor.timeout_c), + jiffies_to_usecs(chip->vendor.timeout_d), + chip->vendor.timeout_adjusted + ? 
"adjusted" : "original"); +} +EXPORT_SYMBOL_GPL(tpm_show_timeouts); + ssize_t tpm_store_cancel(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { diff --git a/drivers/char/tpm/tpm.h b/drivers/char/tpm/tpm.h index 72ddb031b69a..9c4163cfa3ce 100644 --- a/drivers/char/tpm/tpm.h +++ b/drivers/char/tpm/tpm.h @@ -56,6 +56,10 @@ extern ssize_t tpm_show_owned(struct device *, struct device_attribute *attr, char *); extern ssize_t tpm_show_temp_deactivated(struct device *, struct device_attribute *attr, char *); +extern ssize_t tpm_show_durations(struct device *, + struct device_attribute *attr, char *); +extern ssize_t tpm_show_timeouts(struct device *, + struct device_attribute *attr, char *); struct tpm_chip; @@ -67,6 +71,7 @@ struct tpm_vendor_specific { unsigned long base; /* TPM base address */ int irq; + int probed_irq; int region_size; int have_region; @@ -81,7 +86,9 @@ struct tpm_vendor_specific { struct list_head list; int locality; unsigned long timeout_a, timeout_b, timeout_c, timeout_d; /* jiffies */ + bool timeout_adjusted; unsigned long duration[3]; /* jiffies */ + bool duration_adjusted; wait_queue_head_t read_queue; wait_queue_head_t int_queue; diff --git a/drivers/char/tpm/tpm_nsc.c b/drivers/char/tpm/tpm_nsc.c index a605cb7dd898..82facc9104c7 100644 --- a/drivers/char/tpm/tpm_nsc.c +++ b/drivers/char/tpm/tpm_nsc.c @@ -330,12 +330,12 @@ static int __init init_nsc(void) pdev->dev.driver = &nsc_drv.driver; pdev->dev.release = tpm_nsc_remove; - if ((rc = platform_device_register(pdev)) < 0) - goto err_free_dev; + if ((rc = platform_device_add(pdev)) < 0) + goto err_put_dev; if (request_region(base, 2, "tpm_nsc0") == NULL ) { rc = -EBUSY; - goto err_unreg_dev; + goto err_del_dev; } if (!(chip = tpm_register_hardware(&pdev->dev, &tpm_nsc))) { @@ -382,10 +382,10 @@ static int __init init_nsc(void) err_rel_reg: release_region(base, 2); -err_unreg_dev: - platform_device_unregister(pdev); -err_free_dev: - kfree(pdev); +err_del_dev: + platform_device_del(pdev); +err_put_dev: + platform_device_put(pdev); err_unreg_drv: platform_driver_unregister(&nsc_drv); return rc; diff --git a/drivers/char/tpm/tpm_tis.c b/drivers/char/tpm/tpm_tis.c index dd21df55689d..7fc2f108f490 100644 --- a/drivers/char/tpm/tpm_tis.c +++ b/drivers/char/tpm/tpm_tis.c @@ -26,6 +26,7 @@ #include <linux/interrupt.h> #include <linux/wait.h> #include <linux/acpi.h> +#include <linux/freezer.h> #include "tpm.h" #define TPM_HEADER_SIZE 10 @@ -79,7 +80,7 @@ enum tis_defaults { static LIST_HEAD(tis_chips); static DEFINE_SPINLOCK(tis_lock); -#ifdef CONFIG_ACPI +#ifdef CONFIG_PNP static int is_itpm(struct pnp_dev *dev) { struct acpi_device *acpi = pnp_acpi_device(dev); @@ -92,11 +93,6 @@ static int is_itpm(struct pnp_dev *dev) return 0; } -#else -static int is_itpm(struct pnp_dev *dev) -{ - return 0; -} #endif static int check_locality(struct tpm_chip *chip, int l) @@ -120,7 +116,7 @@ static void release_locality(struct tpm_chip *chip, int l, int force) static int request_locality(struct tpm_chip *chip, int l) { - unsigned long stop; + unsigned long stop, timeout; long rc; if (check_locality(chip, l) >= 0) @@ -129,17 +125,25 @@ static int request_locality(struct tpm_chip *chip, int l) iowrite8(TPM_ACCESS_REQUEST_USE, chip->vendor.iobase + TPM_ACCESS(l)); + stop = jiffies + chip->vendor.timeout_a; + if (chip->vendor.irq) { +again: + timeout = stop - jiffies; + if ((long)timeout <= 0) + return -1; rc = wait_event_interruptible_timeout(chip->vendor.int_queue, (check_locality (chip, l) >= 0), - 
chip->vendor.timeout_a); + timeout); if (rc > 0) return l; - + if (rc == -ERESTARTSYS && freezing(current)) { + clear_thread_flag(TIF_SIGPENDING); + goto again; + } } else { /* wait for burstcount */ - stop = jiffies + chip->vendor.timeout_a; do { if (check_locality(chip, l) >= 0) return l; @@ -196,15 +200,24 @@ static int wait_for_stat(struct tpm_chip *chip, u8 mask, unsigned long timeout, if ((status & mask) == mask) return 0; + stop = jiffies + timeout; + if (chip->vendor.irq) { +again: + timeout = stop - jiffies; + if ((long)timeout <= 0) + return -ETIME; rc = wait_event_interruptible_timeout(*queue, ((tpm_tis_status (chip) & mask) == mask), timeout); if (rc > 0) return 0; + if (rc == -ERESTARTSYS && freezing(current)) { + clear_thread_flag(TIF_SIGPENDING); + goto again; + } } else { - stop = jiffies + timeout; do { msleep(TPM_TIMEOUT); status = tpm_tis_status(chip); @@ -288,11 +301,10 @@ MODULE_PARM_DESC(itpm, "Force iTPM workarounds (found on some Lenovo laptops)"); * tpm.c can skip polling for the data to be available as the interrupt is * waited for here */ -static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +static int tpm_tis_send_data(struct tpm_chip *chip, u8 *buf, size_t len) { int rc, status, burstcnt; size_t count = 0; - u32 ordinal; if (request_locality(chip, 0) < 0) return -EBUSY; @@ -327,8 +339,7 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) /* write last byte */ iowrite8(buf[count], - chip->vendor.iobase + - TPM_DATA_FIFO(chip->vendor.locality)); + chip->vendor.iobase + TPM_DATA_FIFO(chip->vendor.locality)); wait_for_stat(chip, TPM_STS_VALID, chip->vendor.timeout_c, &chip->vendor.int_queue); status = tpm_tis_status(chip); @@ -337,6 +348,28 @@ static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) goto out_err; } + return 0; + +out_err: + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + return rc; +} + +/* + * If interrupts are used (signaled by an irq set in the vendor structure) + * tpm.c can skip polling for the data to be available as the interrupt is + * waited for here + */ +static int tpm_tis_send(struct tpm_chip *chip, u8 *buf, size_t len) +{ + int rc; + u32 ordinal; + + rc = tpm_tis_send_data(chip, buf, len); + if (rc < 0) + return rc; + /* go and do it */ iowrite8(TPM_STS_GO, chip->vendor.iobase + TPM_STS(chip->vendor.locality)); @@ -358,6 +391,47 @@ out_err: return rc; } +/* + * Early probing for iTPM with STS_DATA_EXPECT flaw. + * Try sending command without itpm flag set and if that + * fails, repeat with itpm flag set. 
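+ * + * The probe payload is a bare 10-byte command header: tag + * TPM_TAG_RQU_COMMAND (0x00c1), total length 10, ordinal 0xf1 + * (TPM_ORD_GetTicks), so nothing beyond the transmit phase is ever + * executed; tpm_tis_send_data() only has to push the header into the + * FIFO for the STS_DATA_EXPECT flaw to show up.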
+ */ +static int probe_itpm(struct tpm_chip *chip) +{ + int rc = 0; + u8 cmd_getticks[] = { + 0x00, 0xc1, 0x00, 0x00, 0x00, 0x0a, + 0x00, 0x00, 0x00, 0xf1 + }; + size_t len = sizeof(cmd_getticks); + int rem_itpm = itpm; + + itpm = 0; + + rc = tpm_tis_send_data(chip, cmd_getticks, len); + if (rc == 0) + goto out; + + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + + itpm = 1; + + rc = tpm_tis_send_data(chip, cmd_getticks, len); + if (rc == 0) { + dev_info(chip->dev, "Detected an iTPM.\n"); + rc = 1; + } else + rc = -EFAULT; + +out: + itpm = rem_itpm; + tpm_tis_ready(chip); + release_locality(chip, chip->vendor.locality, 0); + + return rc; +} + static const struct file_operations tis_ops = { .owner = THIS_MODULE, .llseek = no_llseek, @@ -376,6 +450,8 @@ static DEVICE_ATTR(temp_deactivated, S_IRUGO, tpm_show_temp_deactivated, NULL); static DEVICE_ATTR(caps, S_IRUGO, tpm_show_caps_1_2, NULL); static DEVICE_ATTR(cancel, S_IWUSR | S_IWGRP, NULL, tpm_store_cancel); +static DEVICE_ATTR(durations, S_IRUGO, tpm_show_durations, NULL); +static DEVICE_ATTR(timeouts, S_IRUGO, tpm_show_timeouts, NULL); static struct attribute *tis_attrs[] = { &dev_attr_pubek.attr, @@ -385,7 +461,9 @@ static struct attribute *tis_attrs[] = { &dev_attr_owned.attr, &dev_attr_temp_deactivated.attr, &dev_attr_caps.attr, - &dev_attr_cancel.attr, NULL, + &dev_attr_cancel.attr, + &dev_attr_durations.attr, + &dev_attr_timeouts.attr, NULL, }; static struct attribute_group tis_attr_grp = { @@ -416,7 +494,7 @@ static irqreturn_t tis_int_probe(int irq, void *dev_id) if (interrupt == 0) return IRQ_NONE; - chip->vendor.irq = irq; + chip->vendor.probed_irq = irq; /* Clear interrupts handled with TPM_EOI */ iowrite32(interrupt, @@ -464,7 +542,7 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, resource_size_t len, unsigned int irq) { u32 vendor, intfcaps, intmask; - int rc, i; + int rc, i, irq_s, irq_e; struct tpm_chip *chip; if (!(chip = tpm_register_hardware(dev, &tpm_tis))) @@ -493,6 +571,14 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, "1.2 TPM (device-id 0x%X, rev-id %d)\n", vendor >> 16, ioread8(chip->vendor.iobase + TPM_RID(0))); + if (!itpm) { + itpm = probe_itpm(chip); + if (itpm < 0) { + rc = -ENODEV; + goto out_err; + } + } + if (itpm) dev_info(dev, "Intel iTPM workaround enabled\n"); @@ -522,6 +608,9 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (intfcaps & TPM_INTF_DATA_AVAIL_INT) dev_dbg(dev, "\tData Avail Int Support\n"); + /* get the timeouts before testing for irqs */ + tpm_get_timeouts(chip); + /* INTERRUPT Setup */ init_waitqueue_head(&chip->vendor.read_queue); init_waitqueue_head(&chip->vendor.int_queue); @@ -540,13 +629,19 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, if (interrupts) chip->vendor.irq = irq; if (interrupts && !chip->vendor.irq) { - chip->vendor.irq = + irq_s = ioread8(chip->vendor.iobase + TPM_INT_VECTOR(chip->vendor.locality)); + if (irq_s) { + irq_e = irq_s; + } else { + irq_s = 3; + irq_e = 15; + } - for (i = 3; i < 16 && chip->vendor.irq == 0; i++) { + for (i = irq_s; i <= irq_e && chip->vendor.irq == 0; i++) { iowrite8(i, chip->vendor.iobase + - TPM_INT_VECTOR(chip->vendor.locality)); + TPM_INT_VECTOR(chip->vendor.locality)); if (request_irq (i, tis_int_probe, IRQF_SHARED, chip->vendor.miscdev.name, chip) != 0) { @@ -568,9 +663,22 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); + 
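/* tis_int_probe() now records the vector that actually fires in + vendor.probed_irq instead of writing vendor.irq directly, so reset + the field before generating a test interrupt and commit it to + vendor.irq only afterwards. */ +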
chip->vendor.probed_irq = 0; + /* Generate Interrupts */ tpm_gen_interrupt(chip); + chip->vendor.irq = chip->vendor.probed_irq; + + /* free_irq will call into tis_int_probe; + clear all irqs we haven't seen while doing + tpm_gen_interrupt */ + iowrite32(ioread32 + (chip->vendor.iobase + + TPM_INT_STATUS(chip->vendor.locality)), + chip->vendor.iobase + + TPM_INT_STATUS(chip->vendor.locality)); + /* Turn off */ iowrite32(intmask, chip->vendor.iobase + @@ -609,7 +717,6 @@ static int tpm_tis_init(struct device *dev, resource_size_t start, list_add(&chip->vendor.list, &tis_chips); spin_unlock(&tis_lock); - tpm_get_timeouts(chip); tpm_continue_selftest(chip); return 0; @@ -619,6 +726,29 @@ out_err: tpm_remove_hardware(chip->dev); return rc; } + +static void tpm_tis_reenable_interrupts(struct tpm_chip *chip) +{ + u32 intmask; + + /* reenable interrupts that device may have lost or + BIOS/firmware may have disabled */ + iowrite8(chip->vendor.irq, chip->vendor.iobase + + TPM_INT_VECTOR(chip->vendor.locality)); + + intmask = + ioread32(chip->vendor.iobase + + TPM_INT_ENABLE(chip->vendor.locality)); + + intmask |= TPM_INTF_CMD_READY_INT + | TPM_INTF_LOCALITY_CHANGE_INT | TPM_INTF_DATA_AVAIL_INT + | TPM_INTF_STS_VALID_INT | TPM_GLOBAL_INT_ENABLE; + + iowrite32(intmask, + chip->vendor.iobase + TPM_INT_ENABLE(chip->vendor.locality)); +} + + #ifdef CONFIG_PNP static int __devinit tpm_tis_pnp_init(struct pnp_dev *pnp_dev, const struct pnp_device_id *pnp_id) @@ -650,6 +780,9 @@ static int tpm_tis_pnp_resume(struct pnp_dev *dev) struct tpm_chip *chip = pnp_get_drvdata(dev); int ret; + if (chip->vendor.irq) + tpm_tis_reenable_interrupts(chip); + ret = tpm_pm_resume(&dev->dev); if (!ret) tpm_continue_selftest(chip); @@ -702,6 +835,11 @@ static int tpm_tis_suspend(struct platform_device *dev, pm_message_t msg) static int tpm_tis_resume(struct platform_device *dev) { + struct tpm_chip *chip = dev_get_drvdata(&dev->dev); + + if (chip->vendor.irq) + tpm_tis_reenable_interrupts(chip); + return tpm_pm_resume(&dev->dev); } static struct platform_driver tis_drv = { diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c index 2e5b2044c96f..d0183ddb3076 100644 --- a/drivers/crypto/n2_core.c +++ b/drivers/crypto/n2_core.c @@ -1,6 +1,6 @@ /* n2_core.c: Niagara2 Stream Processing Unit (SPU) crypto support. * - * Copyright (C) 2010 David S. Miller <davem@davemloft.net> + * Copyright (C) 2010, 2011 David S. 
Miller <davem@davemloft.net> */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt @@ -31,8 +31,8 @@ #include "n2_core.h" #define DRV_MODULE_NAME "n2_crypto" -#define DRV_MODULE_VERSION "0.1" -#define DRV_MODULE_RELDATE "April 29, 2010" +#define DRV_MODULE_VERSION "0.2" +#define DRV_MODULE_RELDATE "July 28, 2011" static char version[] __devinitdata = DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n"; @@ -1823,22 +1823,17 @@ static int spu_mdesc_scan(struct mdesc_handle *mdesc, struct platform_device *de static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, struct spu_mdesc_info *ip) { - const u64 *intr, *ino; - int intr_len, ino_len; + const u64 *ino; + int ino_len; int i; - intr = mdesc_get_property(mdesc, node, "intr", &intr_len); - if (!intr) - return -ENODEV; - ino = mdesc_get_property(mdesc, node, "ino", &ino_len); - if (!ino) + if (!ino) { + printk("NO 'ino'\n"); return -ENODEV; + } - if (intr_len != ino_len) - return -EINVAL; - - ip->num_intrs = intr_len / sizeof(u64); + ip->num_intrs = ino_len / sizeof(u64); ip->ino_table = kzalloc((sizeof(struct ino_blob) * ip->num_intrs), GFP_KERNEL); @@ -1847,7 +1842,7 @@ static int __devinit get_irq_props(struct mdesc_handle *mdesc, u64 node, for (i = 0; i < ip->num_intrs; i++) { struct ino_blob *b = &ip->ino_table[i]; - b->intr = intr[i]; + b->intr = i + 1; b->ino = ino[i]; } @@ -2204,6 +2199,10 @@ static struct of_device_id n2_crypto_match[] = { .name = "n2cp", .compatible = "SUNW,vf-cwq", }, + { + .name = "n2cp", + .compatible = "SUNW,kt-cwq", + }, {}, }; @@ -2228,6 +2227,10 @@ static struct of_device_id n2_mau_match[] = { .name = "ncp", .compatible = "SUNW,vf-mau", }, + { + .name = "ncp", + .compatible = "SUNW,kt-mau", + }, {}, }; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 95a08a8ca8aa..5745b7fe158c 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -271,7 +271,7 @@ int iser_send_command(struct iscsi_conn *conn, unsigned long edtl; int err; struct iser_data_buf *data_buf; - struct iscsi_cmd *hdr = (struct iscsi_cmd *)task->hdr; + struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; struct scsi_cmnd *sc = task->sc; struct iser_tx_desc *tx_desc = &iser_task->desc; diff --git a/drivers/input/joystick/xpad.c b/drivers/input/joystick/xpad.c index 56abf3d0e911..d72887585a14 100644 --- a/drivers/input/joystick/xpad.c +++ b/drivers/input/joystick/xpad.c @@ -154,10 +154,13 @@ static const struct xpad_device { { 0x0f30, 0x8888, "BigBen XBMiniPad Controller", 0, XTYPE_XBOX }, { 0x102c, 0xff0c, "Joytech Wireless Advanced Controller", 0, XTYPE_XBOX }, { 0x12ab, 0x8809, "Xbox DDR dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, + { 0x12ab, 0x0004, "Honey Bee Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, + { 0x0e6f, 0x0105, "HSM3 Xbox360 dancepad", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x1430, 0x4748, "RedOctane Guitar Hero X-plorer", 0, XTYPE_XBOX360 }, { 0x1430, 0x8888, "TX6500+ Dance Pad (first generation)", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX }, { 0x146b, 0x0601, "BigBen Interactive XBOX 360 Controller", 0, XTYPE_XBOX360 }, { 0x045e, 0x028e, "Microsoft X-Box 360 pad", 0, XTYPE_XBOX360 }, + { 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 }, { 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x0016, "Hori Real Arcade Pro.EX", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, { 0x0f0d, 0x000d, "Hori Fighting Stick EX2", 
MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 }, @@ -236,9 +239,10 @@ static struct usb_device_id xpad_table [] = { XPAD_XBOX360_VENDOR(0x046d), /* Logitech X-Box 360 style controllers */ XPAD_XBOX360_VENDOR(0x0738), /* Mad Catz X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x0e6f), /* 0x0e6f X-Box 360 controllers */ + XPAD_XBOX360_VENDOR(0x12ab), /* X-Box 360 dance pads */ XPAD_XBOX360_VENDOR(0x1430), /* RedOctane X-Box 360 controllers */ XPAD_XBOX360_VENDOR(0x146b), /* BigBen Interactive Controllers */ - XPAD_XBOX360_VENDOR(0x1bad), /* Rock Band Drums */ + XPAD_XBOX360_VENDOR(0x1bad), /* Harmonix Rock Band Guitar and Drums */ XPAD_XBOX360_VENDOR(0x0f0d), /* Hori Controllers */ { } }; @@ -545,7 +549,7 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) struct usb_endpoint_descriptor *ep_irq_out; int error; - if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX) + if (xpad->xtype == XTYPE_UNKNOWN) return 0; xpad->odata = usb_alloc_coherent(xpad->udev, XPAD_PKT_LEN, @@ -579,13 +583,13 @@ static int xpad_init_output(struct usb_interface *intf, struct usb_xpad *xpad) static void xpad_stop_output(struct usb_xpad *xpad) { - if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) + if (xpad->xtype != XTYPE_UNKNOWN) usb_kill_urb(xpad->irq_out); } static void xpad_deinit_output(struct usb_xpad *xpad) { - if (xpad->xtype == XTYPE_XBOX360 || xpad->xtype == XTYPE_XBOX) { + if (xpad->xtype != XTYPE_UNKNOWN) { usb_free_urb(xpad->irq_out); usb_free_coherent(xpad->udev, XPAD_PKT_LEN, xpad->odata, xpad->odata_dma); @@ -632,6 +636,23 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + case XTYPE_XBOX360W: + xpad->odata[0] = 0x00; + xpad->odata[1] = 0x01; + xpad->odata[2] = 0x0F; + xpad->odata[3] = 0xC0; + xpad->odata[4] = 0x00; + xpad->odata[5] = strong / 256; + xpad->odata[6] = weak / 256; + xpad->odata[7] = 0x00; + xpad->odata[8] = 0x00; + xpad->odata[9] = 0x00; + xpad->odata[10] = 0x00; + xpad->odata[11] = 0x00; + xpad->irq_out->transfer_buffer_length = 12; + + return usb_submit_urb(xpad->irq_out, GFP_ATOMIC); + default: dbg("%s - rumble command sent to unsupported xpad type: %d", __func__, xpad->xtype); @@ -644,7 +665,7 @@ static int xpad_play_effect(struct input_dev *dev, void *data, struct ff_effect static int xpad_init_ff(struct usb_xpad *xpad) { - if (xpad->xtype != XTYPE_XBOX360 && xpad->xtype != XTYPE_XBOX) + if (xpad->xtype == XTYPE_UNKNOWN) return 0; input_set_capability(xpad->dev, EV_FF, FF_RUMBLE); diff --git a/drivers/input/keyboard/adp5588-keys.c b/drivers/input/keyboard/adp5588-keys.c index af45d275f686..7b404e5443ed 100644 --- a/drivers/input/keyboard/adp5588-keys.c +++ b/drivers/input/keyboard/adp5588-keys.c @@ -9,7 +9,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/keyboard/adp5589-keys.c b/drivers/input/keyboard/adp5589-keys.c index 631598663aab..c7708263051b 100644 --- a/drivers/input/keyboard/adp5589-keys.c +++ b/drivers/input/keyboard/adp5589-keys.c @@ -8,7 +8,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 11478eb2c27d..19cfc0cf558c 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -1578,14 +1578,14 @@ static int __init
atkbd_setup_forced_release(const struct dmi_system_id *id) atkbd_platform_fixup = atkbd_apply_forced_release_keylist; atkbd_platform_fixup_data = id->driver_data; - return 0; + return 1; } static int __init atkbd_setup_scancode_fixup(const struct dmi_system_id *id) { atkbd_platform_scancode_fixup = id->driver_data; - return 0; + return 1; } static const struct dmi_system_id atkbd_dmi_quirk_table[] __initconst = { diff --git a/drivers/input/keyboard/gpio_keys.c b/drivers/input/keyboard/gpio_keys.c index 6e6145b9a4c1..ce281d152275 100644 --- a/drivers/input/keyboard/gpio_keys.c +++ b/drivers/input/keyboard/gpio_keys.c @@ -2,6 +2,7 @@ * Driver for keys on GPIO lines capable of generating interrupts. * * Copyright 2005 Phil Blundell + * Copyright 2010, 2011 David Jander <david@protonic.nl> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -25,6 +26,8 @@ #include <linux/gpio_keys.h> #include <linux/workqueue.h> #include <linux/gpio.h> +#include <linux/of_platform.h> +#include <linux/of_gpio.h> struct gpio_button_data { struct gpio_keys_button *button; @@ -415,7 +418,7 @@ static int __devinit gpio_keys_setup_key(struct platform_device *pdev, if (!button->can_disable) irqflags |= IRQF_SHARED; - error = request_any_context_irq(irq, gpio_keys_isr, irqflags, desc, bdata); + error = request_threaded_irq(irq, NULL, gpio_keys_isr, irqflags, desc, bdata); if (error < 0) { dev_err(dev, "Unable to claim irq %d; error %d\n", irq, error); @@ -445,15 +448,120 @@ static void gpio_keys_close(struct input_dev *input) ddata->disable(input->dev.parent); } +/* + * Handlers for alternative sources of platform_data + */ +#ifdef CONFIG_OF +/* + * Translate OpenFirmware node properties into platform_data + */ +static int gpio_keys_get_devtree_pdata(struct device *dev, + struct gpio_keys_platform_data *pdata) +{ + struct device_node *node, *pp; + int i; + struct gpio_keys_button *buttons; + const u32 *reg; + int len; + + node = dev->of_node; + if (node == NULL) + return -ENODEV; + + memset(pdata, 0, sizeof *pdata); + + pdata->rep = !!of_get_property(node, "autorepeat", &len); + + /* First count the subnodes */ + pdata->nbuttons = 0; + pp = NULL; + while ((pp = of_get_next_child(node, pp))) + pdata->nbuttons++; + + if (pdata->nbuttons == 0) + return -ENODEV; + + buttons = kzalloc(pdata->nbuttons * (sizeof *buttons), GFP_KERNEL); + if (!buttons) + return -ENODEV; + + pp = NULL; + i = 0; + while ((pp = of_get_next_child(node, pp))) { + enum of_gpio_flags flags; + + if (!of_find_property(pp, "gpios", NULL)) { + pdata->nbuttons--; + dev_warn(dev, "Found button without gpios\n"); + continue; + } + buttons[i].gpio = of_get_gpio_flags(pp, 0, &flags); + buttons[i].active_low = flags & OF_GPIO_ACTIVE_LOW; + + reg = of_get_property(pp, "linux,code", &len); + if (!reg) { + dev_err(dev, "Button without keycode: 0x%x\n", buttons[i].gpio); + goto out_fail; + } + buttons[i].code = be32_to_cpup(reg); + + buttons[i].desc = of_get_property(pp, "label", &len); + + reg = of_get_property(pp, "linux,input-type", &len); + buttons[i].type = reg ? be32_to_cpup(reg) : EV_KEY; + + buttons[i].wakeup = !!of_get_property(pp, "gpio-key,wakeup", NULL); + + reg = of_get_property(pp, "debounce-interval", &len); + buttons[i].debounce_interval = reg ? 
be32_to_cpup(reg) : 5; + + i++; + } + + pdata->buttons = buttons; + + return 0; + +out_fail: + kfree(buttons); + return -ENODEV; +} + +static struct of_device_id gpio_keys_of_match[] = { + { .compatible = "gpio-keys", }, + { }, +}; +MODULE_DEVICE_TABLE(of, gpio_keys_of_match); + +#else + +static int gpio_keys_get_devtree_pdata(struct device *dev, + struct gpio_keys_platform_data *altp) +{ + return -ENODEV; +} + +#define gpio_keys_of_match NULL + +#endif + static int __devinit gpio_keys_probe(struct platform_device *pdev) { struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct gpio_keys_drvdata *ddata; struct device *dev = &pdev->dev; + struct gpio_keys_platform_data alt_pdata; struct input_dev *input; int i, error; int wakeup = 0; + if (!pdata) { + error = gpio_keys_get_devtree_pdata(dev, &alt_pdata); + if (error) + return error; + pdata = &alt_pdata; + } + ddata = kzalloc(sizeof(struct gpio_keys_drvdata) + pdata->nbuttons * sizeof(struct gpio_button_data), GFP_KERNEL); @@ -544,13 +652,15 @@ static int __devinit gpio_keys_probe(struct platform_device *pdev) fail1: input_free_device(input); kfree(ddata); + /* If we have no platform_data, we allocated buttons dynamically. */ + if (!pdev->dev.platform_data) + kfree(pdata->buttons); return error; } static int __devexit gpio_keys_remove(struct platform_device *pdev) { - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); struct input_dev *input = ddata->input; int i; @@ -559,31 +669,39 @@ static int __devexit gpio_keys_remove(struct platform_device *pdev) device_init_wakeup(&pdev->dev, 0); - for (i = 0; i < pdata->nbuttons; i++) { - int irq = gpio_to_irq(pdata->buttons[i].gpio); + for (i = 0; i < ddata->n_buttons; i++) { + int irq = gpio_to_irq(ddata->data[i].button->gpio); free_irq(irq, &ddata->data[i]); if (ddata->data[i].timer_debounce) del_timer_sync(&ddata->data[i].timer); cancel_work_sync(&ddata->data[i].work); - gpio_free(pdata->buttons[i].gpio); + gpio_free(ddata->data[i].button->gpio); } input_unregister_device(input); + /* + * If we had no platform_data, we allocated buttons dynamically, and + * must free them here. ddata->data[0].button is the pointer to the + * beginning of the allocated array. 
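+ * (gpio_keys_get_devtree_pdata() allocates all nbuttons entries as a + * single array and each ddata->data[i].button points into it, so + * freeing the first entry's button releases the whole array.)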
+ */ + if (!pdev->dev.platform_data) + kfree(ddata->data[0].button); + + kfree(ddata); + return 0; } - -#ifdef CONFIG_PM +#ifdef CONFIG_PM_SLEEP static int gpio_keys_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev); int i; - if (device_may_wakeup(&pdev->dev)) { - for (i = 0; i < pdata->nbuttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; + if (device_may_wakeup(dev)) { + for (i = 0; i < ddata->n_buttons; i++) { + struct gpio_keys_button *button = ddata->data[i].button; if (button->wakeup) { int irq = gpio_to_irq(button->gpio); enable_irq_wake(irq); @@ -596,15 +714,13 @@ static int gpio_keys_suspend(struct device *dev) static int gpio_keys_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct gpio_keys_drvdata *ddata = platform_get_drvdata(pdev); - struct gpio_keys_platform_data *pdata = pdev->dev.platform_data; + struct gpio_keys_drvdata *ddata = dev_get_drvdata(dev); int i; - for (i = 0; i < pdata->nbuttons; i++) { + for (i = 0; i < ddata->n_buttons; i++) { - struct gpio_keys_button *button = &pdata->buttons[i]; - if (button->wakeup && device_may_wakeup(&pdev->dev)) { + struct gpio_keys_button *button = ddata->data[i].button; + if (button->wakeup && device_may_wakeup(dev)) { int irq = gpio_to_irq(button->gpio); disable_irq_wake(irq); } @@ -615,22 +731,18 @@ static int gpio_keys_resume(struct device *dev) return 0; } - -static const struct dev_pm_ops gpio_keys_pm_ops = { - .suspend = gpio_keys_suspend, - .resume = gpio_keys_resume, -}; #endif +static SIMPLE_DEV_PM_OPS(gpio_keys_pm_ops, gpio_keys_suspend, gpio_keys_resume); + static struct platform_driver gpio_keys_device_driver = { .probe = gpio_keys_probe, .remove = __devexit_p(gpio_keys_remove), .driver = { .name = "gpio-keys", .owner = THIS_MODULE, -#ifdef CONFIG_PM .pm = &gpio_keys_pm_ops, -#endif + .of_match_table = gpio_keys_of_match, } }; @@ -644,10 +756,10 @@ static void __exit gpio_keys_exit(void) platform_driver_unregister(&gpio_keys_device_driver); } -module_init(gpio_keys_init); +late_initcall(gpio_keys_init); module_exit(gpio_keys_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Phil Blundell <pb@handhelds.org>"); -MODULE_DESCRIPTION("Keyboard driver for CPU GPIOs"); +MODULE_DESCRIPTION("Keyboard driver for GPIOs"); MODULE_ALIAS("platform:gpio-keys"); diff --git a/drivers/input/keyboard/lm8323.c b/drivers/input/keyboard/lm8323.c index 71f744a8e686..ab0acaf7fe8f 100644 --- a/drivers/input/keyboard/lm8323.c +++ b/drivers/input/keyboard/lm8323.c @@ -146,7 +146,6 @@ struct lm8323_chip { /* device lock */ struct mutex lock; struct i2c_client *client; - struct work_struct work; struct input_dev *idev; bool kp_enabled; bool pm_suspend; @@ -162,7 +161,6 @@ struct lm8323_chip { #define client_to_lm8323(c) container_of(c, struct lm8323_chip, client) #define dev_to_lm8323(d) container_of(d, struct lm8323_chip, client->dev) -#define work_to_lm8323(w) container_of(w, struct lm8323_chip, work) #define cdev_to_pwm(c) container_of(c, struct lm8323_pwm, cdev) #define work_to_pwm(w) container_of(w, struct lm8323_pwm, work) @@ -375,9 +373,9 @@ static void pwm_done(struct lm8323_pwm *pwm) * Bottom half: handle the interrupt by posting key events, or dealing with * errors appropriately. 
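 * + * Reading the key status involves I2C transfers, which may sleep, so + * this now runs as the threaded half of request_threaded_irq(irq, + * NULL, lm8323_irq, IRQF_TRIGGER_LOW | IRQF_ONESHOT, ...) rather + * than as a workqueue item kicked from a hard-IRQ handler.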
*/ -static void lm8323_work(struct work_struct *work) +static irqreturn_t lm8323_irq(int irq, void *_lm) { - struct lm8323_chip *lm = work_to_lm8323(work); + struct lm8323_chip *lm = _lm; u8 ints; int i; @@ -409,16 +407,6 @@ static void lm8323_work(struct work_struct *work) } mutex_unlock(&lm->lock); -} - -/* - * We cannot use I2C in interrupt context, so we just schedule work. - */ -static irqreturn_t lm8323_irq(int irq, void *data) -{ - struct lm8323_chip *lm = data; - - schedule_work(&lm->work); return IRQ_HANDLED; } @@ -675,7 +663,6 @@ static int __devinit lm8323_probe(struct i2c_client *client, lm->client = client; lm->idev = idev; mutex_init(&lm->lock); - INIT_WORK(&lm->work, lm8323_work); lm->size_x = pdata->size_x; lm->size_y = pdata->size_y; @@ -746,9 +733,8 @@ static int __devinit lm8323_probe(struct i2c_client *client, goto fail3; } - err = request_irq(client->irq, lm8323_irq, - IRQF_TRIGGER_FALLING | IRQF_DISABLED, - "lm8323", lm); + err = request_threaded_irq(client->irq, NULL, lm8323_irq, + IRQF_TRIGGER_LOW|IRQF_ONESHOT, "lm8323", lm); if (err) { dev_err(&client->dev, "could not get IRQ %d\n", client->irq); goto fail4; @@ -783,7 +769,6 @@ static int __devexit lm8323_remove(struct i2c_client *client) disable_irq_wake(client->irq); free_irq(client->irq, lm); - cancel_work_sync(&lm->work); input_unregister_device(lm->idev); diff --git a/drivers/input/keyboard/mpr121_touchkey.c b/drivers/input/keyboard/mpr121_touchkey.c index 0a9e81194888..1c1615d9a7f9 100644 --- a/drivers/input/keyboard/mpr121_touchkey.c +++ b/drivers/input/keyboard/mpr121_touchkey.c @@ -43,14 +43,15 @@ * enabled capacitance sensing inputs and its run/suspend mode. */ #define ELECTRODE_CONF_ADDR 0x5e +#define ELECTRODE_CONF_QUICK_CHARGE 0x80 #define AUTO_CONFIG_CTRL_ADDR 0x7b #define AUTO_CONFIG_USL_ADDR 0x7d #define AUTO_CONFIG_LSL_ADDR 0x7e #define AUTO_CONFIG_TL_ADDR 0x7f /* Threshold of touch/release trigger */ -#define TOUCH_THRESHOLD 0x0f -#define RELEASE_THRESHOLD 0x0a +#define TOUCH_THRESHOLD 0x08 +#define RELEASE_THRESHOLD 0x05 /* Masks for touch and release triggers */ #define TOUCH_STATUS_MASK 0xfff /* MPR121 has 12 keys */ @@ -127,7 +128,7 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata, struct i2c_client *client) { const struct mpr121_init_register *reg; - unsigned char usl, lsl, tl; + unsigned char usl, lsl, tl, eleconf; int i, t, vdd, ret; /* Set up touch/release threshold for ele0-ele11 */ @@ -163,8 +164,15 @@ static int __devinit mpr121_phys_init(const struct mpr121_platform_data *pdata, ret = i2c_smbus_write_byte_data(client, AUTO_CONFIG_USL_ADDR, usl); ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_LSL_ADDR, lsl); ret |= i2c_smbus_write_byte_data(client, AUTO_CONFIG_TL_ADDR, tl); + + /* + * Quick charge bit will let the capacitive charge to ready + * state quickly, or the buttons may not function after system + * boot. 
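+ * (With all twelve sensing inputs enabled, the value written below + * is 0x0c | 0x80 = 0x8c: the electrode count in the low bits plus + * the quick charge bit on top.)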
+ */ + eleconf = mpr121->keycount | ELECTRODE_CONF_QUICK_CHARGE; ret |= i2c_smbus_write_byte_data(client, ELECTRODE_CONF_ADDR, - mpr121->keycount); + eleconf); if (ret != 0) goto err_i2c_write; diff --git a/drivers/input/keyboard/pmic8xxx-keypad.c b/drivers/input/keyboard/pmic8xxx-keypad.c index 6229c3e8e78b..e7cc51d0fb34 100644 --- a/drivers/input/keyboard/pmic8xxx-keypad.c +++ b/drivers/input/keyboard/pmic8xxx-keypad.c @@ -700,9 +700,9 @@ static int __devinit pmic8xxx_kp_probe(struct platform_device *pdev) return 0; err_pmic_reg_read: - free_irq(kp->key_stuck_irq, NULL); + free_irq(kp->key_stuck_irq, kp); err_req_stuck_irq: - free_irq(kp->key_sense_irq, NULL); + free_irq(kp->key_sense_irq, kp); err_gpio_config: err_get_irq: input_free_device(kp->input); @@ -717,8 +717,8 @@ static int __devexit pmic8xxx_kp_remove(struct platform_device *pdev) struct pmic8xxx_kp *kp = platform_get_drvdata(pdev); device_init_wakeup(&pdev->dev, 0); - free_irq(kp->key_stuck_irq, NULL); - free_irq(kp->key_sense_irq, NULL); + free_irq(kp->key_stuck_irq, kp); + free_irq(kp->key_sense_irq, kp); input_unregister_device(kp->input); kfree(kp); diff --git a/drivers/input/keyboard/qt1070.c b/drivers/input/keyboard/qt1070.c index ca7b89196ab7..b21bf5b876bb 100644 --- a/drivers/input/keyboard/qt1070.c +++ b/drivers/input/keyboard/qt1070.c @@ -239,8 +239,6 @@ static int __devexit qt1070_remove(struct i2c_client *client) input_unregister_device(data->input); kfree(data); - i2c_set_clientdata(client, NULL); - return 0; } diff --git a/drivers/input/keyboard/sh_keysc.c b/drivers/input/keyboard/sh_keysc.c index 6876700a4469..934aeb583b30 100644 --- a/drivers/input/keyboard/sh_keysc.c +++ b/drivers/input/keyboard/sh_keysc.c @@ -291,7 +291,7 @@ static int __devexit sh_keysc_remove(struct platform_device *pdev) return 0; } -#if CONFIG_PM_SLEEP +#ifdef CONFIG_PM_SLEEP static int sh_keysc_suspend(struct device *dev) { struct platform_device *pdev = to_platform_device(dev); diff --git a/drivers/input/keyboard/tegra-kbc.c b/drivers/input/keyboard/tegra-kbc.c index 2b3b73ec6689..da3828fc2c09 100644 --- a/drivers/input/keyboard/tegra-kbc.c +++ b/drivers/input/keyboard/tegra-kbc.c @@ -657,7 +657,7 @@ static int __devinit tegra_kbc_probe(struct platform_device *pdev) input_set_drvdata(input_dev, kbc); - input_dev->evbit[0] = BIT_MASK(EV_KEY); + input_dev->evbit[0] = BIT_MASK(EV_KEY) | BIT_MASK(EV_REP); input_set_capability(input_dev, EV_MSC, MSC_SCAN); input_dev->keycode = kbc->keycode; diff --git a/drivers/input/keyboard/tnetv107x-keypad.c b/drivers/input/keyboard/tnetv107x-keypad.c index c8f097a15d89..1c58681de81f 100644 --- a/drivers/input/keyboard/tnetv107x-keypad.c +++ b/drivers/input/keyboard/tnetv107x-keypad.c @@ -337,5 +337,5 @@ module_exit(keypad_exit); MODULE_AUTHOR("Cyril Chemparathy"); MODULE_DESCRIPTION("TNETV107X Keypad Driver"); -MODULE_ALIAS("platform: tnetv107x-keypad"); +MODULE_ALIAS("platform:tnetv107x-keypad"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig index d1bf8724b58f..c9104bb4db06 100644 --- a/drivers/input/misc/Kconfig +++ b/drivers/input/misc/Kconfig @@ -100,6 +100,27 @@ config INPUT_MAX8925_ONKEY To compile this driver as a module, choose M here: the module will be called max8925_onkey. +config INPUT_MMA8450 + tristate "MMA8450 - Freescale's 3-Axis, 8/12-bit Digital Accelerometer" + depends on I2C + select INPUT_POLLDEV + help + Say Y here if you want to support Freescale's MMA8450 Accelerometer + through I2C interface. 
+ + To compile this driver as a module, choose M here: the + module will be called mma8450. + +config INPUT_MPU3050 + tristate "MPU3050 Triaxial gyroscope sensor" + depends on I2C + help + Say Y here if you want to support InvenSense MPU3050 + connected via an I2C bus. + + To compile this driver as a module, choose M here: the + module will be called mpu3050. + config INPUT_APANEL tristate "Fujitsu Lifebook Application Panel buttons" depends on X86 && I2C && LEDS_CLASS @@ -209,6 +230,23 @@ config INPUT_KEYSPAN_REMOTE To compile this driver as a module, choose M here: the module will be called keyspan_remote. +config INPUT_KXTJ9 + tristate "Kionix KXTJ9 tri-axis digital accelerometer" + depends on I2C + help + Say Y here to enable support for the Kionix KXTJ9 digital tri-axis + accelerometer. + + To compile this driver as a module, choose M here: the module will + be called kxtj9. + +config INPUT_KXTJ9_POLLED_MODE + bool "Enable polling mode support" + depends on INPUT_KXTJ9 + select INPUT_POLLDEV + help + Say Y here if you need accelerometer to work in polling mode. + config INPUT_POWERMATE tristate "Griffin PowerMate and Contour Jog support" depends on USB_ARCH_HAS_HCD diff --git a/drivers/input/misc/Makefile b/drivers/input/misc/Makefile index 4da7c3a60e04..299ad5edba84 100644 --- a/drivers/input/misc/Makefile +++ b/drivers/input/misc/Makefile @@ -25,8 +25,11 @@ obj-$(CONFIG_INPUT_DM355EVM) += dm355evm_keys.o obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o +obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o obj-$(CONFIG_INPUT_MAX8925_ONKEY) += max8925_onkey.o +obj-$(CONFIG_INPUT_MMA8450) += mma8450.o +obj-$(CONFIG_INPUT_MPU3050) += mpu3050.o obj-$(CONFIG_INPUT_PCAP) += pcap_keys.o obj-$(CONFIG_INPUT_PCF50633_PMU) += pcf50633-input.o obj-$(CONFIG_INPUT_PCF8574) += pcf8574_keypad.o @@ -46,4 +49,3 @@ obj-$(CONFIG_INPUT_WISTRON_BTNS) += wistron_btns.o obj-$(CONFIG_INPUT_WM831X_ON) += wm831x-on.o obj-$(CONFIG_INPUT_XEN_KBDDEV_FRONTEND) += xen-kbdfront.o obj-$(CONFIG_INPUT_YEALINK) += yealink.o - diff --git a/drivers/input/misc/bfin_rotary.c b/drivers/input/misc/bfin_rotary.c index 4f72bdd69410..d00edc9f39d1 100644 --- a/drivers/input/misc/bfin_rotary.c +++ b/drivers/input/misc/bfin_rotary.c @@ -6,7 +6,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/irq.h> diff --git a/drivers/input/misc/kxtj9.c b/drivers/input/misc/kxtj9.c new file mode 100644 index 000000000000..c456f63b6bae --- /dev/null +++ b/drivers/input/misc/kxtj9.c @@ -0,0 +1,671 @@ +/* + * Copyright (C) 2011 Kionix, Inc. + * Written by Chris Hudson <chudson@kionix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input.h> +#include <linux/interrupt.h> +#include <linux/slab.h> +#include <linux/input/kxtj9.h> +#include <linux/input-polldev.h> + +#define NAME "kxtj9" +#define G_MAX 8000 +/* OUTPUT REGISTERS */ +#define XOUT_L 0x06 +#define WHO_AM_I 0x0F +/* CONTROL REGISTERS */ +#define INT_REL 0x1A +#define CTRL_REG1 0x1B +#define INT_CTRL1 0x1E +#define DATA_CTRL 0x21 +/* CONTROL REGISTER 1 BITS */ +#define PC1_OFF 0x7F +#define PC1_ON (1 << 7) +/* Data ready function enable bit: set during probe if using irq mode */ +#define DRDYE (1 << 5) +/* INTERRUPT CONTROL REGISTER 1 BITS */ +/* Set these during probe if using irq mode */ +#define KXTJ9_IEL (1 << 3) +#define KXTJ9_IEA (1 << 4) +#define KXTJ9_IEN (1 << 5) +/* INPUT_ABS CONSTANTS */ +#define FUZZ 3 +#define FLAT 3 +/* RESUME STATE INDICES */ +#define RES_DATA_CTRL 0 +#define RES_CTRL_REG1 1 +#define RES_INT_CTRL1 2 +#define RESUME_ENTRIES 3 + +/* + * The following table lists the maximum appropriate poll interval for each + * available output data rate. + */ +static const struct { + unsigned int cutoff; + u8 mask; +} kxtj9_odr_table[] = { + { 3, ODR800F }, + { 5, ODR400F }, + { 10, ODR200F }, + { 20, ODR100F }, + { 40, ODR50F }, + { 80, ODR25F }, + { 0, ODR12_5F}, +}; + +struct kxtj9_data { + struct i2c_client *client; + struct kxtj9_platform_data pdata; + struct input_dev *input_dev; +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE + struct input_polled_dev *poll_dev; +#endif + unsigned int last_poll_interval; + u8 shift; + u8 ctrl_reg1; + u8 data_ctrl; + u8 int_ctrl; +}; + +static int kxtj9_i2c_read(struct kxtj9_data *tj9, u8 addr, u8 *data, int len) +{ + struct i2c_msg msgs[] = { + { + .addr = tj9->client->addr, + .flags = tj9->client->flags, + .len = 1, + .buf = &addr, + }, + { + .addr = tj9->client->addr, + .flags = tj9->client->flags | I2C_M_RD, + .len = len, + .buf = data, + }, + }; + + return i2c_transfer(tj9->client->adapter, msgs, 2); +} + +static void kxtj9_report_acceleration_data(struct kxtj9_data *tj9) +{ + s16 acc_data[3]; /* Data bytes from hardware xL, xH, yL, yH, zL, zH */ + s16 x, y, z; + int err; + + err = kxtj9_i2c_read(tj9, XOUT_L, (u8 *)acc_data, 6); + if (err < 0) + dev_err(&tj9->client->dev, "accelerometer data read failed\n"); + + x = le16_to_cpu(acc_data[tj9->pdata.axis_map_x]) >> tj9->shift; + y = le16_to_cpu(acc_data[tj9->pdata.axis_map_y]) >> tj9->shift; + z = le16_to_cpu(acc_data[tj9->pdata.axis_map_z]) >> tj9->shift; + + input_report_abs(tj9->input_dev, ABS_X, tj9->pdata.negate_x ? -x : x); + input_report_abs(tj9->input_dev, ABS_Y, tj9->pdata.negate_y ? -y : y); + input_report_abs(tj9->input_dev, ABS_Z, tj9->pdata.negate_z ?
-z : z); + input_sync(tj9->input_dev); +} + +static irqreturn_t kxtj9_isr(int irq, void *dev) +{ + struct kxtj9_data *tj9 = dev; + int err; + + /* data ready is the only possible interrupt type */ + kxtj9_report_acceleration_data(tj9); + + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) + dev_err(&tj9->client->dev, + "error clearing interrupt status: %d\n", err); + + return IRQ_HANDLED; +} + +static int kxtj9_update_g_range(struct kxtj9_data *tj9, u8 new_g_range) +{ + switch (new_g_range) { + case KXTJ9_G_2G: + tj9->shift = 4; + break; + case KXTJ9_G_4G: + tj9->shift = 3; + break; + case KXTJ9_G_8G: + tj9->shift = 2; + break; + default: + return -EINVAL; + } + + tj9->ctrl_reg1 &= 0xe7; + tj9->ctrl_reg1 |= new_g_range; + + return 0; +} + +static int kxtj9_update_odr(struct kxtj9_data *tj9, unsigned int poll_interval) +{ + int err; + int i; + + /* Use the lowest ODR that can support the requested poll interval */ + for (i = 0; i < ARRAY_SIZE(kxtj9_odr_table); i++) { + tj9->data_ctrl = kxtj9_odr_table[i].mask; + if (poll_interval < kxtj9_odr_table[i].cutoff) + break; + } + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, DATA_CTRL, tj9->data_ctrl); + if (err < 0) + return err; + + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + return 0; +} + +static int kxtj9_device_power_on(struct kxtj9_data *tj9) +{ + if (tj9->pdata.power_on) + return tj9->pdata.power_on(); + + return 0; +} + +static void kxtj9_device_power_off(struct kxtj9_data *tj9) +{ + int err; + + tj9->ctrl_reg1 &= PC1_OFF; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + dev_err(&tj9->client->dev, "soft power off failed\n"); + + if (tj9->pdata.power_off) + tj9->pdata.power_off(); +} + +static int kxtj9_enable(struct kxtj9_data *tj9) +{ + int err; + + err = kxtj9_device_power_on(tj9); + if (err < 0) + return err; + + /* ensure that PC1 is cleared before updating control registers */ + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, 0); + if (err < 0) + return err; + + /* only write INT_CTRL_REG1 if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_write_byte_data(tj9->client, + INT_CTRL1, tj9->int_ctrl); + if (err < 0) + return err; + } + + err = kxtj9_update_g_range(tj9, tj9->pdata.g_range); + if (err < 0) + return err; + + /* turn on outputs */ + tj9->ctrl_reg1 |= PC1_ON; + err = i2c_smbus_write_byte_data(tj9->client, CTRL_REG1, tj9->ctrl_reg1); + if (err < 0) + return err; + + err = kxtj9_update_odr(tj9, tj9->last_poll_interval); + if (err < 0) + return err; + + /* clear initial interrupt if in irq mode */ + if (tj9->client->irq) { + err = i2c_smbus_read_byte_data(tj9->client, INT_REL); + if (err < 0) { + dev_err(&tj9->client->dev, + "error clearing interrupt: %d\n", err); + goto fail; + } + } + + return 0; + +fail: + kxtj9_device_power_off(tj9); + return err; +} + +static void kxtj9_disable(struct kxtj9_data *tj9) +{ + kxtj9_device_power_off(tj9); +} + +static int kxtj9_input_open(struct input_dev *input) +{ + struct kxtj9_data *tj9 = input_get_drvdata(input); + + return kxtj9_enable(tj9); +} + +static void kxtj9_input_close(struct input_dev *dev) +{ + struct kxtj9_data *tj9 = input_get_drvdata(dev); + + kxtj9_disable(tj9); +} + +static void __devinit kxtj9_init_input_device(struct kxtj9_data *tj9, + struct input_dev *input_dev) +{ + __set_bit(EV_ABS, input_dev->evbit); + input_set_abs_params(input_dev, 
ABS_X, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Y, -G_MAX, G_MAX, FUZZ, FLAT); + input_set_abs_params(input_dev, ABS_Z, -G_MAX, G_MAX, FUZZ, FLAT); + + input_dev->name = "kxtj9_accel"; + input_dev->id.bustype = BUS_I2C; + input_dev->dev.parent = &tj9->client->dev; +} + +static int __devinit kxtj9_setup_input_device(struct kxtj9_data *tj9) +{ + struct input_dev *input_dev; + int err; + + input_dev = input_allocate_device(); + if (!input_dev) { + dev_err(&tj9->client->dev, "input device allocate failed\n"); + return -ENOMEM; + } + + tj9->input_dev = input_dev; + + input_dev->open = kxtj9_input_open; + input_dev->close = kxtj9_input_close; + input_set_drvdata(input_dev, tj9); + + kxtj9_init_input_device(tj9, input_dev); + + err = input_register_device(tj9->input_dev); + if (err) { + dev_err(&tj9->client->dev, + "unable to register input polled device %s: %d\n", + tj9->input_dev->name, err); + input_free_device(tj9->input_dev); + return err; + } + + return 0; +} + +/* + * When IRQ mode is selected, we need to provide an interface to allow the user + * to change the output data rate of the part. For consistency, we are using + * the set_poll method, which accepts a poll interval in milliseconds, and then + * calls update_odr() while passing this value as an argument. In IRQ mode, the + * data outputs will not be read AT the requested poll interval, rather, the + * lowest ODR that can support the requested interval. The client application + * will be responsible for retrieving data from the input node at the desired + * interval. + */ + +/* Returns currently selected poll interval (in ms) */ +static ssize_t kxtj9_get_poll(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + return sprintf(buf, "%d\n", tj9->last_poll_interval); +} + +/* Allow users to select a new poll interval (in ms) */ +static ssize_t kxtj9_set_poll(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + unsigned int interval; + int error; + + error = kstrtouint(buf, 10, &interval); + if (error < 0) + return error; + + /* Lock the device to prevent races with open/close (and itself) */ + mutex_lock(&input_dev->mutex); + + disable_irq(client->irq); + + /* + * Set current interval to the greater of the minimum interval or + * the requested interval + */ + tj9->last_poll_interval = max(interval, tj9->pdata.min_interval); + + kxtj9_update_odr(tj9, tj9->last_poll_interval); + + enable_irq(client->irq); + mutex_unlock(&input_dev->mutex); + + return count; +} + +static DEVICE_ATTR(poll, S_IRUGO|S_IWUSR, kxtj9_get_poll, kxtj9_set_poll); + +static struct attribute *kxtj9_attributes[] = { + &dev_attr_poll.attr, + NULL +}; + +static struct attribute_group kxtj9_attribute_group = { + .attrs = kxtj9_attributes +}; + + +#ifdef CONFIG_INPUT_KXTJ9_POLLED_MODE +static void kxtj9_poll(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + unsigned int poll_interval = dev->poll_interval; + + kxtj9_report_acceleration_data(tj9); + + if (poll_interval != tj9->last_poll_interval) { + kxtj9_update_odr(tj9, poll_interval); + tj9->last_poll_interval = poll_interval; + } +} + +static void kxtj9_polled_input_open(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + 
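/* power the part back up and replay the cached g-range and ODR */ +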
kxtj9_enable(tj9); +} + +static void kxtj9_polled_input_close(struct input_polled_dev *dev) +{ + struct kxtj9_data *tj9 = dev->private; + + kxtj9_disable(tj9); +} + +static int __devinit kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + int err; + struct input_polled_dev *poll_dev; + poll_dev = input_allocate_polled_device(); + + if (!poll_dev) { + dev_err(&tj9->client->dev, + "Failed to allocate polled device\n"); + return -ENOMEM; + } + + tj9->poll_dev = poll_dev; + tj9->input_dev = poll_dev->input; + + poll_dev->private = tj9; + poll_dev->poll = kxtj9_poll; + poll_dev->open = kxtj9_polled_input_open; + poll_dev->close = kxtj9_polled_input_close; + + kxtj9_init_input_device(tj9, poll_dev->input); + + err = input_register_polled_device(poll_dev); + if (err) { + dev_err(&tj9->client->dev, + "Unable to register polled device, err=%d\n", err); + input_free_polled_device(poll_dev); + return err; + } + + return 0; +} + +static void __devexit kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ + input_unregister_polled_device(tj9->poll_dev); + input_free_polled_device(tj9->poll_dev); +} + +#else + +static inline int kxtj9_setup_polled_device(struct kxtj9_data *tj9) +{ + return -ENOSYS; +} + +static inline void kxtj9_teardown_polled_device(struct kxtj9_data *tj9) +{ +} + +#endif + +static int __devinit kxtj9_verify(struct kxtj9_data *tj9) +{ + int retval; + + retval = kxtj9_device_power_on(tj9); + if (retval < 0) + return retval; + + retval = i2c_smbus_read_byte_data(tj9->client, WHO_AM_I); + if (retval < 0) { + dev_err(&tj9->client->dev, "read err int source\n"); + goto out; + } + + retval = retval != 0x06 ? -EIO : 0; + +out: + kxtj9_device_power_off(tj9); + return retval; +} + +static int __devinit kxtj9_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + const struct kxtj9_platform_data *pdata = client->dev.platform_data; + struct kxtj9_data *tj9; + int err; + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_I2C | I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "client is not i2c capable\n"); + return -ENXIO; + } + + if (!pdata) { + dev_err(&client->dev, "platform data is NULL; exiting\n"); + return -EINVAL; + } + + tj9 = kzalloc(sizeof(*tj9), GFP_KERNEL); + if (!tj9) { + dev_err(&client->dev, + "failed to allocate memory for module data\n"); + return -ENOMEM; + } + + tj9->client = client; + tj9->pdata = *pdata; + + if (pdata->init) { + err = pdata->init(); + if (err < 0) + goto err_free_mem; + } + + err = kxtj9_verify(tj9); + if (err < 0) { + dev_err(&client->dev, "device not recognized\n"); + goto err_pdata_exit; + } + + i2c_set_clientdata(client, tj9); + + tj9->ctrl_reg1 = tj9->pdata.res_12bit | tj9->pdata.g_range; + tj9->data_ctrl = tj9->pdata.data_odr_init; + + if (client->irq) { + /* If in irq mode, populate INT_CTRL_REG1 and enable DRDY. 
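 The combined value KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL is 0x38: interrupt pin enabled and active-high, with kxtj9_isr() clearing each event by reading INT_REL.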
*/ + tj9->int_ctrl |= KXTJ9_IEN | KXTJ9_IEA | KXTJ9_IEL; + tj9->ctrl_reg1 |= DRDYE; + + err = kxtj9_setup_input_device(tj9); + if (err) + goto err_pdata_exit; + + err = request_threaded_irq(client->irq, NULL, kxtj9_isr, + IRQF_TRIGGER_RISING | IRQF_ONESHOT, + "kxtj9-irq", tj9); + if (err) { + dev_err(&client->dev, "request irq failed: %d\n", err); + goto err_destroy_input; + } + + err = sysfs_create_group(&client->dev.kobj, &kxtj9_attribute_group); + if (err) { + dev_err(&client->dev, "sysfs create failed: %d\n", err); + goto err_free_irq; + } + + } else { + err = kxtj9_setup_polled_device(tj9); + if (err) + goto err_pdata_exit; + } + + return 0; + +err_free_irq: + free_irq(client->irq, tj9); +err_destroy_input: + input_unregister_device(tj9->input_dev); +err_pdata_exit: + if (tj9->pdata.exit) + tj9->pdata.exit(); +err_free_mem: + kfree(tj9); + return err; +} + +static int __devexit kxtj9_remove(struct i2c_client *client) +{ + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + + if (client->irq) { + sysfs_remove_group(&client->dev.kobj, &kxtj9_attribute_group); + free_irq(client->irq, tj9); + input_unregister_device(tj9->input_dev); + } else { + kxtj9_teardown_polled_device(tj9); + } + + if (tj9->pdata.exit) + tj9->pdata.exit(); + + kfree(tj9); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int kxtj9_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_disable(tj9); + + mutex_unlock(&input_dev->mutex); + return 0; +} + +static int kxtj9_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + struct kxtj9_data *tj9 = i2c_get_clientdata(client); + struct input_dev *input_dev = tj9->input_dev; + int retval = 0; + + mutex_lock(&input_dev->mutex); + + if (input_dev->users) + kxtj9_enable(tj9); + + mutex_unlock(&input_dev->mutex); + return retval; +} +#endif + +static SIMPLE_DEV_PM_OPS(kxtj9_pm_ops, kxtj9_suspend, kxtj9_resume); + +static const struct i2c_device_id kxtj9_id[] = { + { NAME, 0 }, + { }, +}; + +MODULE_DEVICE_TABLE(i2c, kxtj9_id); + +static struct i2c_driver kxtj9_driver = { + .driver = { + .name = NAME, + .owner = THIS_MODULE, + .pm = &kxtj9_pm_ops, + }, + .probe = kxtj9_probe, + .remove = __devexit_p(kxtj9_remove), + .id_table = kxtj9_id, +}; + +static int __init kxtj9_init(void) +{ + return i2c_add_driver(&kxtj9_driver); +} +module_init(kxtj9_init); + +static void __exit kxtj9_exit(void) +{ + i2c_del_driver(&kxtj9_driver); +} +module_exit(kxtj9_exit); + +MODULE_DESCRIPTION("KXTJ9 accelerometer driver"); +MODULE_AUTHOR("Chris Hudson <chudson@kionix.com>"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/mma8450.c b/drivers/input/misc/mma8450.c new file mode 100644 index 000000000000..20f8f9284f02 --- /dev/null +++ b/drivers/input/misc/mma8450.c @@ -0,0 +1,256 @@ +/* + * Driver for Freescale's 3-Axis Accelerometer MMA8450 + * + * Copyright (C) 2011 Freescale Semiconductor, Inc. All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> +#include <linux/i2c.h> +#include <linux/input-polldev.h> + +#define MMA8450_DRV_NAME "mma8450" + +#define MODE_CHANGE_DELAY_MS 100 +#define POLL_INTERVAL 100 +#define POLL_INTERVAL_MAX 500 + +/* register definitions */ +#define MMA8450_STATUS 0x00 +#define MMA8450_STATUS_ZXYDR 0x08 + +#define MMA8450_OUT_X8 0x01 +#define MMA8450_OUT_Y8 0x02 +#define MMA8450_OUT_Z8 0x03 + +#define MMA8450_OUT_X_LSB 0x05 +#define MMA8450_OUT_X_MSB 0x06 +#define MMA8450_OUT_Y_LSB 0x07 +#define MMA8450_OUT_Y_MSB 0x08 +#define MMA8450_OUT_Z_LSB 0x09 +#define MMA8450_OUT_Z_MSB 0x0a + +#define MMA8450_XYZ_DATA_CFG 0x16 + +#define MMA8450_CTRL_REG1 0x38 +#define MMA8450_CTRL_REG2 0x39 + +/* mma8450 status */ +struct mma8450 { + struct i2c_client *client; + struct input_polled_dev *idev; +}; + +static int mma8450_read(struct mma8450 *m, unsigned off) +{ + struct i2c_client *c = m->client; + int ret; + + ret = i2c_smbus_read_byte_data(c, off); + if (ret < 0) + dev_err(&c->dev, + "failed to read register 0x%02x, error %d\n", + off, ret); + + return ret; +} + +static int mma8450_write(struct mma8450 *m, unsigned off, u8 v) +{ + struct i2c_client *c = m->client; + int error; + + error = i2c_smbus_write_byte_data(c, off, v); + if (error < 0) { + dev_err(&c->dev, + "failed to write to register 0x%02x, error %d\n", + off, error); + return error; + } + + return 0; +} + +static int mma8450_read_xyz(struct mma8450 *m, int *x, int *y, int *z) +{ + struct i2c_client *c = m->client; + u8 buff[6]; + int err; + + err = i2c_smbus_read_i2c_block_data(c, MMA8450_OUT_X_LSB, 6, buff); + if (err < 0) { + dev_err(&c->dev, + "failed to read block data at 0x%02x, error %d\n", + MMA8450_OUT_X_LSB, err); + return err; + } + + *x = ((buff[1] << 4) & 0xff0) | (buff[0] & 0xf); + *y = ((buff[3] << 4) & 0xff0) | (buff[2] & 0xf); + *z = ((buff[5] << 4) & 0xff0) | (buff[4] & 0xf); + + return 0; +} + +static void mma8450_poll(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + int x, y, z; + int ret; + int err; + + ret = mma8450_read(m, MMA8450_STATUS); + if (ret < 0) + return; + + if (!(ret & MMA8450_STATUS_ZXYDR)) + return; + + err = mma8450_read_xyz(m, &x, &y, &z); + if (err) + return; + + input_report_abs(dev->input, ABS_X, x); + input_report_abs(dev->input, ABS_Y, y); + input_report_abs(dev->input, ABS_Z, z); + input_sync(dev->input); +} + +/* Initialize the MMA8450 chip */ +static void mma8450_open(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + int err; + + /* enable all events from X/Y/Z, no FIFO */ + err = mma8450_write(m, MMA8450_XYZ_DATA_CFG, 0x07); + if (err) + return; + + /* + * Sleep mode poll rate - 50Hz + * System output data rate - 400Hz + * Full scale selection - Active, +/- 2G + */ + err = mma8450_write(m, MMA8450_CTRL_REG1, 0x01); + if (err < 0) + return; + + msleep(MODE_CHANGE_DELAY_MS); +} + +static void mma8450_close(struct input_polled_dev *dev) +{ + struct mma8450 *m = dev->private; + + mma8450_write(m, MMA8450_CTRL_REG1, 
0x00);
+	mma8450_write(m, MMA8450_CTRL_REG2, 0x01);
+}
+
+/*
+ * I2C init/probing/exit functions
+ */
+static int __devinit mma8450_probe(struct i2c_client *c,
+				   const struct i2c_device_id *id)
+{
+	struct input_polled_dev *idev;
+	struct mma8450 *m;
+	int err;
+
+	m = kzalloc(sizeof(struct mma8450), GFP_KERNEL);
+	idev = input_allocate_polled_device();
+	if (!m || !idev) {
+		err = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	m->client = c;
+	m->idev = idev;
+
+	idev->private		= m;
+	idev->input->name	= MMA8450_DRV_NAME;
+	idev->input->id.bustype	= BUS_I2C;
+	idev->poll		= mma8450_poll;
+	idev->poll_interval	= POLL_INTERVAL;
+	idev->poll_interval_max	= POLL_INTERVAL_MAX;
+	idev->open		= mma8450_open;
+	idev->close		= mma8450_close;
+
+	__set_bit(EV_ABS, idev->input->evbit);
+	input_set_abs_params(idev->input, ABS_X, -2048, 2047, 32, 32);
+	input_set_abs_params(idev->input, ABS_Y, -2048, 2047, 32, 32);
+	input_set_abs_params(idev->input, ABS_Z, -2048, 2047, 32, 32);
+
+	err = input_register_polled_device(idev);
+	if (err) {
+		dev_err(&c->dev, "failed to register polled input device\n");
+		goto err_free_mem;
+	}
+
+	/* mma8450_remove() retrieves this; without it remove() would
+	 * dereference a NULL clientdata pointer. */
+	i2c_set_clientdata(c, m);
+
+	return 0;
+
+err_free_mem:
+	input_free_polled_device(idev);
+	kfree(m);
+	return err;
+}
+
+static int __devexit mma8450_remove(struct i2c_client *c)
+{
+	struct mma8450 *m = i2c_get_clientdata(c);
+	struct input_polled_dev *idev = m->idev;
+
+	input_unregister_polled_device(idev);
+	input_free_polled_device(idev);
+	kfree(m);
+
+	return 0;
+}
+
+static const struct i2c_device_id mma8450_id[] = {
+	{ MMA8450_DRV_NAME, 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(i2c, mma8450_id);
+
+static struct i2c_driver mma8450_driver = {
+	.driver = {
+		.name	= MMA8450_DRV_NAME,
+		.owner	= THIS_MODULE,
+	},
+	.probe		= mma8450_probe,
+	.remove		= __devexit_p(mma8450_remove),
+	.id_table	= mma8450_id,
+};
+
+static int __init mma8450_init(void)
+{
+	return i2c_add_driver(&mma8450_driver);
+}
+module_init(mma8450_init);
+
+static void __exit mma8450_exit(void)
+{
+	i2c_del_driver(&mma8450_driver);
+}
+module_exit(mma8450_exit);
+
+MODULE_AUTHOR("Freescale Semiconductor, Inc.");
+MODULE_DESCRIPTION("MMA8450 3-Axis Accelerometer Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/input/misc/mpu3050.c b/drivers/input/misc/mpu3050.c
new file mode 100644
index 000000000000..b95fac15b2ea
--- /dev/null
+++ b/drivers/input/misc/mpu3050.c
@@ -0,0 +1,376 @@
+/*
+ * MPU3050 Tri-axis gyroscope driver
+ *
+ * Copyright (C) 2011 Wistron Co.Ltd
+ * Joseph Lai <joseph_lai@wistron.com>
+ *
+ * Trimmed down by Alan Cox <alan@linux.intel.com> to produce this version
+ *
+ * This is a 'lite' version of the driver, while we consider the right way
+ * to present the other features to user space. In particular it requires
+ * that the device has an IRQ, and it only provides an input interface, so
+ * it is not much use for device orientation. A fuller version is available
+ * from the Meego tree.
+ *
+ * This program is based on bma023.c.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/delay.h>
+#include <linux/slab.h>
+#include <linux/pm_runtime.h>
+
+#define MPU3050_CHIP_ID_REG	0x00
+#define MPU3050_CHIP_ID		0x69
+#define MPU3050_XOUT_H		0x1D
+#define MPU3050_PWR_MGM		0x3E
+#define MPU3050_PWR_MGM_POS	6
+#define MPU3050_PWR_MGM_MASK	0x40
+
+#define MPU3050_AUTO_DELAY	1000
+
+#define MPU3050_MIN_VALUE	-32768
+#define MPU3050_MAX_VALUE	32767
+
+struct axis_data {
+	s16 x;
+	s16 y;
+	s16 z;
+};
+
+struct mpu3050_sensor {
+	struct i2c_client *client;
+	struct device *dev;
+	struct input_dev *idev;
+};
+
+/**
+ * mpu3050_xyz_read_reg - read the axes values
+ * @client: i2c client for the sensor
+ * @buffer: buffer that receives the register values
+ * @length: number of bytes to read
+ *
+ * Reads the register values in one transaction or returns a negative
+ * error code on failure.
+ */
+static int mpu3050_xyz_read_reg(struct i2c_client *client,
+				u8 *buffer, int length)
+{
+	/*
+	 * Annoyingly we can't make this const because the i2c layer doesn't
+	 * declare input buffers const.
+	 */
+	char cmd = MPU3050_XOUT_H;
+	struct i2c_msg msg[] = {
+		{
+			.addr = client->addr,
+			.flags = 0,
+			.len = 1,
+			.buf = &cmd,
+		},
+		{
+			.addr = client->addr,
+			.flags = I2C_M_RD,
+			.len = length,
+			.buf = buffer,
+		},
+	};
+
+	return i2c_transfer(client->adapter, msg, 2);
+}
+
+/**
+ * mpu3050_read_xyz - get co-ordinates from device
+ * @client: i2c client of the sensor
+ * @coords: co-ordinates to update
+ *
+ * Return the converted X, Y and Z co-ordinates from the sensor device.
+ */
+static void mpu3050_read_xyz(struct i2c_client *client,
+			     struct axis_data *coords)
+{
+	u16 buffer[3];
+
+	mpu3050_xyz_read_reg(client, (u8 *)buffer, 6);
+	coords->x = be16_to_cpu(buffer[0]);
+	coords->y = be16_to_cpu(buffer[1]);
+	coords->z = be16_to_cpu(buffer[2]);
+	dev_dbg(&client->dev, "%s: x %d, y %d, z %d\n", __func__,
+		coords->x, coords->y, coords->z);
+}
+
+/**
+ * mpu3050_set_power_mode - set the power mode
+ * @client: i2c client for the sensor
+ * @val: power switch value: 1 for normal power, 0 for low power
+ *
+ * Put the device into normal-power or low-power mode.
+ */
+static void mpu3050_set_power_mode(struct i2c_client *client, u8 val)
+{
+	u8 value;
+
+	value = i2c_smbus_read_byte_data(client, MPU3050_PWR_MGM);
+	value = (value & ~MPU3050_PWR_MGM_MASK) |
+		(((val << MPU3050_PWR_MGM_POS) & MPU3050_PWR_MGM_MASK) ^
+		 MPU3050_PWR_MGM_MASK);
+	i2c_smbus_write_byte_data(client, MPU3050_PWR_MGM, value);
+}
+
+/**
+ * mpu3050_input_open - called on input event open
+ * @input: input dev of opened device
+ *
+ * The input layer calls this function when the event node is opened. The
+ * function pushes the device to resume; the device is then ready to
+ * provide data.
+ */
+static int mpu3050_input_open(struct input_dev *input)
+{
+	struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+	pm_runtime_get(sensor->dev);
+
+	return 0;
+}
+
+/**
+ * mpu3050_input_close - called on input event close
+ * @input: input dev of closed device
+ *
+ * The input layer calls this function when the event node is closed. The
+ * function pushes the device to suspend.
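+ *
+ * The open/close pair is reference counted by the input core: open runs
+ * for the first reader of the event node and close for the last, so each
+ * pm_runtime_get() taken in open is balanced by the pm_runtime_put()
+ * below. A small illustration of the lifecycle as seen from userspace
+ * (the event node name is hypothetical):
+ *
+ *	int fd = open("/dev/input/event0", O_RDONLY); // -> mpu3050_input_open()
+ *	... read struct input_event records ...
+ *	close(fd);                                    // -> mpu3050_input_close()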
+ */
+static void mpu3050_input_close(struct input_dev *input)
+{
+	struct mpu3050_sensor *sensor = input_get_drvdata(input);
+
+	pm_runtime_put(sensor->dev);
+}
+
+/**
+ * mpu3050_interrupt_thread - handle an IRQ
+ * @irq: interrupt number
+ * @data: the sensor
+ *
+ * Called by the kernel in a single threaded context after an interrupt
+ * occurs. Read the sensor data and generate an input event for it.
+ */
+static irqreturn_t mpu3050_interrupt_thread(int irq, void *data)
+{
+	struct mpu3050_sensor *sensor = data;
+	struct axis_data axis;
+
+	mpu3050_read_xyz(sensor->client, &axis);
+
+	input_report_abs(sensor->idev, ABS_X, axis.x);
+	input_report_abs(sensor->idev, ABS_Y, axis.y);
+	input_report_abs(sensor->idev, ABS_Z, axis.z);
+	input_sync(sensor->idev);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * mpu3050_probe - device detection callback
+ * @client: i2c client of found device
+ * @id: id match information
+ *
+ * The I2C layer calls us when it believes a sensor is present at this
+ * address. Probe to see if this is correct and to validate the device.
+ *
+ * If present, install the relevant sysfs interfaces and input device.
+ */
+static int __devinit mpu3050_probe(struct i2c_client *client,
+				   const struct i2c_device_id *id)
+{
+	struct mpu3050_sensor *sensor;
+	struct input_dev *idev;
+	int ret;
+	int error;
+
+	sensor = kzalloc(sizeof(struct mpu3050_sensor), GFP_KERNEL);
+	idev = input_allocate_device();
+	if (!sensor || !idev) {
+		dev_err(&client->dev, "failed to allocate driver data\n");
+		error = -ENOMEM;
+		goto err_free_mem;
+	}
+
+	sensor->client = client;
+	sensor->dev = &client->dev;
+	sensor->idev = idev;
+
+	mpu3050_set_power_mode(client, 1);
+	msleep(10);
+
+	ret = i2c_smbus_read_byte_data(client, MPU3050_CHIP_ID_REG);
+	if (ret < 0) {
+		dev_err(&client->dev, "failed to detect device\n");
+		error = -ENXIO;
+		goto err_free_mem;
+	}
+
+	if (ret != MPU3050_CHIP_ID) {
+		dev_err(&client->dev, "unsupported chip id\n");
+		error = -ENXIO;
+		goto err_free_mem;
+	}
+
+	idev->name = "MPU3050";
+	idev->id.bustype = BUS_I2C;
+	idev->dev.parent = &client->dev;
+
+	idev->open = mpu3050_input_open;
+	idev->close = mpu3050_input_close;
+
+	__set_bit(EV_ABS, idev->evbit);
+	input_set_abs_params(idev, ABS_X,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+	input_set_abs_params(idev, ABS_Y,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+	input_set_abs_params(idev, ABS_Z,
+			     MPU3050_MIN_VALUE, MPU3050_MAX_VALUE, 0, 0);
+
+	input_set_drvdata(idev, sensor);
+
+	pm_runtime_set_active(&client->dev);
+
+	error = request_threaded_irq(client->irq,
+				     NULL, mpu3050_interrupt_thread,
+				     IRQF_TRIGGER_RISING,
+				     "mpu_int", sensor);
+	if (error) {
+		dev_err(&client->dev,
+			"can't get IRQ %d, error %d\n", client->irq, error);
+		goto err_pm_set_suspended;
+	}
+
+	error = input_register_device(idev);
+	if (error) {
+		dev_err(&client->dev, "failed to register input device\n");
+		goto err_free_irq;
+	}
+
+	/* mpu3050_remove() relies on this being set */
+	i2c_set_clientdata(client, sensor);
+
+	pm_runtime_enable(&client->dev);
+	pm_runtime_set_autosuspend_delay(&client->dev, MPU3050_AUTO_DELAY);
+
+	return 0;
+
+err_free_irq:
+	free_irq(client->irq, sensor);
+err_pm_set_suspended:
+	pm_runtime_set_suspended(&client->dev);
+err_free_mem:
+	input_free_device(idev);	/* never registered on these paths */
+	kfree(sensor);
+	return error;
+}
+
+/**
+ * mpu3050_remove - remove a sensor
+ * @client: i2c client of sensor being removed
+ *
+ * Our sensor is going away, clean up the resources.
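+ *
+ * Teardown mirrors probe in reverse (a summary of the code below, not
+ * extra behaviour): disable runtime PM, mark the device suspended, free
+ * the IRQ, unregister the input device (which drops the final reference
+ * to it), and free the driver data.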
+ */ +static int __devexit mpu3050_remove(struct i2c_client *client) +{ + struct mpu3050_sensor *sensor = i2c_get_clientdata(client); + + pm_runtime_disable(&client->dev); + pm_runtime_set_suspended(&client->dev); + + free_irq(client->irq, sensor); + input_unregister_device(sensor->idev); + kfree(sensor); + + return 0; +} + +#ifdef CONFIG_PM +/** + * mpu3050_suspend - called on device suspend + * @dev: device being suspended + * + * Put the device into sleep mode before we suspend the machine. + */ +static int mpu3050_suspend(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + mpu3050_set_power_mode(client, 0); + + return 0; +} + +/** + * mpu3050_resume - called on device resume + * @dev: device being resumed + * + * Put the device into powered mode on resume. + */ +static int mpu3050_resume(struct device *dev) +{ + struct i2c_client *client = to_i2c_client(dev); + + mpu3050_set_power_mode(client, 1); + msleep(100); /* wait for gyro chip resume */ + + return 0; +} +#endif + +static UNIVERSAL_DEV_PM_OPS(mpu3050_pm, mpu3050_suspend, mpu3050_resume, NULL); + +static const struct i2c_device_id mpu3050_ids[] = { + { "mpu3050", 0 }, + { } +}; +MODULE_DEVICE_TABLE(i2c, mpu3050_ids); + +static struct i2c_driver mpu3050_i2c_driver = { + .driver = { + .name = "mpu3050", + .owner = THIS_MODULE, + .pm = &mpu3050_pm, + }, + .probe = mpu3050_probe, + .remove = __devexit_p(mpu3050_remove), + .id_table = mpu3050_ids, +}; + +static int __init mpu3050_init(void) +{ + return i2c_add_driver(&mpu3050_i2c_driver); +} +module_init(mpu3050_init); + +static void __exit mpu3050_exit(void) +{ + i2c_del_driver(&mpu3050_i2c_driver); +} +module_exit(mpu3050_exit); + +MODULE_AUTHOR("Wistron Corp."); +MODULE_DESCRIPTION("MPU3050 Tri-axis gyroscope driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/input/misc/xen-kbdfront.c b/drivers/input/misc/xen-kbdfront.c index 62bae99424e6..ad2e51c04db8 100644 --- a/drivers/input/misc/xen-kbdfront.c +++ b/drivers/input/misc/xen-kbdfront.c @@ -373,7 +373,7 @@ static struct xenbus_driver xenkbd_driver = { static int __init xenkbd_init(void) { - if (!xen_pv_domain()) + if (!xen_domain()) return -ENODEV; /* Nothing to do if running in dom0. 
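 *
 * (An editorial gloss on the one-line change above, based on the usual
 * Xen helper semantics: xen_domain() is true in any Xen guest, whether
 * PV or HVM, while xen_pv_domain() is true only for pure PV guests, so
 * the relaxed test lets PV-on-HVM guests use this virtual keyboard too.)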
*/ diff --git a/drivers/input/mouse/gpio_mouse.c b/drivers/input/mouse/gpio_mouse.c index 7b6ce178f1b6..58902fbb9896 100644 --- a/drivers/input/mouse/gpio_mouse.c +++ b/drivers/input/mouse/gpio_mouse.c @@ -191,7 +191,7 @@ static void __exit gpio_mouse_exit(void) } module_exit(gpio_mouse_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("GPIO mouse driver"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:gpio_mouse"); /* work with hotplug and coldplug */ diff --git a/drivers/input/mouse/lifebook.c b/drivers/input/mouse/lifebook.c index c31ad11df6bb..83bcaba96b89 100644 --- a/drivers/input/mouse/lifebook.c +++ b/drivers/input/mouse/lifebook.c @@ -33,7 +33,7 @@ static const char *desired_serio_phys; static int lifebook_limit_serio3(const struct dmi_system_id *d) { desired_serio_phys = "isa0060/serio3"; - return 0; + return 1; } static bool lifebook_use_6byte_proto; @@ -41,7 +41,7 @@ static bool lifebook_use_6byte_proto; static int lifebook_set_6byte_proto(const struct dmi_system_id *d) { lifebook_use_6byte_proto = true; - return 0; + return 1; } static const struct dmi_system_id __initconst lifebook_dmi_table[] = { diff --git a/drivers/input/mouse/pxa930_trkball.c b/drivers/input/mouse/pxa930_trkball.c index 943cfec15665..6c5d84fcdea1 100644 --- a/drivers/input/mouse/pxa930_trkball.c +++ b/drivers/input/mouse/pxa930_trkball.c @@ -12,7 +12,6 @@ #include <linux/init.h> #include <linux/input.h> -#include <linux/version.h> #include <linux/interrupt.h> #include <linux/module.h> #include <linux/platform_device.h> diff --git a/drivers/input/mouse/sentelic.c b/drivers/input/mouse/sentelic.c index 1242775fee19..2fc887a51066 100644 --- a/drivers/input/mouse/sentelic.c +++ b/drivers/input/mouse/sentelic.c @@ -20,7 +20,6 @@ */ #include <linux/module.h> -#include <linux/version.h> #include <linux/input.h> #include <linux/ctype.h> #include <linux/libps2.h> diff --git a/drivers/input/mouse/synaptics.c b/drivers/input/mouse/synaptics.c index e06e045bf907..5538fc657af1 100644 --- a/drivers/input/mouse/synaptics.c +++ b/drivers/input/mouse/synaptics.c @@ -207,27 +207,37 @@ static int synaptics_identify(struct psmouse *psmouse) static int synaptics_resolution(struct psmouse *psmouse) { struct synaptics_data *priv = psmouse->private; - unsigned char res[3]; - unsigned char max[3]; + unsigned char resp[3]; if (SYN_ID_MAJOR(priv->identity) < 4) return 0; - if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, res) == 0) { - if (res[0] != 0 && (res[1] & 0x80) && res[2] != 0) { - priv->x_res = res[0]; /* x resolution in units/mm */ - priv->y_res = res[2]; /* y resolution in units/mm */ + if (synaptics_send_cmd(psmouse, SYN_QUE_RESOLUTION, resp) == 0) { + if (resp[0] != 0 && (resp[1] & 0x80) && resp[2] != 0) { + priv->x_res = resp[0]; /* x resolution in units/mm */ + priv->y_res = resp[2]; /* y resolution in units/mm */ } } if (SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 5 && SYN_CAP_MAX_DIMENSIONS(priv->ext_cap_0c)) { - if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_DIMENSIONS, max)) { - printk(KERN_ERR "Synaptics claims to have dimensions query," - " but I'm not able to read it.\n"); + if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MAX_COORDS, resp)) { + printk(KERN_ERR "Synaptics claims to have max coordinates" + " query, but I'm not able to read it.\n"); + } else { + priv->x_max = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); + priv->y_max = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); + } + } + + if 
(SYN_EXT_CAP_REQUESTS(priv->capabilities) >= 7 && + SYN_CAP_MIN_DIMENSIONS(priv->ext_cap_0c)) { + if (synaptics_send_cmd(psmouse, SYN_QUE_EXT_MIN_COORDS, resp)) { + printk(KERN_ERR "Synaptics claims to have min coordinates" + " query, but I'm not able to read it.\n"); } else { - priv->x_max = (max[0] << 5) | ((max[1] & 0x0f) << 1); - priv->y_max = (max[2] << 5) | ((max[1] & 0xf0) >> 3); + priv->x_min = (resp[0] << 5) | ((resp[1] & 0x0f) << 1); + priv->y_min = (resp[2] << 5) | ((resp[1] & 0xf0) >> 3); } } @@ -406,26 +416,10 @@ static int synaptics_parse_hw_state(const unsigned char buf[], memset(hw, 0, sizeof(struct synaptics_hw_state)); if (SYN_MODEL_NEWABS(priv->model_id)) { - hw->x = (((buf[3] & 0x10) << 8) | - ((buf[1] & 0x0f) << 8) | - buf[4]); - hw->y = (((buf[3] & 0x20) << 7) | - ((buf[1] & 0xf0) << 4) | - buf[5]); - - hw->z = buf[2]; hw->w = (((buf[0] & 0x30) >> 2) | ((buf[0] & 0x04) >> 1) | ((buf[3] & 0x04) >> 2)); - if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) { - /* Gesture packet: (x, y, z) at half resolution */ - priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1; - priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1; - priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1; - return 1; - } - hw->left = (buf[0] & 0x01) ? 1 : 0; hw->right = (buf[0] & 0x02) ? 1 : 0; @@ -448,6 +442,22 @@ static int synaptics_parse_hw_state(const unsigned char buf[], hw->down = ((buf[0] ^ buf[3]) & 0x02) ? 1 : 0; } + if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c) && hw->w == 2) { + /* Gesture packet: (x, y, z) at half resolution */ + priv->mt.x = (((buf[4] & 0x0f) << 8) | buf[1]) << 1; + priv->mt.y = (((buf[4] & 0xf0) << 4) | buf[2]) << 1; + priv->mt.z = ((buf[3] & 0x30) | (buf[5] & 0x0f)) << 1; + return 1; + } + + hw->x = (((buf[3] & 0x10) << 8) | + ((buf[1] & 0x0f) << 8) | + buf[4]); + hw->y = (((buf[3] & 0x20) << 7) | + ((buf[1] & 0xf0) << 4) | + buf[5]); + hw->z = buf[2]; + if (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) && ((buf[0] ^ buf[3]) & 0x02)) { switch (SYN_CAP_MULTI_BUTTON_NO(priv->ext_cap) & ~0x01) { @@ -485,7 +495,8 @@ static int synaptics_parse_hw_state(const unsigned char buf[], return 0; } -static void set_slot(struct input_dev *dev, int slot, bool active, int x, int y) +static void synaptics_report_semi_mt_slot(struct input_dev *dev, int slot, + bool active, int x, int y) { input_mt_slot(dev, slot); input_mt_report_slot_state(dev, MT_TOOL_FINGER, active); @@ -502,14 +513,16 @@ static void synaptics_report_semi_mt_data(struct input_dev *dev, int num_fingers) { if (num_fingers >= 2) { - set_slot(dev, 0, true, min(a->x, b->x), min(a->y, b->y)); - set_slot(dev, 1, true, max(a->x, b->x), max(a->y, b->y)); + synaptics_report_semi_mt_slot(dev, 0, true, min(a->x, b->x), + min(a->y, b->y)); + synaptics_report_semi_mt_slot(dev, 1, true, max(a->x, b->x), + max(a->y, b->y)); } else if (num_fingers == 1) { - set_slot(dev, 0, true, a->x, a->y); - set_slot(dev, 1, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 0, true, a->x, a->y); + synaptics_report_semi_mt_slot(dev, 1, false, 0, 0); } else { - set_slot(dev, 0, false, 0, 0); - set_slot(dev, 1, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 0, false, 0, 0); + synaptics_report_semi_mt_slot(dev, 1, false, 0, 0); } } @@ -684,23 +697,36 @@ static psmouse_ret_t synaptics_process_byte(struct psmouse *psmouse) static void set_input_params(struct input_dev *dev, struct synaptics_data *priv) { int i; + int fuzz = SYN_CAP_REDUCED_FILTERING(priv->ext_cap_0c) ? 
+ SYN_REDUCED_FILTER_FUZZ : 0; __set_bit(INPUT_PROP_POINTER, dev->propbit); __set_bit(EV_ABS, dev->evbit); input_set_abs_params(dev, ABS_X, - XMIN_NOMINAL, priv->x_max ?: XMAX_NOMINAL, 0, 0); + priv->x_min ?: XMIN_NOMINAL, + priv->x_max ?: XMAX_NOMINAL, + fuzz, 0); input_set_abs_params(dev, ABS_Y, - YMIN_NOMINAL, priv->y_max ?: YMAX_NOMINAL, 0, 0); + priv->y_min ?: YMIN_NOMINAL, + priv->y_max ?: YMAX_NOMINAL, + fuzz, 0); input_set_abs_params(dev, ABS_PRESSURE, 0, 255, 0, 0); if (SYN_CAP_ADV_GESTURE(priv->ext_cap_0c)) { __set_bit(INPUT_PROP_SEMI_MT, dev->propbit); input_mt_init_slots(dev, 2); - input_set_abs_params(dev, ABS_MT_POSITION_X, XMIN_NOMINAL, - priv->x_max ?: XMAX_NOMINAL, 0, 0); - input_set_abs_params(dev, ABS_MT_POSITION_Y, YMIN_NOMINAL, - priv->y_max ?: YMAX_NOMINAL, 0, 0); + input_set_abs_params(dev, ABS_MT_POSITION_X, + priv->x_min ?: XMIN_NOMINAL, + priv->x_max ?: XMAX_NOMINAL, + fuzz, 0); + input_set_abs_params(dev, ABS_MT_POSITION_Y, + priv->y_min ?: YMIN_NOMINAL, + priv->y_max ?: YMAX_NOMINAL, + fuzz, 0); + + input_abs_set_res(dev, ABS_MT_POSITION_X, priv->x_res); + input_abs_set_res(dev, ABS_MT_POSITION_Y, priv->y_res); } if (SYN_CAP_PALMDETECT(priv->capabilities)) @@ -971,4 +997,3 @@ bool synaptics_supported(void) } #endif /* CONFIG_MOUSE_PS2_SYNAPTICS */ - diff --git a/drivers/input/mouse/synaptics.h b/drivers/input/mouse/synaptics.h index 7453938bf5ef..ca040aa80fa7 100644 --- a/drivers/input/mouse/synaptics.h +++ b/drivers/input/mouse/synaptics.h @@ -19,7 +19,8 @@ #define SYN_QUE_RESOLUTION 0x08 #define SYN_QUE_EXT_CAPAB 0x09 #define SYN_QUE_EXT_CAPAB_0C 0x0c -#define SYN_QUE_EXT_DIMENSIONS 0x0d +#define SYN_QUE_EXT_MAX_COORDS 0x0d +#define SYN_QUE_EXT_MIN_COORDS 0x0f /* synatics modes */ #define SYN_BIT_ABSOLUTE_MODE (1 << 7) @@ -66,18 +67,21 @@ * 1 0x60 multifinger mode identifies firmware finger counting * (not reporting!) algorithm. * Not particularly meaningful - * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered - * 2 0x01 clickpad bit 1 2-button ClickPad - * 2 0x02 deluxe LED controls touchpad support LED commands + * 1 0x80 covered pad W clipped to 14, 15 == pad mostly covered + * 2 0x01 clickpad bit 1 2-button ClickPad + * 2 0x02 deluxe LED controls touchpad support LED commands * ala multimedia control bar * 2 0x04 reduced filtering firmware does less filtering on * position data, driver should watch * for noise. 
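 *			(an editorial cross-reference, inferred from the
 *			code above: the driver tests this bit with
 *			SYN_CAP_REDUCED_FILTERING() and, when it is set,
 *			passes a non-zero fuzz of SYN_REDUCED_FILTER_FUZZ
 *			to input_set_abs_params() so the input core can
 *			smooth the noisier position data)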
+ * 2 0x20 report min query 0x0f gives min coord reported */ #define SYN_CAP_CLICKPAD(ex0c) ((ex0c) & 0x100000) /* 1-button ClickPad */ #define SYN_CAP_CLICKPAD2BTN(ex0c) ((ex0c) & 0x000100) /* 2-button ClickPad */ #define SYN_CAP_MAX_DIMENSIONS(ex0c) ((ex0c) & 0x020000) +#define SYN_CAP_MIN_DIMENSIONS(ex0c) ((ex0c) & 0x002000) #define SYN_CAP_ADV_GESTURE(ex0c) ((ex0c) & 0x080000) +#define SYN_CAP_REDUCED_FILTERING(ex0c) ((ex0c) & 0x000400) /* synaptics modes query bits */ #define SYN_MODE_ABSOLUTE(m) ((m) & (1 << 7)) @@ -104,6 +108,9 @@ #define SYN_NEWABS_RELAXED 2 #define SYN_OLDABS 3 +/* amount to fuzz position data when touchpad reports reduced filtering */ +#define SYN_REDUCED_FILTER_FUZZ 8 + /* * A structure to describe the state of the touchpad hardware (buttons and pad) */ @@ -130,7 +137,8 @@ struct synaptics_data { unsigned long int ext_cap_0c; /* Ext Caps from 0x0c query */ unsigned long int identity; /* Identification */ unsigned int x_res, y_res; /* X/Y resolution in units/mm */ - unsigned int x_max, y_max; /* Max dimensions (from FW) */ + unsigned int x_max, y_max; /* Max coordinates (from FW) */ + unsigned int x_min, y_min; /* Min coordinates (from FW) */ unsigned char pkt_type; /* packet type - old, new, etc */ unsigned char mode; /* current mode byte */ diff --git a/drivers/input/serio/at32psif.c b/drivers/input/serio/at32psif.c index 6ee8f0ddad51..95280f9207e1 100644 --- a/drivers/input/serio/at32psif.c +++ b/drivers/input/serio/at32psif.c @@ -372,6 +372,6 @@ static void __exit psif_exit(void) module_init(psif_init); module_exit(psif_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("Atmel AVR32 PSIF PS/2 driver"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c index 42206205e4f5..979c443bf1ef 100644 --- a/drivers/input/serio/hp_sdc.c +++ b/drivers/input/serio/hp_sdc.c @@ -795,7 +795,7 @@ int hp_sdc_release_cooked_irq(hp_sdc_irqhook *callback) /************************* Keepalive timer task *********************/ -void hp_sdc_kicker (unsigned long data) +static void hp_sdc_kicker(unsigned long data) { tasklet_schedule(&hp_sdc.task); /* Re-insert the periodic task. 
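	 * (An editorial aside: the change above can make hp_sdc_kicker()
	 * static because, as far as this hunk shows, it is used only as
	 * the keepalive timer callback inside hp_sdc.c; the tasklet it
	 * schedules does the real work.)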
*/ diff --git a/drivers/input/tablet/aiptek.c b/drivers/input/tablet/aiptek.c index 0a619c558bfb..6d89fd1842c3 100644 --- a/drivers/input/tablet/aiptek.c +++ b/drivers/input/tablet/aiptek.c @@ -225,7 +225,6 @@ /* toolMode codes */ #define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN -#define AIPTEK_TOOL_BUTTON_PEN_MODE BTN_TOOL_PEN #define AIPTEK_TOOL_BUTTON_PENCIL_MODE BTN_TOOL_PENCIL #define AIPTEK_TOOL_BUTTON_BRUSH_MODE BTN_TOOL_BRUSH #define AIPTEK_TOOL_BUTTON_AIRBRUSH_MODE BTN_TOOL_AIRBRUSH diff --git a/drivers/input/tablet/wacom_wac.c b/drivers/input/tablet/wacom_wac.c index 08ba5ad9c9be..03ebcc8b24b5 100644 --- a/drivers/input/tablet/wacom_wac.c +++ b/drivers/input/tablet/wacom_wac.c @@ -15,6 +15,7 @@ #include "wacom_wac.h" #include "wacom.h" #include <linux/input/mt.h> +#include <linux/hid.h> /* resolution for penabled devices */ #define WACOM_PL_RES 20 @@ -264,6 +265,7 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) wacom->id[0] = 0; input_report_abs(input, ABS_MISC, wacom->id[0]); /* report tool id */ input_report_key(input, wacom->tool[0], prox); + input_event(input, EV_MSC, MSC_SERIAL, 1); input_sync(input); /* sync last event */ } @@ -273,11 +275,10 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) prox = data[7] & 0xf8; if (prox || wacom->id[1]) { wacom->id[1] = PAD_DEVICE_ID; - input_report_key(input, BTN_0, (data[7] & 0x40)); - input_report_key(input, BTN_4, (data[7] & 0x80)); + input_report_key(input, BTN_BACK, (data[7] & 0x40)); + input_report_key(input, BTN_FORWARD, (data[7] & 0x80)); rw = ((data[7] & 0x18) >> 3) - ((data[7] & 0x20) >> 3); input_report_rel(input, REL_WHEEL, rw); - input_report_key(input, BTN_TOOL_FINGER, 0xf0); if (!prox) wacom->id[1] = 0; input_report_abs(input, ABS_MISC, wacom->id[1]); @@ -290,18 +291,17 @@ static int wacom_graphire_irq(struct wacom_wac *wacom) prox = (data[7] & 0xf8) || data[8]; if (prox || wacom->id[1]) { wacom->id[1] = PAD_DEVICE_ID; - input_report_key(input, BTN_0, (data[7] & 0x08)); - input_report_key(input, BTN_1, (data[7] & 0x20)); - input_report_key(input, BTN_4, (data[7] & 0x10)); - input_report_key(input, BTN_5, (data[7] & 0x40)); + input_report_key(input, BTN_BACK, (data[7] & 0x08)); + input_report_key(input, BTN_LEFT, (data[7] & 0x20)); + input_report_key(input, BTN_FORWARD, (data[7] & 0x10)); + input_report_key(input, BTN_RIGHT, (data[7] & 0x40)); input_report_abs(input, ABS_WHEEL, (data[8] & 0x7f)); - input_report_key(input, BTN_TOOL_FINGER, 0xf0); if (!prox) wacom->id[1] = 0; input_report_abs(input, ABS_MISC, wacom->id[1]); input_event(input, EV_MSC, MSC_SERIAL, 0xf0); + retval = 1; } - retval = 1; break; } exit: @@ -494,10 +494,6 @@ static int wacom_intuos_irq(struct wacom_wac *wacom) /* pad packets. 
Works as a second tool and is always in prox */ if (data[0] == WACOM_REPORT_INTUOSPAD) { - /* initiate the pad as a device */ - if (wacom->tool[1] != BTN_TOOL_FINGER) - wacom->tool[1] = BTN_TOOL_FINGER; - if (features->type >= INTUOS4S && features->type <= INTUOS4L) { input_report_key(input, BTN_0, (data[2] & 0x01)); input_report_key(input, BTN_1, (data[3] & 0x01)); @@ -1080,18 +1076,14 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, switch (wacom_wac->features.type) { case WACOM_MO: - __set_bit(BTN_1, input_dev->keybit); - __set_bit(BTN_5, input_dev->keybit); - input_set_abs_params(input_dev, ABS_WHEEL, 0, 71, 0, 0); /* fall through */ case WACOM_G4: input_set_capability(input_dev, EV_MSC, MSC_SERIAL); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - __set_bit(BTN_0, input_dev->keybit); - __set_bit(BTN_4, input_dev->keybit); + __set_bit(BTN_BACK, input_dev->keybit); + __set_bit(BTN_FORWARD, input_dev->keybit); /* fall through */ case GRAPHIRE: @@ -1127,10 +1119,12 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, case CINTIQ: for (i = 0; i < 8; i++) __set_bit(BTN_0 + i, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); - input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); + if (wacom_wac->features.type != WACOM_21UX2) { + input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); + input_set_abs_params(input_dev, ABS_RY, 0, 4096, 0, 0); + } + input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); wacom_setup_cintiq(wacom_wac); break; @@ -1151,8 +1145,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, __set_bit(BTN_2, input_dev->keybit); __set_bit(BTN_3, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); - input_set_abs_params(input_dev, ABS_RX, 0, 4096, 0, 0); input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); /* fall through */ @@ -1170,7 +1162,6 @@ void wacom_setup_input_capabilities(struct input_dev *input_dev, case INTUOS4S: for (i = 0; i < 7; i++) __set_bit(BTN_0 + i, input_dev->keybit); - __set_bit(BTN_TOOL_FINGER, input_dev->keybit); input_set_abs_params(input_dev, ABS_Z, -900, 899, 0, 0); wacom_setup_intuos(wacom_wac); @@ -1295,6 +1286,12 @@ static const struct wacom_features wacom_features_0x65 = static const struct wacom_features wacom_features_0x69 = { "Wacom Bamboo1", WACOM_PKGLEN_GRAPHIRE, 5104, 3712, 511, 63, GRAPHIRE, WACOM_PENPRTN_RES, WACOM_PENPRTN_RES }; +static const struct wacom_features wacom_features_0x6A = + { "Wacom Bamboo1 4x6", WACOM_PKGLEN_GRAPHIRE, 14760, 9225, 1023, + 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x6B = + { "Wacom Bamboo1 5x8", WACOM_PKGLEN_GRAPHIRE, 21648, 13530, 1023, + 63, GRAPHIRE, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x20 = { "Wacom Intuos 4x5", WACOM_PKGLEN_INTUOS, 12700, 10600, 1023, 31, INTUOS, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1427,6 +1424,9 @@ static const struct wacom_features wacom_features_0x90 = static const struct wacom_features wacom_features_0x93 = { "Wacom ISDv4 93", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255, 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; +static const struct wacom_features wacom_features_0x97 = + { "Wacom ISDv4 97", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 511, + 0, TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0x9A = { "Wacom ISDv4 9A", WACOM_PKGLEN_GRAPHIRE, 26202, 16325, 255, 0, 
TABLETPC, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; @@ -1458,7 +1458,7 @@ static const struct wacom_features wacom_features_0xD3 = { "Wacom Bamboo 2FG 6x8", WACOM_PKGLEN_BBFUN, 21648, 13530, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD4 = - { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 255, + { "Wacom Bamboo Pen", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, 63, BAMBOO_PT, WACOM_INTUOS_RES, WACOM_INTUOS_RES }; static const struct wacom_features wacom_features_0xD6 = { "Wacom BambooPT 2FG 4x5", WACOM_PKGLEN_BBFUN, 14720, 9200, 1023, @@ -1483,6 +1483,11 @@ static const struct wacom_features wacom_features_0x6004 = USB_DEVICE(USB_VENDOR_ID_WACOM, prod), \ .driver_info = (kernel_ulong_t)&wacom_features_##prod +#define USB_DEVICE_DETAILED(prod, class, sub, proto) \ + USB_DEVICE_AND_INTERFACE_INFO(USB_VENDOR_ID_WACOM, prod, class, \ + sub, proto), \ + .driver_info = (kernel_ulong_t)&wacom_features_##prod + #define USB_DEVICE_LENOVO(prod) \ USB_DEVICE(USB_VENDOR_ID_LENOVO, prod), \ .driver_info = (kernel_ulong_t)&wacom_features_##prod @@ -1506,6 +1511,8 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0x64) }, { USB_DEVICE_WACOM(0x65) }, { USB_DEVICE_WACOM(0x69) }, + { USB_DEVICE_WACOM(0x6A) }, + { USB_DEVICE_WACOM(0x6B) }, { USB_DEVICE_WACOM(0x20) }, { USB_DEVICE_WACOM(0x21) }, { USB_DEVICE_WACOM(0x22) }, @@ -1545,7 +1552,13 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xC5) }, { USB_DEVICE_WACOM(0xC6) }, { USB_DEVICE_WACOM(0xC7) }, - { USB_DEVICE_WACOM(0xCE) }, + /* + * DTU-2231 has two interfaces on the same configuration, + * only one is used. + */ + { USB_DEVICE_DETAILED(0xCE, USB_CLASS_HID, + USB_INTERFACE_SUBCLASS_BOOT, + USB_INTERFACE_PROTOCOL_MOUSE) }, { USB_DEVICE_WACOM(0xD0) }, { USB_DEVICE_WACOM(0xD1) }, { USB_DEVICE_WACOM(0xD2) }, @@ -1560,6 +1573,7 @@ const struct usb_device_id wacom_ids[] = { { USB_DEVICE_WACOM(0xCC) }, { USB_DEVICE_WACOM(0x90) }, { USB_DEVICE_WACOM(0x93) }, + { USB_DEVICE_WACOM(0x97) }, { USB_DEVICE_WACOM(0x9A) }, { USB_DEVICE_WACOM(0x9F) }, { USB_DEVICE_WACOM(0xE2) }, diff --git a/drivers/input/touchscreen/ads7846.c b/drivers/input/touchscreen/ads7846.c index 5196861b86ef..d507b9b67806 100644 --- a/drivers/input/touchscreen/ads7846.c +++ b/drivers/input/touchscreen/ads7846.c @@ -967,17 +967,12 @@ static int __devinit ads7846_setup_pendown(struct spi_device *spi, struct ads784 ts->get_pendown_state = pdata->get_pendown_state; } else if (gpio_is_valid(pdata->gpio_pendown)) { - err = gpio_request(pdata->gpio_pendown, "ads7846_pendown"); + err = gpio_request_one(pdata->gpio_pendown, GPIOF_IN, + "ads7846_pendown"); if (err) { - dev_err(&spi->dev, "failed to request pendown GPIO%d\n", - pdata->gpio_pendown); - return err; - } - err = gpio_direction_input(pdata->gpio_pendown); - if (err) { - dev_err(&spi->dev, "failed to setup pendown GPIO%d\n", - pdata->gpio_pendown); - gpio_free(pdata->gpio_pendown); + dev_err(&spi->dev, + "failed to request/setup pendown GPIO%d: %d\n", + pdata->gpio_pendown, err); return err; } diff --git a/drivers/input/touchscreen/atmel-wm97xx.c b/drivers/input/touchscreen/atmel-wm97xx.c index fa8e56bd9094..8034cbb20f74 100644 --- a/drivers/input/touchscreen/atmel-wm97xx.c +++ b/drivers/input/touchscreen/atmel-wm97xx.c @@ -164,7 +164,7 @@ static irqreturn_t atmel_wm97xx_channel_b_interrupt(int irq, void *dev_id) data = ac97c_readl(atmel_wm97xx, CBRHR); value = data & 0x0fff; - source = data & WM97XX_ADCSRC_MASK; + source = data & 
WM97XX_ADCSEL_MASK; pen_down = (data & WM97XX_PEN_DOWN) >> 8; if (source == WM97XX_ADCSEL_X) @@ -442,6 +442,6 @@ static void __exit atmel_wm97xx_exit(void) } module_exit(atmel_wm97xx_exit); -MODULE_AUTHOR("Hans-Christian Egtvedt <hans-christian.egtvedt@atmel.com>"); +MODULE_AUTHOR("Hans-Christian Egtvedt <egtvedt@samfundet.no>"); MODULE_DESCRIPTION("wm97xx continuous touch driver for Atmel AT91 and AVR32"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/atmel_mxt_ts.c b/drivers/input/touchscreen/atmel_mxt_ts.c index 1e61387c73ca..ae00604a6a81 100644 --- a/drivers/input/touchscreen/atmel_mxt_ts.c +++ b/drivers/input/touchscreen/atmel_mxt_ts.c @@ -48,41 +48,47 @@ #define MXT_OBJECT_SIZE 6 /* Object types */ -#define MXT_DEBUG_DIAGNOSTIC 37 -#define MXT_GEN_MESSAGE 5 -#define MXT_GEN_COMMAND 6 -#define MXT_GEN_POWER 7 -#define MXT_GEN_ACQUIRE 8 -#define MXT_TOUCH_MULTI 9 -#define MXT_TOUCH_KEYARRAY 15 -#define MXT_TOUCH_PROXIMITY 23 -#define MXT_PROCI_GRIPFACE 20 -#define MXT_PROCG_NOISE 22 -#define MXT_PROCI_ONETOUCH 24 -#define MXT_PROCI_TWOTOUCH 27 -#define MXT_PROCI_GRIP 40 -#define MXT_PROCI_PALM 41 -#define MXT_SPT_COMMSCONFIG 18 -#define MXT_SPT_GPIOPWM 19 -#define MXT_SPT_SELFTEST 25 -#define MXT_SPT_CTECONFIG 28 -#define MXT_SPT_USERDATA 38 -#define MXT_SPT_DIGITIZER 43 -#define MXT_SPT_MESSAGECOUNT 44 - -/* MXT_GEN_COMMAND field */ +#define MXT_DEBUG_DIAGNOSTIC_T37 37 +#define MXT_GEN_MESSAGE_T5 5 +#define MXT_GEN_COMMAND_T6 6 +#define MXT_GEN_POWER_T7 7 +#define MXT_GEN_ACQUIRE_T8 8 +#define MXT_GEN_DATASOURCE_T53 53 +#define MXT_TOUCH_MULTI_T9 9 +#define MXT_TOUCH_KEYARRAY_T15 15 +#define MXT_TOUCH_PROXIMITY_T23 23 +#define MXT_TOUCH_PROXKEY_T52 52 +#define MXT_PROCI_GRIPFACE_T20 20 +#define MXT_PROCG_NOISE_T22 22 +#define MXT_PROCI_ONETOUCH_T24 24 +#define MXT_PROCI_TWOTOUCH_T27 27 +#define MXT_PROCI_GRIP_T40 40 +#define MXT_PROCI_PALM_T41 41 +#define MXT_PROCI_TOUCHSUPPRESSION_T42 42 +#define MXT_PROCI_STYLUS_T47 47 +#define MXT_PROCG_NOISESUPPRESSION_T48 48 +#define MXT_SPT_COMMSCONFIG_T18 18 +#define MXT_SPT_GPIOPWM_T19 19 +#define MXT_SPT_SELFTEST_T25 25 +#define MXT_SPT_CTECONFIG_T28 28 +#define MXT_SPT_USERDATA_T38 38 +#define MXT_SPT_DIGITIZER_T43 43 +#define MXT_SPT_MESSAGECOUNT_T44 44 +#define MXT_SPT_CTECONFIG_T46 46 + +/* MXT_GEN_COMMAND_T6 field */ #define MXT_COMMAND_RESET 0 #define MXT_COMMAND_BACKUPNV 1 #define MXT_COMMAND_CALIBRATE 2 #define MXT_COMMAND_REPORTALL 3 #define MXT_COMMAND_DIAGNOSTIC 5 -/* MXT_GEN_POWER field */ +/* MXT_GEN_POWER_T7 field */ #define MXT_POWER_IDLEACQINT 0 #define MXT_POWER_ACTVACQINT 1 #define MXT_POWER_ACTV2IDLETO 2 -/* MXT_GEN_ACQUIRE field */ +/* MXT_GEN_ACQUIRE_T8 field */ #define MXT_ACQUIRE_CHRGTIME 0 #define MXT_ACQUIRE_TCHDRIFT 2 #define MXT_ACQUIRE_DRIFTST 3 @@ -91,7 +97,7 @@ #define MXT_ACQUIRE_ATCHCALST 6 #define MXT_ACQUIRE_ATCHCALSTHR 7 -/* MXT_TOUCH_MULTI field */ +/* MXT_TOUCH_MULTI_T9 field */ #define MXT_TOUCH_CTRL 0 #define MXT_TOUCH_XORIGIN 1 #define MXT_TOUCH_YORIGIN 2 @@ -121,7 +127,7 @@ #define MXT_TOUCH_YEDGEDIST 29 #define MXT_TOUCH_JUMPLIMIT 30 -/* MXT_PROCI_GRIPFACE field */ +/* MXT_PROCI_GRIPFACE_T20 field */ #define MXT_GRIPFACE_CTRL 0 #define MXT_GRIPFACE_XLOGRIP 1 #define MXT_GRIPFACE_XHIGRIP 2 @@ -151,11 +157,11 @@ #define MXT_NOISE_FREQ4 15 #define MXT_NOISE_IDLEGCAFVALID 16 -/* MXT_SPT_COMMSCONFIG */ +/* MXT_SPT_COMMSCONFIG_T18 */ #define MXT_COMMS_CTRL 0 #define MXT_COMMS_CMD 1 -/* MXT_SPT_CTECONFIG field */ +/* MXT_SPT_CTECONFIG_T28 field */ #define MXT_CTE_CTRL 0 #define MXT_CTE_CMD 1 
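/*
 * (An editorial note on the renames in this hunk: each object macro now
 * carries its maXTouch object number as a _Tnn suffix, and the suffix
 * matches the macro's value, e.g. MXT_GEN_COMMAND_T6 == 6 and
 * MXT_TOUCH_MULTI_T9 == 9, so the identifiers used in code line up with
 * the T6/T9/T28 object names used in Atmel's protocol documentation.)
 */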
#define MXT_CTE_MODE 2 @@ -166,7 +172,7 @@ #define MXT_VOLTAGE_DEFAULT 2700000 #define MXT_VOLTAGE_STEP 10000 -/* Define for MXT_GEN_COMMAND */ +/* Define for MXT_GEN_COMMAND_T6 */ #define MXT_BOOT_VALUE 0xa5 #define MXT_BACKUP_VALUE 0x55 #define MXT_BACKUP_TIME 25 /* msec */ @@ -256,24 +262,31 @@ struct mxt_data { static bool mxt_object_readable(unsigned int type) { switch (type) { - case MXT_GEN_MESSAGE: - case MXT_GEN_COMMAND: - case MXT_GEN_POWER: - case MXT_GEN_ACQUIRE: - case MXT_TOUCH_MULTI: - case MXT_TOUCH_KEYARRAY: - case MXT_TOUCH_PROXIMITY: - case MXT_PROCI_GRIPFACE: - case MXT_PROCG_NOISE: - case MXT_PROCI_ONETOUCH: - case MXT_PROCI_TWOTOUCH: - case MXT_PROCI_GRIP: - case MXT_PROCI_PALM: - case MXT_SPT_COMMSCONFIG: - case MXT_SPT_GPIOPWM: - case MXT_SPT_SELFTEST: - case MXT_SPT_CTECONFIG: - case MXT_SPT_USERDATA: + case MXT_GEN_MESSAGE_T5: + case MXT_GEN_COMMAND_T6: + case MXT_GEN_POWER_T7: + case MXT_GEN_ACQUIRE_T8: + case MXT_GEN_DATASOURCE_T53: + case MXT_TOUCH_MULTI_T9: + case MXT_TOUCH_KEYARRAY_T15: + case MXT_TOUCH_PROXIMITY_T23: + case MXT_TOUCH_PROXKEY_T52: + case MXT_PROCI_GRIPFACE_T20: + case MXT_PROCG_NOISE_T22: + case MXT_PROCI_ONETOUCH_T24: + case MXT_PROCI_TWOTOUCH_T27: + case MXT_PROCI_GRIP_T40: + case MXT_PROCI_PALM_T41: + case MXT_PROCI_TOUCHSUPPRESSION_T42: + case MXT_PROCI_STYLUS_T47: + case MXT_PROCG_NOISESUPPRESSION_T48: + case MXT_SPT_COMMSCONFIG_T18: + case MXT_SPT_GPIOPWM_T19: + case MXT_SPT_SELFTEST_T25: + case MXT_SPT_CTECONFIG_T28: + case MXT_SPT_USERDATA_T38: + case MXT_SPT_DIGITIZER_T43: + case MXT_SPT_CTECONFIG_T46: return true; default: return false; @@ -283,21 +296,28 @@ static bool mxt_object_readable(unsigned int type) static bool mxt_object_writable(unsigned int type) { switch (type) { - case MXT_GEN_COMMAND: - case MXT_GEN_POWER: - case MXT_GEN_ACQUIRE: - case MXT_TOUCH_MULTI: - case MXT_TOUCH_KEYARRAY: - case MXT_TOUCH_PROXIMITY: - case MXT_PROCI_GRIPFACE: - case MXT_PROCG_NOISE: - case MXT_PROCI_ONETOUCH: - case MXT_PROCI_TWOTOUCH: - case MXT_PROCI_GRIP: - case MXT_PROCI_PALM: - case MXT_SPT_GPIOPWM: - case MXT_SPT_SELFTEST: - case MXT_SPT_CTECONFIG: + case MXT_GEN_COMMAND_T6: + case MXT_GEN_POWER_T7: + case MXT_GEN_ACQUIRE_T8: + case MXT_TOUCH_MULTI_T9: + case MXT_TOUCH_KEYARRAY_T15: + case MXT_TOUCH_PROXIMITY_T23: + case MXT_TOUCH_PROXKEY_T52: + case MXT_PROCI_GRIPFACE_T20: + case MXT_PROCG_NOISE_T22: + case MXT_PROCI_ONETOUCH_T24: + case MXT_PROCI_TWOTOUCH_T27: + case MXT_PROCI_GRIP_T40: + case MXT_PROCI_PALM_T41: + case MXT_PROCI_TOUCHSUPPRESSION_T42: + case MXT_PROCI_STYLUS_T47: + case MXT_PROCG_NOISESUPPRESSION_T48: + case MXT_SPT_COMMSCONFIG_T18: + case MXT_SPT_GPIOPWM_T19: + case MXT_SPT_SELFTEST_T25: + case MXT_SPT_CTECONFIG_T28: + case MXT_SPT_DIGITIZER_T43: + case MXT_SPT_CTECONFIG_T46: return true; default: return false; @@ -455,7 +475,7 @@ static int mxt_read_message(struct mxt_data *data, struct mxt_object *object; u16 reg; - object = mxt_get_object(data, MXT_GEN_MESSAGE); + object = mxt_get_object(data, MXT_GEN_MESSAGE_T5); if (!object) return -EINVAL; @@ -597,8 +617,8 @@ static irqreturn_t mxt_interrupt(int irq, void *dev_id) reportid = message.reportid; - /* whether reportid is thing of MXT_TOUCH_MULTI */ - object = mxt_get_object(data, MXT_TOUCH_MULTI); + /* whether reportid is thing of MXT_TOUCH_MULTI_T9 */ + object = mxt_get_object(data, MXT_TOUCH_MULTI_T9); if (!object) goto end; @@ -635,7 +655,9 @@ static int mxt_check_reg_init(struct mxt_data *data) if (!mxt_object_writable(object->type)) continue; - for (j = 0; j < 
object->size + 1; j++) { + for (j = 0; + j < (object->size + 1) * (object->instances + 1); + j++) { config_offset = index + j; if (config_offset > pdata->config_length) { dev_err(dev, "Not enough config data!\n"); @@ -644,7 +666,7 @@ static int mxt_check_reg_init(struct mxt_data *data) mxt_write_object(data, object->type, j, pdata->config[config_offset]); } - index += object->size + 1; + index += (object->size + 1) * (object->instances + 1); } return 0; @@ -678,31 +700,31 @@ static void mxt_handle_pdata(struct mxt_data *data) u8 voltage; /* Set touchscreen lines */ - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_XSIZE, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XSIZE, pdata->x_line); - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_YSIZE, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YSIZE, pdata->y_line); /* Set touchscreen orient */ - mxt_write_object(data, MXT_TOUCH_MULTI, MXT_TOUCH_ORIENT, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_ORIENT, pdata->orient); /* Set touchscreen burst length */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_BLEN, pdata->blen); /* Set touchscreen threshold */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_TCHTHR, pdata->threshold); /* Set touchscreen resolution */ - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XRANGE_LSB, (pdata->x_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_XRANGE_MSB, (pdata->x_size - 1) >> 8); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YRANGE_LSB, (pdata->y_size - 1) & 0xff); - mxt_write_object(data, MXT_TOUCH_MULTI, + mxt_write_object(data, MXT_TOUCH_MULTI_T9, MXT_TOUCH_YRANGE_MSB, (pdata->y_size - 1) >> 8); /* Set touchscreen voltage */ @@ -715,7 +737,7 @@ static void mxt_handle_pdata(struct mxt_data *data) voltage = (pdata->voltage - MXT_VOLTAGE_DEFAULT) / MXT_VOLTAGE_STEP; - mxt_write_object(data, MXT_SPT_CTECONFIG, + mxt_write_object(data, MXT_SPT_CTECONFIG_T28, MXT_CTE_VOLTAGE, voltage); } } @@ -819,13 +841,13 @@ static int mxt_initialize(struct mxt_data *data) mxt_handle_pdata(data); /* Backup to memory */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_BACKUPNV, MXT_BACKUP_VALUE); msleep(MXT_BACKUP_TIME); /* Soft reset */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, 1); msleep(MXT_RESET_TIME); @@ -921,7 +943,7 @@ static int mxt_load_fw(struct device *dev, const char *fn) } /* Change to the bootloader mode */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, MXT_BOOT_VALUE); msleep(MXT_RESET_TIME); @@ -1027,14 +1049,14 @@ static void mxt_start(struct mxt_data *data) { /* Touch enable */ mxt_write_object(data, - MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0x83); + MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0x83); } static void mxt_stop(struct mxt_data *data) { /* Touch disable */ mxt_write_object(data, - MXT_TOUCH_MULTI, MXT_TOUCH_CTRL, 0); + MXT_TOUCH_MULTI_T9, MXT_TOUCH_CTRL, 0); } static int mxt_input_open(struct input_dev *dev) @@ -1182,7 +1204,7 @@ static int mxt_resume(struct device *dev) struct input_dev *input_dev = data->input_dev; /* Soft reset */ - mxt_write_object(data, MXT_GEN_COMMAND, + mxt_write_object(data, MXT_GEN_COMMAND_T6, MXT_COMMAND_RESET, 1); 
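	/*
	 * (Editorial comment: writing any non-zero value to the RESET
	 * field of the T6 command object reboots the controller, so the
	 * delay below must elapse before the chip responds again.)
	 */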
msleep(MXT_RESET_TIME); diff --git a/drivers/input/touchscreen/cy8ctmg110_ts.c b/drivers/input/touchscreen/cy8ctmg110_ts.c index a93c5c26ab3f..d8815c5d54ad 100644 --- a/drivers/input/touchscreen/cy8ctmg110_ts.c +++ b/drivers/input/touchscreen/cy8ctmg110_ts.c @@ -84,9 +84,9 @@ static int cy8ctmg110_write_regs(struct cy8ctmg110 *tsc, unsigned char reg, memcpy(i2c_data + 1, value, len); ret = i2c_master_send(client, i2c_data, len + 1); - if (ret != 1) { + if (ret != len + 1) { dev_err(&client->dev, "i2c write data cmd failed\n"); - return ret ? ret : -EIO; + return ret < 0 ? ret : -EIO; } return 0; @@ -193,6 +193,8 @@ static int __devinit cy8ctmg110_probe(struct i2c_client *client, ts->client = client; ts->input = input_dev; + ts->reset_pin = pdata->reset_pin; + ts->irq_pin = pdata->irq_pin; snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(&client->dev)); @@ -328,7 +330,7 @@ static int __devexit cy8ctmg110_remove(struct i2c_client *client) return 0; } -static struct i2c_device_id cy8ctmg110_idtable[] = { +static const struct i2c_device_id cy8ctmg110_idtable[] = { { CY8CTMG110_DRIVER_NAME, 1 }, { } }; diff --git a/drivers/input/touchscreen/intel-mid-touch.c b/drivers/input/touchscreen/intel-mid-touch.c index 66c96bfc5522..327695268e06 100644 --- a/drivers/input/touchscreen/intel-mid-touch.c +++ b/drivers/input/touchscreen/intel-mid-touch.c @@ -448,15 +448,11 @@ static int __devinit mrstouch_read_pmic_id(uint *vendor, uint *rev) */ static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev) { - int err, i, found; + int found = 0; + int err, i; u8 r8; - found = -1; - for (i = 0; i < MRSTOUCH_MAX_CHANNELS; i++) { - if (found >= 0) - break; - err = intel_scu_ipc_ioread8(PMICADDR0 + i, &r8); if (err) return err; @@ -466,16 +462,15 @@ static int __devinit mrstouch_chan_parse(struct mrstouch_dev *tsdev) break; } } - if (found < 0) - return 0; if (tsdev->vendor == PMIC_VENDOR_FS) { - if (found && found > (MRSTOUCH_MAX_CHANNELS - 18)) + if (found > MRSTOUCH_MAX_CHANNELS - 18) return -ENOSPC; } else { - if (found && found > (MRSTOUCH_MAX_CHANNELS - 4)) + if (found > MRSTOUCH_MAX_CHANNELS - 4) return -ENOSPC; } + return found; } diff --git a/drivers/input/touchscreen/mainstone-wm97xx.c b/drivers/input/touchscreen/mainstone-wm97xx.c index 3242e7076258..e966c29ff1bb 100644 --- a/drivers/input/touchscreen/mainstone-wm97xx.c +++ b/drivers/input/touchscreen/mainstone-wm97xx.c @@ -157,9 +157,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/drivers/input/touchscreen/tnetv107x-ts.c b/drivers/input/touchscreen/tnetv107x-ts.c index 22a3411e93c5..089b0a0f3d8c 100644 --- a/drivers/input/touchscreen/tnetv107x-ts.c +++ b/drivers/input/touchscreen/tnetv107x-ts.c @@ -393,5 +393,5 @@ module_exit(tsc_exit); MODULE_AUTHOR("Cyril Chemparathy"); MODULE_DESCRIPTION("TNETV107X Touchscreen Driver"); -MODULE_ALIAS("platform: tnetv107x-ts"); +MODULE_ALIAS("platform:tnetv107x-ts"); MODULE_LICENSE("GPL"); diff --git a/drivers/input/touchscreen/wm9705.c b/drivers/input/touchscreen/wm9705.c index 98e61175d3f5..adc13a523ab5 100644 --- a/drivers/input/touchscreen/wm9705.c +++ b/drivers/input/touchscreen/wm9705.c @@ -215,8 +215,9 @@ 
static inline int is_pden(struct wm97xx *wm) static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if (!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -224,13 +225,10 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -256,13 +254,14 @@ static int wm9705_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -277,14 +276,14 @@ static int wm9705_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) { int rc; - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X, &data->x); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x); if (rc != RC_VALID) return rc; - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y); if (rc != RC_VALID) return rc; if (pil) { - rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES, &data->p); + rc = wm9705_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; } else diff --git a/drivers/input/touchscreen/wm9712.c b/drivers/input/touchscreen/wm9712.c index 2bc2fb801009..6e743e3dfda4 100644 --- a/drivers/input/touchscreen/wm9712.c +++ b/drivers/input/touchscreen/wm9712.c @@ -255,8 +255,9 @@ static inline int is_pden(struct wm97xx *wm) static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if (!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -264,13 +265,10 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = ((adcsel & 0x7fff) + 3) << 12; - if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, - adcsel | WM97XX_POLL | WM97XX_DELAY(delay)); + wm97xx_reg_write(wm, AC97_WM97XX_DIGITISER1, (adcsel & WM97XX_ADCSEL_MASK) + | WM97XX_POLL | WM97XX_DELAY(delay)); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -296,13 +294,14 @@ static int wm9712_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSEL_MASK) != adcsel) { - 
dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSEL_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -387,16 +386,18 @@ static int wm9712_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) if (rc != RC_VALID) return rc; } else { - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X, &data->x); + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, + &data->x); if (rc != RC_VALID) return rc; - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y, &data->y); + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, + &data->y); if (rc != RC_VALID) return rc; if (pil && !five_wire) { - rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES, + rc = wm9712_poll_sample(wm, WM97XX_ADCSEL_PRES | WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; diff --git a/drivers/input/touchscreen/wm9713.c b/drivers/input/touchscreen/wm9713.c index 73ec99568f12..7405353199d7 100644 --- a/drivers/input/touchscreen/wm9713.c +++ b/drivers/input/touchscreen/wm9713.c @@ -261,8 +261,9 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) { u16 dig1; int timeout = 5 * delay; + bool wants_pen = adcsel & WM97XX_PEN_DOWN; - if (!wm->pen_probably_down) { + if (wants_pen && !wm->pen_probably_down) { u16 data = wm97xx_reg_read(wm, AC97_WM97XX_DIGITISER_RD); if (!(data & WM97XX_PEN_DOWN)) return RC_PENUP; @@ -270,15 +271,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) } /* set up digitiser */ - if (adcsel & 0x8000) - adcsel = 1 << ((adcsel & 0x7fff) + 3); - dig1 = wm97xx_reg_read(wm, AC97_WM9713_DIG1); dig1 &= ~WM9713_ADCSEL_MASK; + /* WM97XX_ADCSEL_* channels need to be converted to WM9713 format */ + dig1 |= 1 << ((adcsel & WM97XX_ADCSEL_MASK) >> 12); if (wm->mach_ops && wm->mach_ops->pre_sample) wm->mach_ops->pre_sample(adcsel); - wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | adcsel | WM9713_POLL); + wm97xx_reg_write(wm, AC97_WM9713_DIG1, dig1 | WM9713_POLL); /* wait 3 AC97 time slots + delay for conversion */ poll_delay(delay); @@ -304,13 +304,14 @@ static int wm9713_poll_sample(struct wm97xx *wm, int adcsel, int *sample) wm->mach_ops->post_sample(adcsel); /* check we have correct sample */ - if ((*sample & WM97XX_ADCSRC_MASK) != ffs(adcsel >> 1) << 12) { - dev_dbg(wm->dev, "adc wrong sample, read %x got %x", adcsel, - *sample & WM97XX_ADCSRC_MASK); + if ((*sample ^ adcsel) & WM97XX_ADCSEL_MASK) { + dev_dbg(wm->dev, "adc wrong sample, wanted %x got %x", + adcsel & WM97XX_ADCSEL_MASK, + *sample & WM97XX_ADCSEL_MASK); return RC_PENUP; } - if (!(*sample & WM97XX_PEN_DOWN)) { + if (wants_pen && !(*sample & WM97XX_PEN_DOWN)) { wm->pen_probably_down = 0; return RC_PENUP; } @@ -400,14 +401,14 @@ static int wm9713_poll_touch(struct wm97xx *wm, struct wm97xx_data *data) if (rc != RC_VALID) return rc; } else { - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_X, &data->x); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_X | WM97XX_PEN_DOWN, &data->x); if (rc != RC_VALID) return rc; - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_Y, &data->y); + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_Y | WM97XX_PEN_DOWN, &data->y); if (rc != RC_VALID) return rc; if (pil) { - rc = wm9713_poll_sample(wm, WM9713_ADCSEL_PRES, + rc = wm9713_poll_sample(wm, WM97XX_ADCSEL_PRES | 
WM97XX_PEN_DOWN, &data->p); if (rc != RC_VALID) return rc; diff --git a/drivers/input/touchscreen/zylonite-wm97xx.c b/drivers/input/touchscreen/zylonite-wm97xx.c index 5b0f15ec874a..f6328c0cded6 100644 --- a/drivers/input/touchscreen/zylonite-wm97xx.c +++ b/drivers/input/touchscreen/zylonite-wm97xx.c @@ -122,9 +122,9 @@ static int wm97xx_acc_pen_down(struct wm97xx *wm) x, y, p); /* are samples valid */ - if ((x & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_X || - (y & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_Y || - (p & WM97XX_ADCSRC_MASK) != WM97XX_ADCSEL_PRES) + if ((x & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_X || + (y & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_Y || + (p & WM97XX_ADCSEL_MASK) != WM97XX_ADCSEL_PRES) goto up; /* coordinate is good */ diff --git a/drivers/isdn/i4l/isdn_net.c b/drivers/isdn/i4l/isdn_net.c index 48e9cc0369b1..1f73d7f7e024 100644 --- a/drivers/isdn/i4l/isdn_net.c +++ b/drivers/isdn/i4l/isdn_net.c @@ -2532,6 +2532,9 @@ static void _isdn_setup(struct net_device *dev) /* Setup the generic properties */ dev->flags = IFF_NOARP|IFF_POINTOPOINT; + + /* isdn prepends a header in the tx path, can't share skbs */ + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->header_ops = NULL; dev->netdev_ops = &isdn_netdev_ops; diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 574b09afedd3..0dc6546b77a8 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -29,7 +29,6 @@ #include "md.h" #include "bitmap.h" -#include <linux/dm-dirty-log.h> /* debug macros */ #define DEBUG 0 @@ -775,10 +774,8 @@ static inline unsigned long file_page_offset(struct bitmap *bitmap, unsigned lon * 0 or page 1 */ static inline struct page *filemap_get_page(struct bitmap *bitmap, - unsigned long chunk) + unsigned long chunk) { - if (bitmap->filemap == NULL) - return NULL; if (file_page_index(bitmap, chunk) >= bitmap->file_pages) return NULL; return bitmap->filemap[file_page_index(bitmap, chunk) @@ -878,28 +875,19 @@ enum bitmap_page_attr { static inline void set_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - __set_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __set_bit(attr, &bitmap->logattrs); + __set_bit((page->index<<2) + attr, bitmap->filemap_attr); } static inline void clear_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - __clear_bit(attr, &bitmap->logattrs); + __clear_bit((page->index<<2) + attr, bitmap->filemap_attr); } static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *page, enum bitmap_page_attr attr) { - if (page) - return test_bit((page->index<<2) + attr, bitmap->filemap_attr); - else - return test_bit(attr, &bitmap->logattrs); + return test_bit((page->index<<2) + attr, bitmap->filemap_attr); } /* @@ -912,30 +900,26 @@ static inline unsigned long test_page_attr(struct bitmap *bitmap, struct page *p static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) { unsigned long bit; - struct page *page = NULL; + struct page *page; void *kaddr; unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); - if (!bitmap->filemap) { - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - if (log) - log->type->mark_region(log, chunk); - } else { + if (!bitmap->filemap) + return; - page = filemap_get_page(bitmap, chunk); - if (!page) - return; - bit = file_page_offset(bitmap, chunk); + page = filemap_get_page(bitmap, chunk); + if (!page) + return; + bit = file_page_offset(bitmap, chunk); 
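+	/*
+	 * (Editorial comment: unless BITMAP_HOSTENDIAN is set, the on-disk
+	 * bitmap is kept little-endian, which is why the rewritten block
+	 * below chooses between set_bit() and __set_bit_le(); the
+	 * little-endian form keeps the bitmap file portable across hosts.)
+	 */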
- /* set the bit */ - kaddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - set_bit(bit, kaddr); - else - __test_and_set_bit_le(bit, kaddr); - kunmap_atomic(kaddr, KM_USER0); - PRINTK("set file bit %lu page %lu\n", bit, page->index); - } + /* set the bit */ + kaddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & BITMAP_HOSTENDIAN) + set_bit(bit, kaddr); + else + __set_bit_le(bit, kaddr); + kunmap_atomic(kaddr, KM_USER0); + PRINTK("set file bit %lu page %lu\n", bit, page->index); /* record page number so it gets flushed to disk when unplug occurs */ set_page_attr(bitmap, page, BITMAP_PAGE_DIRTY); } @@ -952,16 +936,6 @@ void bitmap_unplug(struct bitmap *bitmap) if (!bitmap) return; - if (!bitmap->filemap) { - /* Must be using a dirty_log */ - struct dm_dirty_log *log = bitmap->mddev->bitmap_info.log; - dirty = test_and_clear_bit(BITMAP_PAGE_DIRTY, &bitmap->logattrs); - need_write = test_and_clear_bit(BITMAP_PAGE_NEEDWRITE, &bitmap->logattrs); - if (dirty || need_write) - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; - goto out; - } /* look at each page to see if there are any set bits that need to be * flushed out to disk */ @@ -990,7 +964,6 @@ void bitmap_unplug(struct bitmap *bitmap) else md_super_wait(bitmap->mddev); } -out: if (bitmap->flags & BITMAP_WRITE_ERROR) bitmap_file_kick(bitmap); } @@ -1199,7 +1172,6 @@ void bitmap_daemon_work(mddev_t *mddev) struct page *page = NULL, *lastpage = NULL; sector_t blocks; void *paddr; - struct dm_dirty_log *log = mddev->bitmap_info.log; /* Use a mutex to guard daemon_work against * bitmap_destroy. @@ -1224,12 +1196,11 @@ void bitmap_daemon_work(mddev_t *mddev) spin_lock_irqsave(&bitmap->lock, flags); for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; - if (!bitmap->filemap) { - if (!log) - /* error or shutdown */ - break; - } else - page = filemap_get_page(bitmap, j); + if (!bitmap->filemap) + /* error or shutdown */ + break; + + page = filemap_get_page(bitmap, j); if (page != lastpage) { /* skip this page unless it's marked as needing cleaning */ @@ -1298,17 +1269,16 @@ void bitmap_daemon_work(mddev_t *mddev) -1); /* clear the bit */ - if (page) { - paddr = kmap_atomic(page, KM_USER0); - if (bitmap->flags & BITMAP_HOSTENDIAN) - clear_bit(file_page_offset(bitmap, j), - paddr); - else - __test_and_clear_bit_le(file_page_offset(bitmap, j), - paddr); - kunmap_atomic(paddr, KM_USER0); - } else - log->type->clear_region(log, j); + paddr = kmap_atomic(page, KM_USER0); + if (bitmap->flags & BITMAP_HOSTENDIAN) + clear_bit(file_page_offset(bitmap, j), + paddr); + else + __clear_bit_le( + file_page_offset(bitmap, + j), + paddr); + kunmap_atomic(paddr, KM_USER0); } } else j |= PAGE_COUNTER_MASK; @@ -1316,16 +1286,12 @@ void bitmap_daemon_work(mddev_t *mddev) spin_unlock_irqrestore(&bitmap->lock, flags); /* now sync the final page */ - if (lastpage != NULL || log != NULL) { + if (lastpage != NULL) { spin_lock_irqsave(&bitmap->lock, flags); if (test_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE)) { clear_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); - if (lastpage) - write_page(bitmap, lastpage, 0); - else - if (log->type->flush(log)) - bitmap->flags |= BITMAP_WRITE_ERROR; + write_page(bitmap, lastpage, 0); } else { set_page_attr(bitmap, lastpage, BITMAP_PAGE_NEEDWRITE); spin_unlock_irqrestore(&bitmap->lock, flags); @@ -1767,12 +1733,10 @@ int bitmap_create(mddev_t *mddev) BUILD_BUG_ON(sizeof(bitmap_super_t) != 256); if (!file - && 
!mddev->bitmap_info.offset - && !mddev->bitmap_info.log) /* bitmap disabled, nothing to do */ + && !mddev->bitmap_info.offset) /* bitmap disabled, nothing to do */ return 0; BUG_ON(file && mddev->bitmap_info.offset); - BUG_ON(mddev->bitmap_info.offset && mddev->bitmap_info.log); bitmap = kzalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) @@ -1863,6 +1827,7 @@ int bitmap_create(mddev_t *mddev) int bitmap_load(mddev_t *mddev) { int err = 0; + sector_t start = 0; sector_t sector = 0; struct bitmap *bitmap = mddev->bitmap; @@ -1881,24 +1846,14 @@ int bitmap_load(mddev_t *mddev) } bitmap_close_sync(bitmap); - if (mddev->bitmap_info.log) { - unsigned long i; - struct dm_dirty_log *log = mddev->bitmap_info.log; - for (i = 0; i < bitmap->chunks; i++) - if (!log->type->in_sync(log, i, 1)) - bitmap_set_memory_bits(bitmap, - (sector_t)i << CHUNK_BLOCK_SHIFT(bitmap), - 1); - } else { - sector_t start = 0; - if (mddev->degraded == 0 - || bitmap->events_cleared == mddev->events) - /* no need to keep dirty bits to optimise a - * re-add of a missing device */ - start = mddev->recovery_cp; - - err = bitmap_init_from_disk(bitmap, start); - } + if (mddev->degraded == 0 + || bitmap->events_cleared == mddev->events) + /* no need to keep dirty bits to optimise a + * re-add of a missing device */ + start = mddev->recovery_cp; + + err = bitmap_init_from_disk(bitmap, start); + if (err) goto out; diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h index b2a127e891ac..a28f2e5588c6 100644 --- a/drivers/md/bitmap.h +++ b/drivers/md/bitmap.h @@ -212,10 +212,6 @@ struct bitmap { unsigned long file_pages; /* number of pages in the file */ int last_page_size; /* bytes in the last page */ - unsigned long logattrs; /* used when filemap_attr doesn't exist - * because we are working with a dirty_log - */ - unsigned long flags; int allclean; @@ -237,7 +233,6 @@ struct bitmap { wait_queue_head_t behind_wait; struct sysfs_dirent *sysfs_can_clear; - }; /* the bitmap API */ diff --git a/drivers/md/md.c b/drivers/md/md.c index dfc9425db70b..8e221a20f5d9 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -215,6 +215,55 @@ struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, } EXPORT_SYMBOL_GPL(bio_clone_mddev); +void md_trim_bio(struct bio *bio, int offset, int size) +{ + /* 'bio' is a cloned bio which we need to trim to match + * the given offset and size. 
+ * This requires adjusting bi_sector, bi_size, and bi_io_vec + */ + int i; + struct bio_vec *bvec; + int sofar = 0; + + size <<= 9; + if (offset == 0 && size == bio->bi_size) + return; + + bio->bi_sector += offset; + bio->bi_size = size; + offset <<= 9; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + + while (bio->bi_idx < bio->bi_vcnt && + bio->bi_io_vec[bio->bi_idx].bv_len <= offset) { + /* remove this whole bio_vec */ + offset -= bio->bi_io_vec[bio->bi_idx].bv_len; + bio->bi_idx++; + } + if (bio->bi_idx < bio->bi_vcnt) { + bio->bi_io_vec[bio->bi_idx].bv_offset += offset; + bio->bi_io_vec[bio->bi_idx].bv_len -= offset; + } + /* avoid any complications with bi_idx being non-zero*/ + if (bio->bi_idx) { + memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx, + (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec)); + bio->bi_vcnt -= bio->bi_idx; + bio->bi_idx = 0; + } + /* Make sure vcnt and last bv are not too big */ + bio_for_each_segment(bvec, bio, i) { + if (sofar + bvec->bv_len > size) + bvec->bv_len = size - sofar; + if (bvec->bv_len == 0) { + bio->bi_vcnt = i; + break; + } + sofar += bvec->bv_len; + } +} +EXPORT_SYMBOL_GPL(md_trim_bio); + /* * We have a system wide 'event count' that is incremented * on any 'interesting' event, and readers of /proc/mdstat @@ -757,6 +806,10 @@ static void free_disk_sb(mdk_rdev_t * rdev) rdev->sb_start = 0; rdev->sectors = 0; } + if (rdev->bb_page) { + put_page(rdev->bb_page); + rdev->bb_page = NULL; + } } @@ -1025,7 +1078,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = -EINVAL; bdevname(rdev->bdev, b); - sb = (mdp_super_t*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); if (sb->md_magic != MD_SB_MAGIC) { printk(KERN_ERR "md: invalid raid superblock magic on %s\n", @@ -1054,6 +1107,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; rdev->sb_size = MD_SB_BYTES; + rdev->badblocks.shift = -1; if (sb->level == LEVEL_MULTIPATH) rdev->desc_nr = -1; @@ -1064,7 +1118,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version ret = 1; } else { __u64 ev1, ev2; - mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page); + mdp_super_t *refsb = page_address(refdev->sb_page); if (!uuid_equal(refsb, sb)) { printk(KERN_WARNING "md: %s has different UUID to %s\n", b, bdevname(refdev->bdev,b2)); @@ -1099,7 +1153,7 @@ static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) { mdp_disk_t *desc; - mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page); + mdp_super_t *sb = page_address(rdev->sb_page); __u64 ev1 = md_event(sb); rdev->raid_disk = -1; @@ -1230,7 +1284,7 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) rdev->sb_size = MD_SB_BYTES; - sb = (mdp_super_t*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); memset(sb, 0, sizeof(*sb)); @@ -1395,6 +1449,8 @@ static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) return cpu_to_le32(csum); } +static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, + int acknowledged); static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) { struct mdp_superblock_1 *sb; @@ -1435,7 +1491,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (ret) return ret; - sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + sb = 
page_address(rdev->sb_page); if (sb->magic != cpu_to_le32(MD_SB_MAGIC) || sb->major_version != cpu_to_le32(1) || @@ -1473,12 +1529,52 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) else rdev->desc_nr = le32_to_cpu(sb->dev_number); + if (!rdev->bb_page) { + rdev->bb_page = alloc_page(GFP_KERNEL); + if (!rdev->bb_page) + return -ENOMEM; + } + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) && + rdev->badblocks.count == 0) { + /* need to load the bad block list. + * Currently we limit it to one page. + */ + s32 offset; + sector_t bb_sector; + u64 *bbp; + int i; + int sectors = le16_to_cpu(sb->bblog_size); + if (sectors > (PAGE_SIZE / 512)) + return -EINVAL; + offset = le32_to_cpu(sb->bblog_offset); + if (offset == 0) + return -EINVAL; + bb_sector = (long long)offset; + if (!sync_page_io(rdev, bb_sector, sectors << 9, + rdev->bb_page, READ, true)) + return -EIO; + bbp = (u64 *)page_address(rdev->bb_page); + rdev->badblocks.shift = sb->bblog_shift; + for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) { + u64 bb = le64_to_cpu(*bbp); + int count = bb & (0x3ff); + u64 sector = bb >> 10; + sector <<= sb->bblog_shift; + count <<= sb->bblog_shift; + if (bb + 1 == 0) + break; + if (md_set_badblocks(&rdev->badblocks, + sector, count, 1) == 0) + return -EINVAL; + } + } else if (sb->bblog_offset == 0) + rdev->badblocks.shift = -1; + if (!refdev) { ret = 1; } else { __u64 ev1, ev2; - struct mdp_superblock_1 *refsb = - (struct mdp_superblock_1*)page_address(refdev->sb_page); + struct mdp_superblock_1 *refsb = page_address(refdev->sb_page); if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 || sb->level != refsb->level || @@ -1513,7 +1609,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev) { - struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + struct mdp_superblock_1 *sb = page_address(rdev->sb_page); __u64 ev1 = le64_to_cpu(sb->events); rdev->raid_disk = -1; @@ -1619,13 +1715,12 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) int max_dev, i; /* make rdev->sb match mddev and rdev data. 
*/ - sb = (struct mdp_superblock_1*)page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); sb->feature_map = 0; sb->pad0 = 0; sb->recovery_offset = cpu_to_le64(0); memset(sb->pad1, 0, sizeof(sb->pad1)); - memset(sb->pad2, 0, sizeof(sb->pad2)); memset(sb->pad3, 0, sizeof(sb->pad3)); sb->utime = cpu_to_le64((__u64)mddev->utime); @@ -1665,6 +1760,40 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors); } + if (rdev->badblocks.count == 0) + /* Nothing to do for bad blocks*/ ; + else if (sb->bblog_offset == 0) + /* Cannot record bad blocks on this device */ + md_error(mddev, rdev); + else { + struct badblocks *bb = &rdev->badblocks; + u64 *bbp = (u64 *)page_address(rdev->bb_page); + u64 *p = bb->page; + sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS); + if (bb->changed) { + unsigned seq; + +retry: + seq = read_seqbegin(&bb->lock); + + memset(bbp, 0xff, PAGE_SIZE); + + for (i = 0 ; i < bb->count ; i++) { + u64 internal_bb = *p++; + u64 store_bb = ((BB_OFFSET(internal_bb) << 10) + | BB_LEN(internal_bb)); + *bbp++ = cpu_to_le64(store_bb); + } + if (read_seqretry(&bb->lock, seq)) + goto retry; + + bb->sector = (rdev->sb_start + + (int)le32_to_cpu(sb->bblog_offset)); + bb->size = le16_to_cpu(sb->bblog_size); + bb->changed = 0; + } + } + max_dev = 0; list_for_each_entry(rdev2, &mddev->disks, same_set) if (rdev2->desc_nr+1 > max_dev) @@ -1724,7 +1853,7 @@ super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors) num_sectors = max_sectors; rdev->sb_start = sb_start; } - sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page); + sb = page_address(rdev->sb_page); sb->data_size = cpu_to_le64(num_sectors); sb->super_offset = rdev->sb_start; sb->sb_csum = calc_sb_1_csum(sb); @@ -1922,7 +2051,7 @@ static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev) bd_link_disk_holder(rdev->bdev, mddev->gendisk); /* May as well allow recovery to be retried once */ - mddev->recovery_disabled = 0; + mddev->recovery_disabled++; return 0; @@ -1953,6 +2082,9 @@ static void unbind_rdev_from_array(mdk_rdev_t * rdev) sysfs_remove_link(&rdev->kobj, "block"); sysfs_put(rdev->sysfs_state); rdev->sysfs_state = NULL; + kfree(rdev->badblocks.page); + rdev->badblocks.count = 0; + rdev->badblocks.page = NULL; /* We need to delay this, otherwise we can deadlock when * writing to 'remove' to "dev/state". We also need * to delay it due to rcu usage. 
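The super_1_load() and super_1_sync() hunks above agree on the v1.x on-disk bad block log format: one little-endian u64 per entry, the length in the low 10 bits and the (bblog_shift-scaled) start sector in the bits above it, with the page pre-filled with 0xff so that an all-ones word terminates the list. The acknowledged flag is not stored; everything read back is treated as acknowledged. A sketch of the encoding, with helper names that are illustrative rather than part of the patch:

    #include <stdint.h>

    /* Pack/unpack one bblog entry the way the loops above do. The
     * length is stored as-is (1..512 once BB_LEN's +1 is applied) and
     * both fields are pre-scaled by bblog_shift. */
    static inline uint64_t bblog_pack(uint64_t sector, unsigned int len)
    {
            return (sector << 10) | len;
    }

    static inline int bblog_unpack(uint64_t e, uint64_t *sector,
                                   unsigned int *len)
    {
            if (e + 1 == 0)         /* all-ones word: end of the log */
                    return 0;
            *len = e & 0x3ff;
            *sector = e >> 10;
            return 1;
    }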
@@ -2127,10 +2259,10 @@ static void print_rdev(mdk_rdev_t *rdev, int major_version) printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version); switch (major_version) { case 0: - print_sb_90((mdp_super_t*)page_address(rdev->sb_page)); + print_sb_90(page_address(rdev->sb_page)); break; case 1: - print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page)); + print_sb_1(page_address(rdev->sb_page)); break; } } else @@ -2194,6 +2326,7 @@ static void md_update_sb(mddev_t * mddev, int force_change) mdk_rdev_t *rdev; int sync_req; int nospares = 0; + int any_badblocks_changed = 0; repeat: /* First make sure individual recovery_offsets are correct */ @@ -2208,8 +2341,18 @@ repeat: if (!mddev->persistent) { clear_bit(MD_CHANGE_CLEAN, &mddev->flags); clear_bit(MD_CHANGE_DEVS, &mddev->flags); - if (!mddev->external) + if (!mddev->external) { clear_bit(MD_CHANGE_PENDING, &mddev->flags); + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->badblocks.changed) { + md_ack_all_badblocks(&rdev->badblocks); + md_error(mddev, rdev); + } + clear_bit(Blocked, &rdev->flags); + clear_bit(BlockedBadBlocks, &rdev->flags); + wake_up(&rdev->blocked_wait); + } + } wake_up(&mddev->sb_wait); return; } @@ -2265,6 +2408,14 @@ repeat: MD_BUG(); mddev->events --; } + + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (rdev->badblocks.changed) + any_badblocks_changed++; + if (test_bit(Faulty, &rdev->flags)) + set_bit(FaultRecorded, &rdev->flags); + } + sync_sbs(mddev, nospares); spin_unlock_irq(&mddev->write_lock); @@ -2290,6 +2441,13 @@ repeat: bdevname(rdev->bdev,b), (unsigned long long)rdev->sb_start); rdev->sb_events = mddev->events; + if (rdev->badblocks.size) { + md_super_write(mddev, rdev, + rdev->badblocks.sector, + rdev->badblocks.size << 9, + rdev->bb_page); + rdev->badblocks.size = 0; + } } else dprintk(")\n"); @@ -2313,6 +2471,15 @@ repeat: if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) sysfs_notify(&mddev->kobj, NULL, "sync_completed"); + list_for_each_entry(rdev, &mddev->disks, same_set) { + if (test_and_clear_bit(FaultRecorded, &rdev->flags)) + clear_bit(Blocked, &rdev->flags); + + if (any_badblocks_changed) + md_ack_all_badblocks(&rdev->badblocks); + clear_bit(BlockedBadBlocks, &rdev->flags); + wake_up(&rdev->blocked_wait); + } } /* words written to sysfs files may, or may not, be \n terminated. 
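The md_update_sb() hunks above write the bad block page out and only then acknowledge the entries and clear the Blocked flags, which is what keeps BlockedBadBlocks waiters honest. The table itself is read under a seqlock in super_1_sync() and in md_is_badblock()/badblocks_show() further down, so readers never block the writer; they simply redo the snapshot if the sequence count moved. A minimal sketch of that retry idiom using the standard <linux/seqlock.h> API (the surrounding function is illustrative):

    /* Take a consistent snapshot of the entry count, retrying if a
     * writer ran concurrently. Writers take write_seqlock_irq()
     * because bad blocks can be recorded from bi_end_io context. */
    static int bb_count_snapshot(struct badblocks *bb)
    {
            unsigned int seq;
            int count;

            do {
                    seq = read_seqbegin(&bb->lock);
                    count = bb->count;
            } while (read_seqretry(&bb->lock, seq));

            return count;
    }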
@@ -2347,7 +2514,8 @@ state_show(mdk_rdev_t *rdev, char *page) char *sep = ""; size_t len = 0; - if (test_bit(Faulty, &rdev->flags)) { + if (test_bit(Faulty, &rdev->flags) || + rdev->badblocks.unacked_exist) { len+= sprintf(page+len, "%sfaulty",sep); sep = ","; } @@ -2359,7 +2527,8 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%swrite_mostly",sep); sep = ","; } - if (test_bit(Blocked, &rdev->flags)) { + if (test_bit(Blocked, &rdev->flags) || + rdev->badblocks.unacked_exist) { len += sprintf(page+len, "%sblocked", sep); sep = ","; } @@ -2368,6 +2537,10 @@ state_show(mdk_rdev_t *rdev, char *page) len += sprintf(page+len, "%sspare", sep); sep = ","; } + if (test_bit(WriteErrorSeen, &rdev->flags)) { + len += sprintf(page+len, "%swrite_error", sep); + sep = ","; + } return len+sprintf(page+len, "\n"); } @@ -2375,13 +2548,15 @@ static ssize_t state_store(mdk_rdev_t *rdev, const char *buf, size_t len) { /* can write - * faulty - simulates and error + * faulty - simulates an error * remove - disconnects the device * writemostly - sets write_mostly * -writemostly - clears write_mostly - * blocked - sets the Blocked flag - * -blocked - clears the Blocked flag + * blocked - sets the Blocked flags + * -blocked - clears the Blocked flag and possibly simulates an error * insync - sets Insync providing device isn't active + * write_error - sets WriteErrorSeen + * -write_error - clears WriteErrorSeen */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2408,7 +2583,15 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) set_bit(Blocked, &rdev->flags); err = 0; } else if (cmd_match(buf, "-blocked")) { + if (!test_bit(Faulty, &rdev->flags) && + test_bit(BlockedBadBlocks, &rdev->flags)) { + /* metadata handler doesn't understand badblocks, + * so we need to fail the device + */ + md_error(rdev->mddev, rdev); + } clear_bit(Blocked, &rdev->flags); + clear_bit(BlockedBadBlocks, &rdev->flags); wake_up(&rdev->blocked_wait); set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); @@ -2417,6 +2600,12 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { set_bit(In_sync, &rdev->flags); err = 0; + } else if (cmd_match(buf, "write_error")) { + set_bit(WriteErrorSeen, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "-write_error")) { + clear_bit(WriteErrorSeen, &rdev->flags); + err = 0; } if (!err) sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -2459,7 +2648,6 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) { char *e; int err; - char nm[20]; int slot = simple_strtoul(buf, &e, 10); if (strncmp(buf, "none", 4)==0) slot = -1; @@ -2482,8 +2670,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) hot_remove_disk(rdev->mddev, rdev->raid_disk); if (err) return err; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&rdev->mddev->kobj, nm); + sysfs_unlink_rdev(rdev->mddev, rdev); rdev->raid_disk = -1; set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); @@ -2522,8 +2709,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) return err; } else sysfs_notify_dirent_safe(rdev->sysfs_state); - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&rdev->mddev->kobj, &rdev->kobj, nm)) + if (sysfs_link_rdev(rdev->mddev, rdev)) /* failure here is OK */; /* don't wakeup anyone, leave that to userspace. 
*/ } else { @@ -2712,6 +2898,39 @@ static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t le static struct rdev_sysfs_entry rdev_recovery_start = __ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store); + +static ssize_t +badblocks_show(struct badblocks *bb, char *page, int unack); +static ssize_t +badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack); + +static ssize_t bb_show(mdk_rdev_t *rdev, char *page) +{ + return badblocks_show(&rdev->badblocks, page, 0); +} +static ssize_t bb_store(mdk_rdev_t *rdev, const char *page, size_t len) +{ + int rv = badblocks_store(&rdev->badblocks, page, len, 0); + /* Maybe that ack was all we needed */ + if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags)) + wake_up(&rdev->blocked_wait); + return rv; +} +static struct rdev_sysfs_entry rdev_bad_blocks = +__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store); + + +static ssize_t ubb_show(mdk_rdev_t *rdev, char *page) +{ + return badblocks_show(&rdev->badblocks, page, 1); +} +static ssize_t ubb_store(mdk_rdev_t *rdev, const char *page, size_t len) +{ + return badblocks_store(&rdev->badblocks, page, len, 1); +} +static struct rdev_sysfs_entry rdev_unack_bad_blocks = +__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store); + static struct attribute *rdev_default_attrs[] = { &rdev_state.attr, &rdev_errors.attr, @@ -2719,6 +2938,8 @@ static struct attribute *rdev_default_attrs[] = { &rdev_offset.attr, &rdev_size.attr, &rdev_recovery_start.attr, + &rdev_bad_blocks.attr, + &rdev_unack_bad_blocks.attr, NULL, }; static ssize_t @@ -2782,7 +3003,7 @@ static struct kobj_type rdev_ktype = { .default_attrs = rdev_default_attrs, }; -void md_rdev_init(mdk_rdev_t *rdev) +int md_rdev_init(mdk_rdev_t *rdev) { rdev->desc_nr = -1; rdev->saved_raid_disk = -1; @@ -2792,12 +3013,27 @@ void md_rdev_init(mdk_rdev_t *rdev) rdev->sb_events = 0; rdev->last_read_error.tv_sec = 0; rdev->last_read_error.tv_nsec = 0; + rdev->sb_loaded = 0; + rdev->bb_page = NULL; atomic_set(&rdev->nr_pending, 0); atomic_set(&rdev->read_errors, 0); atomic_set(&rdev->corrected_errors, 0); INIT_LIST_HEAD(&rdev->same_set); init_waitqueue_head(&rdev->blocked_wait); + + /* Add space to store bad block list. 
+ * This reserves the space even on arrays where it cannot + * be used - I wonder if that matters + */ + rdev->badblocks.count = 0; + rdev->badblocks.shift = 0; + rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL); + seqlock_init(&rdev->badblocks.lock); + if (rdev->badblocks.page == NULL) + return -ENOMEM; + + return 0; } EXPORT_SYMBOL_GPL(md_rdev_init); /* @@ -2823,8 +3059,11 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi return ERR_PTR(-ENOMEM); } - md_rdev_init(rdev); - if ((err = alloc_disk_sb(rdev))) + err = md_rdev_init(rdev); + if (err) + goto abort_free; + err = alloc_disk_sb(rdev); + if (err) goto abort_free; err = lock_rdev(rdev, newdev, super_format == -2); @@ -2860,15 +3099,17 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_mi goto abort_free; } } + if (super_format == -1) + /* hot-add for 0.90, or non-persistent: so no badblocks */ + rdev->badblocks.shift = -1; return rdev; abort_free: - if (rdev->sb_page) { - if (rdev->bdev) - unlock_rdev(rdev); - free_disk_sb(rdev); - } + if (rdev->bdev) + unlock_rdev(rdev); + free_disk_sb(rdev); + kfree(rdev->badblocks.page); kfree(rdev); return ERR_PTR(err); } @@ -3149,15 +3390,13 @@ level_store(mddev_t *mddev, const char *buf, size_t len) } list_for_each_entry(rdev, &mddev->disks, same_set) { - char nm[20]; if (rdev->raid_disk < 0) continue; if (rdev->new_raid_disk >= mddev->raid_disks) rdev->new_raid_disk = -1; if (rdev->new_raid_disk == rdev->raid_disk) continue; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); } list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk < 0) @@ -3168,11 +3407,10 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (rdev->raid_disk < 0) clear_bit(In_sync, &rdev->flags); else { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if(sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) - printk("md: cannot register %s for %s after level change\n", - nm, mdname(mddev)); + if (sysfs_link_rdev(mddev, rdev)) + printk(KERN_WARNING "md: cannot register rd%d" + " for %s after level change\n", + rdev->raid_disk, mdname(mddev)); } } @@ -4504,7 +4742,8 @@ int md_run(mddev_t *mddev) } if (mddev->bio_set == NULL) - mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev)); + mddev->bio_set = bioset_create(BIO_POOL_SIZE, + sizeof(mddev_t *)); spin_lock(&pers_lock); pers = find_pers(mddev->level, mddev->clevel); @@ -4621,12 +4860,9 @@ int md_run(mddev_t *mddev) smp_wmb(); mddev->ready = 1; list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, &rdev->kobj, nm)) + if (rdev->raid_disk >= 0) + if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; - } set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -4854,11 +5090,8 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open) sysfs_notify_dirent_safe(mddev->sysfs_state); list_for_each_entry(rdev, &mddev->disks, same_set) - if (rdev->raid_disk >= 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - } + if (rdev->raid_disk >= 0) + sysfs_unlink_rdev(mddev, rdev); set_capacity(disk, 0); mutex_unlock(&mddev->open_mutex); @@ -6198,18 +6431,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev) if (!rdev || test_bit(Faulty, &rdev->flags)) return; - if (mddev->external) - set_bit(Blocked, &rdev->flags); -/* - dprintk("md_error dev:%s, 
rdev:(%d:%d), (caller: %p,%p,%p,%p).\n", - mdname(mddev), - MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev), - __builtin_return_address(0),__builtin_return_address(1), - __builtin_return_address(2),__builtin_return_address(3)); -*/ - if (!mddev->pers) - return; - if (!mddev->pers->error_handler) + if (!mddev->pers || !mddev->pers->error_handler) return; mddev->pers->error_handler(mddev,rdev); if (mddev->degraded) @@ -6933,11 +7155,14 @@ void md_do_sync(mddev_t *mddev) atomic_add(sectors, &mddev->recovery_active); } + if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) + break; + j += sectors; if (j>1) mddev->curr_resync = j; mddev->curr_mark_cnt = io_sectors; if (last_check == 0) - /* this is the earliers that rebuilt will be + /* this is the earliest that rebuild will be * visible in /proc/mdstat */ md_new_event(mddev); @@ -6946,10 +7171,6 @@ void md_do_sync(mddev_t *mddev) continue; last_check = io_sectors; - - if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) - break; - repeat: if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) { /* step marks */ @@ -7067,29 +7288,23 @@ static int remove_and_add_spares(mddev_t *mddev) atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } - if (mddev->degraded && !mddev->recovery_disabled) { + if (mddev->degraded) { list_for_each_entry(rdev, &mddev->disks, same_set) { if (rdev->raid_disk >= 0 && !test_bit(In_sync, &rdev->flags) && - !test_bit(Faulty, &rdev->flags) && - !test_bit(Blocked, &rdev->flags)) + !test_bit(Faulty, &rdev->flags)) spares++; if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { rdev->recovery_offset = 0; if (mddev->pers-> hot_add_disk(mddev, rdev) == 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + if (sysfs_link_rdev(mddev, rdev)) /* failure here is OK */; spares++; md_new_event(mddev); @@ -7138,6 +7353,8 @@ static void reap_sync_thread(mddev_t *mddev) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); sysfs_notify_dirent_safe(mddev->sysfs_action); md_new_event(mddev); + if (mddev->event_work.func) + queue_work(md_misc_wq, &mddev->event_work); } /* @@ -7170,9 +7387,6 @@ void md_check_recovery(mddev_t *mddev) if (mddev->bitmap) bitmap_daemon_work(mddev); - if (mddev->ro) - return; - if (signal_pending(current)) { if (mddev->pers->sync_request && !mddev->external) { printk(KERN_INFO "md: %s in immediate safe mode\n", @@ -7209,9 +7423,7 @@ void md_check_recovery(mddev_t *mddev) atomic_read(&rdev->nr_pending)==0) { if (mddev->pers->hot_remove_disk( mddev, rdev->raid_disk)==0) { - char nm[20]; - sprintf(nm,"rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } @@ -7331,12 +7543,499 @@ void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev) { sysfs_notify_dirent_safe(rdev->sysfs_state); wait_event_timeout(rdev->blocked_wait, - !test_bit(Blocked, &rdev->flags), + !test_bit(Blocked, &rdev->flags) && + !test_bit(BlockedBadBlocks, &rdev->flags), msecs_to_jiffies(5000)); rdev_dec_pending(rdev, mddev); } EXPORT_SYMBOL(md_wait_for_blocked_rdev); + +/* Bad block management. + * We can record which blocks on each device are 'bad' and so just + * fail those blocks, or that stripe, rather than the whole device. + * Entries in the bad-block table are 64bits wide. 
This comprises: + Length of bad-range, in sectors: 0-511 for lengths 1-512 + Start of bad-range, sector offset, 54 bits (allows 8 exbibytes) + A 'shift' can be set so that larger blocks are tracked and + consequently larger devices can be covered. + 'Acknowledged' flag - 1 bit, the most significant bit. + * + * Locking of the bad-block table uses a seqlock so md_is_badblock + * might need to retry if it is very unlucky. + * We will sometimes want to check for bad blocks in a bi_end_io function, + * so we use the write_seqlock_irq variant. + * + * When looking for a bad block we specify a range and want to + * know if any block in the range is bad. So we binary-search + * to the last range that starts at-or-before the given endpoint, + * (or "before the sector after the target range") + * then see if it ends after the given start. + * We return + * 0 if there are no known bad blocks in the range + * 1 if there are known bad blocks which are all acknowledged + * -1 if there are bad blocks which have not yet been acknowledged in metadata. + * plus the start/length of the first bad section we overlap. + */ +int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors) +{ + int hi; + int lo = 0; + u64 *p = bb->page; + int rv = 0; + sector_t target = s + sectors; + unsigned seq; + + if (bb->shift > 0) { + /* round the start down, and the end up */ + s >>= bb->shift; + target += (1<<bb->shift) - 1; + target >>= bb->shift; + sectors = target - s; + } + /* 'target' is now the first block after the bad range */ + +retry: + seq = read_seqbegin(&bb->lock); + + hi = bb->count; + + /* Binary search between lo and hi for 'target' + * i.e. for the last range that starts before 'target' + */ + /* INVARIANT: ranges before 'lo' and at-or-after 'hi' + * are known not to be the last range before target. + * VARIANT: hi-lo is the number of possible + * ranges, and decreases until it reaches 1 + */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a < target) + /* This could still be the one, earlier ranges + * could not. */ + lo = mid; + else + /* This and later ranges are definitely out. */ + hi = mid; + } + /* 'lo' might be the last that started before target, but 'hi' isn't */ + if (hi > lo) { + /* need to check all ranges that end after 's' to see if + * any are unacknowledged. + */ + while (lo >= 0 && + BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { + if (BB_OFFSET(p[lo]) < target) { + /* starts before the end, and finishes after + * the start, so they must overlap + */ + if (rv != -1 && BB_ACK(p[lo])) + rv = 1; + else + rv = -1; + *first_bad = BB_OFFSET(p[lo]); + *bad_sectors = BB_LEN(p[lo]); + } + lo--; + } + } + + if (read_seqretry(&bb->lock, seq)) + goto retry; + + return rv; +} +EXPORT_SYMBOL_GPL(md_is_badblock); + +/* + * Add a range of bad blocks to the table. + * This might extend the table, or might contract it + * if two adjacent ranges can be merged. + * We binary-search to find the 'insertion' point, then + * decide how best to handle it. 
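The lo/hi loop in md_is_badblock() above reappears almost verbatim in md_set_badblocks() and md_clear_badblocks() below: find the last range whose start lies before (or at) a target point, with everything below 'lo' and from 'hi' upwards excluded by the invariant. A self-contained model of that search, with hypothetical names, assuming entries sorted by start:

    /* Return the index of the last range with start[] < target, or -1
     * if no range starts before it; mirrors the INVARIANT/VARIANT
     * comments in md_is_badblock() above. */
    static int last_range_before(const sector_t *start, int count,
                                 sector_t target)
    {
            int lo = 0, hi = count;

            while (hi - lo > 1) {
                    int mid = (lo + hi) / 2;

                    if (start[mid] < target)
                            lo = mid;       /* could still be the one */
                    else
                            hi = mid;       /* this and later are out */
            }
            if (hi > lo && start[lo] < target)
                    return lo;
            return -1;
    }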
+ */ +static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, + int acknowledged) +{ + u64 *p; + int lo, hi; + int rv = 1; + + if (bb->shift < 0) + /* badblocks are disabled */ + return 0; + + if (bb->shift) { + /* round the start down, and the end up */ + sector_t next = s + sectors; + s >>= bb->shift; + next += (1<<bb->shift) - 1; + next >>= bb->shift; + sectors = next - s; + } + + write_seqlock_irq(&bb->lock); + + p = bb->page; + lo = 0; + hi = bb->count; + /* Find the last range that starts at-or-before 's' */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a <= s) + lo = mid; + else + hi = mid; + } + if (hi > lo && BB_OFFSET(p[lo]) > s) + hi = lo; + + if (hi > lo) { + /* we found a range that might merge with the start + * of our new range + */ + sector_t a = BB_OFFSET(p[lo]); + sector_t e = a + BB_LEN(p[lo]); + int ack = BB_ACK(p[lo]); + if (e >= s) { + /* Yes, we can merge with a previous range */ + if (s == a && s + sectors >= e) + /* new range covers old */ + ack = acknowledged; + else + ack = ack && acknowledged; + + if (e < s + sectors) + e = s + sectors; + if (e - a <= BB_MAX_LEN) { + p[lo] = BB_MAKE(a, e-a, ack); + s = e; + } else { + /* does not all fit in one range, + * make p[lo] maximal + */ + if (BB_LEN(p[lo]) != BB_MAX_LEN) + p[lo] = BB_MAKE(a, BB_MAX_LEN, ack); + s = a + BB_MAX_LEN; + } + sectors = e - s; + } + } + if (sectors && hi < bb->count) { + /* 'hi' points to the first range that starts after 's'. + * Maybe we can merge with the start of that range */ + sector_t a = BB_OFFSET(p[hi]); + sector_t e = a + BB_LEN(p[hi]); + int ack = BB_ACK(p[hi]); + if (a <= s + sectors) { + /* merging is possible */ + if (e <= s + sectors) { + /* full overlap */ + e = s + sectors; + ack = acknowledged; + } else + ack = ack && acknowledged; + + a = s; + if (e - a <= BB_MAX_LEN) { + p[hi] = BB_MAKE(a, e-a, ack); + s = e; + } else { + p[hi] = BB_MAKE(a, BB_MAX_LEN, ack); + s = a + BB_MAX_LEN; + } + sectors = e - s; + lo = hi; + hi++; + } + } + if (sectors == 0 && hi < bb->count) { + /* we might be able to combine lo and hi */ + /* Note: 's' is at the end of 'lo' */ + sector_t a = BB_OFFSET(p[hi]); + int lolen = BB_LEN(p[lo]); + int hilen = BB_LEN(p[hi]); + int newlen = lolen + hilen - (s - a); + if (s >= a && newlen < BB_MAX_LEN) { + /* yes, we can combine them */ + int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]); + p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack); + memmove(p + hi, p + hi + 1, + (bb->count - hi - 1) * 8); + bb->count--; + } + } + while (sectors) { + /* didn't merge (it all). 
+ * Need to add a range just before 'hi' */ + if (bb->count >= MD_MAX_BADBLOCKS) { + /* No room for more */ + rv = 0; + break; + } else { + int this_sectors = sectors; + memmove(p + hi + 1, p + hi, + (bb->count - hi) * 8); + bb->count++; + + if (this_sectors > BB_MAX_LEN) + this_sectors = BB_MAX_LEN; + p[hi] = BB_MAKE(s, this_sectors, acknowledged); + sectors -= this_sectors; + s += this_sectors; + } + } + + bb->changed = 1; + if (!acknowledged) + bb->unacked_exist = 1; + write_sequnlock_irq(&bb->lock); + + return rv; +} + +int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors, + int acknowledged) +{ + int rv = md_set_badblocks(&rdev->badblocks, + s + rdev->data_offset, sectors, acknowledged); + if (rv) { + /* Make sure they get written out promptly */ + set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags); + md_wakeup_thread(rdev->mddev->thread); + } + return rv; +} +EXPORT_SYMBOL_GPL(rdev_set_badblocks); + +/* + * Remove a range of bad blocks from the table. + * This may involve extending the table if we split a region, + * but it must not fail. So if the table becomes full, we just + * drop the remove request. + */ +static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors) +{ + u64 *p; + int lo, hi; + sector_t target = s + sectors; + int rv = 0; + + if (bb->shift > 0) { + /* When clearing we round the start up and the end down. + * This should not matter as the shift should align with + * the block size and no rounding should ever be needed. + * However it is better to think a block is bad when it + * isn't than to think a block is not bad when it is. + */ + s += (1<<bb->shift) - 1; + s >>= bb->shift; + target >>= bb->shift; + sectors = target - s; + } + + write_seqlock_irq(&bb->lock); + + p = bb->page; + lo = 0; + hi = bb->count; + /* Find the last range that starts before 'target' */ + while (hi - lo > 1) { + int mid = (lo + hi) / 2; + sector_t a = BB_OFFSET(p[mid]); + if (a < target) + lo = mid; + else + hi = mid; + } + if (hi > lo) { + /* p[lo] is the last range that could overlap the + * current range. Earlier ranges could also overlap, + * but only this one can overlap the end of the range. + */ + if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) { + /* Partial overlap, leave the tail of this range */ + int ack = BB_ACK(p[lo]); + sector_t a = BB_OFFSET(p[lo]); + sector_t end = a + BB_LEN(p[lo]); + + if (a < s) { + /* we need to split this range */ + if (bb->count >= MD_MAX_BADBLOCKS) { + rv = 0; + goto out; + } + memmove(p+lo+1, p+lo, (bb->count - lo) * 8); + bb->count++; + p[lo] = BB_MAKE(a, s-a, ack); + lo++; + } + p[lo] = BB_MAKE(target, end - target, ack); + /* there is no longer an overlap */ + hi = lo; + lo--; + } + while (lo >= 0 && + BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) { + /* This range does overlap */ + if (BB_OFFSET(p[lo]) < s) { + /* Keep the early parts of this range. */ + int ack = BB_ACK(p[lo]); + sector_t start = BB_OFFSET(p[lo]); + p[lo] = BB_MAKE(start, s - start, ack); + /* now low doesn't overlap, so.. 
*/ + break; + } + lo--; + } + /* 'lo' is strictly before, 'hi' is strictly after, + * anything between needs to be discarded + */ + if (hi - lo > 1) { + memmove(p+lo+1, p+hi, (bb->count - hi) * 8); + bb->count -= (hi - lo - 1); + } + } + + bb->changed = 1; +out: + write_sequnlock_irq(&bb->lock); + return rv; +} + +int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors) +{ + return md_clear_badblocks(&rdev->badblocks, + s + rdev->data_offset, + sectors); +} +EXPORT_SYMBOL_GPL(rdev_clear_badblocks); + +/* + * Acknowledge all bad blocks in a list. + * This only succeeds if ->changed is clear. It is used by + * in-kernel metadata updates + */ +void md_ack_all_badblocks(struct badblocks *bb) +{ + if (bb->page == NULL || bb->changed) + /* no point even trying */ + return; + write_seqlock_irq(&bb->lock); + + if (bb->changed == 0) { + u64 *p = bb->page; + int i; + for (i = 0; i < bb->count ; i++) { + if (!BB_ACK(p[i])) { + sector_t start = BB_OFFSET(p[i]); + int len = BB_LEN(p[i]); + p[i] = BB_MAKE(start, len, 1); + } + } + bb->unacked_exist = 0; + } + write_sequnlock_irq(&bb->lock); +} +EXPORT_SYMBOL_GPL(md_ack_all_badblocks); + +/* sysfs access to bad-blocks list. + * We present two files. + * 'bad_blocks' lists sector numbers and lengths of ranges that + * are recorded as bad. The list is truncated to fit within + * the one-page limit of sysfs. + * Writing "sector length" to this file adds an acknowledged + * bad block. + * 'unacknowledged_bad_blocks' lists bad blocks that have not yet + * been acknowledged. Writing to this file adds bad blocks + * without acknowledging them. This is largely for testing. + */ + +static ssize_t +badblocks_show(struct badblocks *bb, char *page, int unack) +{ + size_t len; + int i; + u64 *p = bb->page; + unsigned seq; + + if (bb->shift < 0) + return 0; + +retry: + seq = read_seqbegin(&bb->lock); + + len = 0; + i = 0; + + while (len < PAGE_SIZE && i < bb->count) { + sector_t s = BB_OFFSET(p[i]); + unsigned int length = BB_LEN(p[i]); + int ack = BB_ACK(p[i]); + i++; + + if (unack && ack) + continue; + + len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n", + (unsigned long long)s << bb->shift, + length << bb->shift); + } + if (unack && len == 0) + bb->unacked_exist = 0; + + if (read_seqretry(&bb->lock, seq)) + goto retry; + + return len; +} + +#define DO_DEBUG 1 + +static ssize_t +badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack) +{ + unsigned long long sector; + int length; + char newline; +#ifdef DO_DEBUG + /* Allow clearing via sysfs *only* for testing/debugging. + * Normally only a successful write may clear a badblock + */ + int clear = 0; + if (page[0] == '-') { + clear = 1; + page++; + } +#endif /* DO_DEBUG */ + + switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) { + case 3: + if (newline != '\n') + return -EINVAL; + case 2: + if (length <= 0) + return -EINVAL; + break; + default: + return -EINVAL; + } + +#ifdef DO_DEBUG + if (clear) { + md_clear_badblocks(bb, sector, length); + return len; + } +#endif /* DO_DEBUG */ + if (md_set_badblocks(bb, sector, length, !unack)) + return len; + else + return -ENOSPC; +} + static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c26c7a08ae6..1e586bb4452e 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -29,6 +29,13 @@ typedef struct mddev_s mddev_t; typedef struct mdk_rdev_s mdk_rdev_t; +/* Bad block numbers are stored sorted in a single page. 
+ * 64bits is used for each block or extent. + * 54 bits are sector number, 9 bits are extent size, + * 1 bit is an 'acknowledged' flag. + */ +#define MD_MAX_BADBLOCKS (PAGE_SIZE/8) + /* * MD's 'extended' device */ @@ -48,7 +55,7 @@ struct mdk_rdev_s struct block_device *meta_bdev; struct block_device *bdev; /* block device handle */ - struct page *sb_page; + struct page *sb_page, *bb_page; int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ @@ -74,9 +81,29 @@ struct mdk_rdev_s #define In_sync 2 /* device is in_sync with rest of array */ #define WriteMostly 4 /* Avoid reading if at all possible */ #define AutoDetected 7 /* added by auto-detect */ -#define Blocked 8 /* An error occurred on an externally - * managed array, don't allow writes +#define Blocked 8 /* An error occurred but has not yet + * been acknowledged by the metadata + * handler, so don't allow writes * until it is cleared */ +#define WriteErrorSeen 9 /* A write error has been seen on this + * device + */ +#define FaultRecorded 10 /* Intermediate state for clearing + * Blocked. The Fault is/will-be + * recorded in the metadata, but that + * metadata hasn't been stored safely + * on disk yet. + */ +#define BlockedBadBlocks 11 /* A writer is blocked because they + * found an unacknowledged bad-block. + * This can safely be cleared at any + * time, and the writer will re-check. + * It may be set at any time, and at + * worst the writer will timeout and + * re-check. So setting it as + * accurately as possible is good, but + * not absolutely critical. + */ wait_queue_head_t blocked_wait; int desc_nr; /* descriptor index in the superblock */ @@ -111,8 +138,54 @@ struct mdk_rdev_s struct sysfs_dirent *sysfs_state; /* handle for 'state' * sysfs entry */ + + struct badblocks { + int count; /* count of bad blocks */ + int unacked_exist; /* there probably are unacknowledged + * bad blocks. 
This is only cleared + * when a read discovers none + */ + int shift; /* shift from sectors to block size + * a -ve shift means badblocks are + * disabled.*/ + u64 *page; /* badblock list */ + int changed; + seqlock_t lock; + + sector_t sector; + sector_t size; /* in sectors */ + } badblocks; }; +#define BB_LEN_MASK (0x00000000000001FFULL) +#define BB_OFFSET_MASK (0x7FFFFFFFFFFFFE00ULL) +#define BB_ACK_MASK (0x8000000000000000ULL) +#define BB_MAX_LEN 512 +#define BB_OFFSET(x) (((x) & BB_OFFSET_MASK) >> 9) +#define BB_LEN(x) (((x) & BB_LEN_MASK) + 1) +#define BB_ACK(x) (!!((x) & BB_ACK_MASK)) +#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63)) + +extern int md_is_badblock(struct badblocks *bb, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors); +static inline int is_badblock(mdk_rdev_t *rdev, sector_t s, int sectors, + sector_t *first_bad, int *bad_sectors) +{ + if (unlikely(rdev->badblocks.count)) { + int rv = md_is_badblock(&rdev->badblocks, rdev->data_offset + s, + sectors, + first_bad, bad_sectors); + if (rv) + *first_bad -= rdev->data_offset; + return rv; + } + return 0; +} +extern int rdev_set_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors, + int acknowledged); +extern int rdev_clear_badblocks(mdk_rdev_t *rdev, sector_t s, int sectors); +extern void md_ack_all_badblocks(struct badblocks *bb); + struct mddev_s { void *private; @@ -239,9 +312,12 @@ struct mddev_s #define MD_RECOVERY_FROZEN 9 unsigned long recovery; - int recovery_disabled; /* if we detect that recovery - * will always fail, set this - * so we don't loop trying */ + /* If a RAID personality determines that recovery (of a particular + * device) will fail due to a read error on the source device, it + * takes a copy of this number and does not attempt recovery again + * until this number changes. + */ + int recovery_disabled; int in_sync; /* know to not need resync */ /* 'open_mutex' avoids races between 'md_open' and 'do_md_stop', so @@ -304,11 +380,6 @@ struct mddev_s * hot-adding a bitmap. It should * eventually be settable by sysfs. */ - /* When md is serving under dm, it might use a - * dirty_log to store the bits. - */ - struct dm_dirty_log *log; - struct mutex mutex; unsigned long chunksize; unsigned long daemon_sleep; /* how many jiffies between updates? */ @@ -413,6 +484,20 @@ static inline char * mdname (mddev_t * mddev) return mddev->gendisk ? mddev->gendisk->disk_name : "mdX"; } +static inline int sysfs_link_rdev(mddev_t *mddev, mdk_rdev_t *rdev) +{ + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + return sysfs_create_link(&mddev->kobj, &rdev->kobj, nm); +} + +static inline void sysfs_unlink_rdev(mddev_t *mddev, mdk_rdev_t *rdev) +{ + char nm[20]; + sprintf(nm, "rd%d", rdev->raid_disk); + sysfs_remove_link(&mddev->kobj, nm); +} + /* * iterates through some rdev ringlist. It's safe to remove the * current 'rdev'. Dont touch 'tmp' though. 
@@ -505,7 +590,7 @@ extern void mddev_init(mddev_t *mddev); extern int md_run(mddev_t *mddev); extern void md_stop(mddev_t *mddev); extern void md_stop_writes(mddev_t *mddev); -extern void md_rdev_init(mdk_rdev_t *rdev); +extern int md_rdev_init(mdk_rdev_t *rdev); extern void mddev_suspend(mddev_t *mddev); extern void mddev_resume(mddev_t *mddev); @@ -514,4 +599,5 @@ extern struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask, extern struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs, mddev_t *mddev); extern int mddev_check_plugged(mddev_t *mddev); +extern void md_trim_bio(struct bio *bio, int offset, int size); #endif /* _MD_MD_H */ diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index f7431b6d8447..32323f0afd89 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,16 +35,13 @@ #include <linux/delay.h> #include <linux/blkdev.h> #include <linux/seq_file.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid1.h" #include "bitmap.h" #define DEBUG 0 -#if DEBUG -#define PRINTK(x...) printk(x) -#else -#define PRINTK(x...) -#endif +#define PRINTK(x...) do { if (DEBUG) printk(x); } while (0) /* * Number of guaranteed r1bios in case of extreme VM load: @@ -166,7 +163,7 @@ static void put_all_bios(conf_t *conf, r1bio_t *r1_bio) for (i = 0; i < conf->raid_disks; i++) { struct bio **bio = r1_bio->bios + i; - if (*bio && *bio != IO_BLOCKED) + if (!BIO_SPECIAL(*bio)) bio_put(*bio); *bio = NULL; } @@ -176,12 +173,6 @@ static void free_r1bio(r1bio_t *r1_bio) { conf_t *conf = r1_bio->mddev->private; - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf); - put_all_bios(conf, r1_bio); mempool_free(r1_bio, conf->r1bio_pool); } @@ -222,6 +213,33 @@ static void reschedule_retry(r1bio_t *r1_bio) * operation and are ready to return a success/failure code to the buffer * cache layer. */ +static void call_bio_endio(r1bio_t *r1_bio) +{ + struct bio *bio = r1_bio->master_bio; + int done; + conf_t *conf = r1_bio->mddev->private; + + if (bio->bi_phys_segments) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + bio->bi_phys_segments--; + done = (bio->bi_phys_segments == 0); + spin_unlock_irqrestore(&conf->device_lock, flags); + } else + done = 1; + + if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + if (done) { + bio_endio(bio, 0); + /* + * Wake up any possible resync thread that waits for the device + * to go idle. + */ + allow_barrier(conf); + } +} + static void raid_end_bio_io(r1bio_t *r1_bio) { struct bio *bio = r1_bio->master_bio; @@ -234,8 +252,7 @@ static void raid_end_bio_io(r1bio_t *r1_bio) (unsigned long long) bio->bi_sector + (bio->bi_size >> 9) - 1); - bio_endio(bio, - test_bit(R1BIO_Uptodate, &r1_bio->state) ? 
0 : -EIO); + call_bio_endio(r1_bio); } free_r1bio(r1_bio); } @@ -287,36 +304,52 @@ static void raid1_end_read_request(struct bio *bio, int error) * oops, read error: */ char b[BDEVNAME_SIZE]; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid1:%s: %s: rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(conf->mirrors[mirror].rdev->bdev,b), (unsigned long long)r1_bio->sector); + printk_ratelimited( + KERN_ERR "md/raid1:%s: %s: " + "rescheduling sector %llu\n", + mdname(conf->mddev), + bdevname(conf->mirrors[mirror].rdev->bdev, + b), + (unsigned long long)r1_bio->sector); + set_bit(R1BIO_ReadError, &r1_bio->state); reschedule_retry(r1_bio); } rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); } +static void close_write(r1bio_t *r1_bio) +{ + /* it really is the end of this request */ + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + /* free extra copy of the data pages */ + int i = r1_bio->behind_page_count; + while (i--) + safe_put_page(r1_bio->behind_bvecs[i].bv_page); + kfree(r1_bio->behind_bvecs); + r1_bio->behind_bvecs = NULL; + } + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, + r1_bio->sectors, + !test_bit(R1BIO_Degraded, &r1_bio->state), + test_bit(R1BIO_BehindIO, &r1_bio->state)); + md_write_end(r1_bio->mddev); +} + static void r1_bio_write_done(r1bio_t *r1_bio) { - if (atomic_dec_and_test(&r1_bio->remaining)) - { - /* it really is the end of this request */ - if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { - /* free extra copy of the data pages */ - int i = r1_bio->behind_page_count; - while (i--) - safe_put_page(r1_bio->behind_pages[i]); - kfree(r1_bio->behind_pages); - r1_bio->behind_pages = NULL; - } - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r1_bio->mddev->bitmap, r1_bio->sector, - r1_bio->sectors, - !test_bit(R1BIO_Degraded, &r1_bio->state), - test_bit(R1BIO_BehindIO, &r1_bio->state)); - md_write_end(r1_bio->mddev); - raid_end_bio_io(r1_bio); + if (!atomic_dec_and_test(&r1_bio->remaining)) + return; + + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + close_write(r1_bio); + if (test_bit(R1BIO_MadeGood, &r1_bio->state)) + reschedule_retry(r1_bio); + else + raid_end_bio_io(r1_bio); } } @@ -336,13 +369,11 @@ static void raid1_end_write_request(struct bio *bio, int error) /* * 'one mirror IO has finished' event handler: */ - r1_bio->bios[mirror] = NULL; - to_put = bio; if (!uptodate) { - md_error(r1_bio->mddev, conf->mirrors[mirror].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R1BIO_Degraded, &r1_bio->state); - } else + set_bit(WriteErrorSeen, + &conf->mirrors[mirror].rdev->flags); + set_bit(R1BIO_WriteError, &r1_bio->state); + } else { /* * Set R1BIO_Uptodate in our master bio, so that we * will return a good error code for to the higher @@ -353,8 +384,22 @@ static void raid1_end_write_request(struct bio *bio, int error) * to user-side. So if something waits for IO, then it * will wait for the 'master' bio. */ + sector_t first_bad; + int bad_sectors; + + r1_bio->bios[mirror] = NULL; + to_put = bio; set_bit(R1BIO_Uptodate, &r1_bio->state); + /* Maybe we can clear some bad blocks. 
*/ + if (is_badblock(conf->mirrors[mirror].rdev, + r1_bio->sector, r1_bio->sectors, + &first_bad, &bad_sectors)) { + r1_bio->bios[mirror] = IO_MADE_GOOD; + set_bit(R1BIO_MadeGood, &r1_bio->state); + } + } + update_head_pos(mirror, r1_bio); if (behind) { @@ -377,11 +422,13 @@ static void raid1_end_write_request(struct bio *bio, int error) (unsigned long long) mbio->bi_sector, (unsigned long long) mbio->bi_sector + (mbio->bi_size >> 9) - 1); - bio_endio(mbio, 0); + call_bio_endio(r1_bio); } } } - rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev); + if (r1_bio->bios[mirror] == NULL) + rdev_dec_pending(conf->mirrors[mirror].rdev, + conf->mddev); /* * Let's see if all mirrored write operations have finished @@ -408,10 +455,11 @@ static void raid1_end_write_request(struct bio *bio, int error) * * The rdev for the device selected will have nr_pending incremented. */ -static int read_balance(conf_t *conf, r1bio_t *r1_bio) +static int read_balance(conf_t *conf, r1bio_t *r1_bio, int *max_sectors) { const sector_t this_sector = r1_bio->sector; - const int sectors = r1_bio->sectors; + int sectors; + int best_good_sectors; int start_disk; int best_disk; int i; @@ -426,8 +474,11 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) * We take the first readable disk when above the resync window. */ retry: + sectors = r1_bio->sectors; best_disk = -1; best_dist = MaxSector; + best_good_sectors = 0; + if (conf->mddev->recovery_cp < MaxSector && (this_sector + sectors >= conf->next_resync)) { choose_first = 1; @@ -439,6 +490,9 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) for (i = 0 ; i < conf->raid_disks ; i++) { sector_t dist; + sector_t first_bad; + int bad_sectors; + int disk = start_disk + i; if (disk >= conf->raid_disks) disk -= conf->raid_disks; @@ -461,6 +515,35 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) /* This is a reasonable device to use. It might * even be best. */ + if (is_badblock(rdev, this_sector, sectors, + &first_bad, &bad_sectors)) { + if (best_dist < MaxSector) + /* already have a better device */ + continue; + if (first_bad <= this_sector) { + /* cannot read here. If this is the 'primary' + * device, then we must not read beyond + * bad_sectors from another device.. 
+ */ + bad_sectors -= (this_sector - first_bad); + if (choose_first && sectors > bad_sectors) + sectors = bad_sectors; + if (best_good_sectors > sectors) + best_good_sectors = sectors; + + } else { + sector_t good_sectors = first_bad - this_sector; + if (good_sectors > best_good_sectors) { + best_good_sectors = good_sectors; + best_disk = disk; + } + if (choose_first) + break; + } + continue; + } else + best_good_sectors = sectors; + dist = abs(this_sector - conf->mirrors[disk].head_position); if (choose_first /* Don't change to another disk for sequential reads */ @@ -489,10 +572,12 @@ static int read_balance(conf_t *conf, r1bio_t *r1_bio) rdev_dec_pending(rdev, conf->mddev); goto retry; } + sectors = best_good_sectors; conf->next_seq_sect = this_sector + sectors; conf->last_used = best_disk; } rcu_read_unlock(); + *max_sectors = sectors; return best_disk; } @@ -672,30 +757,31 @@ static void alloc_behind_pages(struct bio *bio, r1bio_t *r1_bio) { int i; struct bio_vec *bvec; - struct page **pages = kzalloc(bio->bi_vcnt * sizeof(struct page*), + struct bio_vec *bvecs = kzalloc(bio->bi_vcnt * sizeof(struct bio_vec), GFP_NOIO); - if (unlikely(!pages)) + if (unlikely(!bvecs)) return; bio_for_each_segment(bvec, bio, i) { - pages[i] = alloc_page(GFP_NOIO); - if (unlikely(!pages[i])) + bvecs[i] = *bvec; + bvecs[i].bv_page = alloc_page(GFP_NOIO); + if (unlikely(!bvecs[i].bv_page)) goto do_sync_io; - memcpy(kmap(pages[i]) + bvec->bv_offset, - kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); - kunmap(pages[i]); + memcpy(kmap(bvecs[i].bv_page) + bvec->bv_offset, + kmap(bvec->bv_page) + bvec->bv_offset, bvec->bv_len); + kunmap(bvecs[i].bv_page); kunmap(bvec->bv_page); } - r1_bio->behind_pages = pages; + r1_bio->behind_bvecs = bvecs; r1_bio->behind_page_count = bio->bi_vcnt; set_bit(R1BIO_BehindIO, &r1_bio->state); return; do_sync_io: for (i = 0; i < bio->bi_vcnt; i++) - if (pages[i]) - put_page(pages[i]); - kfree(pages); + if (bvecs[i].bv_page) + put_page(bvecs[i].bv_page); + kfree(bvecs); PRINTK("%dB behind alloc failed, doing sync I/O\n", bio->bi_size); } @@ -705,7 +791,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror_info_t *mirror; r1bio_t *r1_bio; struct bio *read_bio; - int i, targets = 0, disks; + int i, disks; struct bitmap *bitmap; unsigned long flags; const int rw = bio_data_dir(bio); @@ -713,6 +799,9 @@ static int make_request(mddev_t *mddev, struct bio * bio) const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA)); mdk_rdev_t *blocked_rdev; int plugged; + int first_clone; + int sectors_handled; + int max_sectors; /* * Register the new request and wait if the reconstruction @@ -759,11 +848,24 @@ static int make_request(mddev_t *mddev, struct bio * bio) r1_bio->mddev = mddev; r1_bio->sector = bio->bi_sector; + /* We might need to issue multiple reads to different + * devices if there are bad blocks around, so we keep + * track of the number of reads in bio->bi_phys_segments. + * If this is 0, there is only one r1_bio and no locking + * will be needed when requests complete. If it is + * non-zero, then it is the number of not-completed requests. 
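The counter described just above behaves like a lightweight refcount on the master bio: zero means the request was never split and a single r1_bio finishes it, otherwise it holds the number of sub-requests still outstanding, and whoever drops it to zero ends the master bio (see call_bio_endio() earlier in this file). A condensed model of that accounting, with an illustrative function name:

    /* Return true when the caller completed the last outstanding
     * sub-request and should end the master bio. */
    static bool master_bio_done(struct bio *master, spinlock_t *lock)
    {
            unsigned long flags;
            bool done;

            if (master->bi_phys_segments == 0)
                    return true;    /* never split: single request */

            spin_lock_irqsave(lock, flags);
            master->bi_phys_segments--;
            done = (master->bi_phys_segments == 0);
            spin_unlock_irqrestore(lock, flags);

            return done;
    }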
+ */ + bio->bi_phys_segments = 0; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + if (rw == READ) { /* * read balancing logic: */ - int rdisk = read_balance(conf, r1_bio); + int rdisk; + +read_again: + rdisk = read_balance(conf, r1_bio, &max_sectors); if (rdisk < 0) { /* couldn't find anywhere to read from */ @@ -784,6 +886,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) r1_bio->read_disk = rdisk; read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(read_bio, r1_bio->sector - bio->bi_sector, + max_sectors); r1_bio->bios[rdisk] = read_bio; @@ -793,16 +897,52 @@ static int make_request(mddev_t *mddev, struct bio * bio) read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r1_bio; - generic_make_request(read_bio); + if (max_sectors < r1_bio->sectors) { + /* could not read all from this device, so we will + * need another r1_bio. + */ + + sectors_handled = (r1_bio->sector + max_sectors + - bio->bi_sector); + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + /* Cannot call generic_make_request directly + * as that will be queued in __make_request + * and subsequent mempool_alloc might block waiting + * for it. So hand bio over to raid1d. + */ + reschedule_retry(r1_bio); + + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = bio; + r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); return 0; } /* * WRITE: */ - /* first select target devices under spinlock and + /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio + * If there are known/acknowledged bad blocks on any device on + * which we have seen a write error, we want to avoid writing those + * blocks. + * This potentially requires several writes to write around + * the bad blocks. Each set of writes gets its own r1bio + * with a set of bios attached.
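The bi_phys_segments bookkeeping used by the read-splitting code above is a reference count with a lazy start: 0 means a single, uncounted request, and the first split converts it to an explicit count of 2. A self-contained sketch of the pattern (hypothetical struct, not the kernel's bio):

#include <stdio.h>
#include <assert.h>

struct master_bio {
    int phys_segments;   /* 0 = single request, else outstanding count */
    int completed;
};

static void start_extra_piece(struct master_bio *bio)
{
    /* the first split turns 1 implicit piece into 2 explicit ones */
    if (bio->phys_segments == 0)
        bio->phys_segments = 2;
    else
        bio->phys_segments++;
}

static void piece_done(struct master_bio *bio)
{
    int done;
    if (bio->phys_segments) {
        bio->phys_segments--;
        done = (bio->phys_segments == 0);
    } else
        done = 1;
    if (done)
        bio->completed = 1;   /* the kernel would call bio_endio() here */
}

int main(void)
{
    struct master_bio bio = { 0, 0 };
    start_extra_piece(&bio);            /* split once: 2 pieces */
    start_extra_piece(&bio);            /* split again: 3 pieces */
    piece_done(&bio); piece_done(&bio);
    assert(!bio.completed);
    piece_done(&bio);
    assert(bio.completed);
    printf("master bio completed after last piece\n");
    return 0;
}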
*/ plugged = mddev_check_plugged(mddev); @@ -810,6 +950,7 @@ static int make_request(mddev_t *mddev, struct bio * bio) retry_write: blocked_rdev = NULL; rcu_read_lock(); + max_sectors = r1_bio->sectors; for (i = 0; i < disks; i++) { mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) { @@ -817,17 +958,56 @@ static int make_request(mddev_t *mddev, struct bio * bio) blocked_rdev = rdev; break; } - if (rdev && !test_bit(Faulty, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - if (test_bit(Faulty, &rdev->flags)) { + r1_bio->bios[i] = NULL; + if (!rdev || test_bit(Faulty, &rdev->flags)) { + set_bit(R1BIO_Degraded, &r1_bio->state); + continue; + } + + atomic_inc(&rdev->nr_pending); + if (test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + int is_bad; + + is_bad = is_badblock(rdev, r1_bio->sector, + max_sectors, + &first_bad, &bad_sectors); + if (is_bad < 0) { + /* mustn't write here until the bad block is + * acknowledged*/ + set_bit(BlockedBadBlocks, &rdev->flags); + blocked_rdev = rdev; + break; + } + if (is_bad && first_bad <= r1_bio->sector) { + /* Cannot write here at all */ + bad_sectors -= (r1_bio->sector - first_bad); + if (bad_sectors < max_sectors) + /* mustn't write more than bad_sectors + * to other devices yet + */ + max_sectors = bad_sectors; rdev_dec_pending(rdev, mddev); - r1_bio->bios[i] = NULL; - } else { - r1_bio->bios[i] = bio; - targets++; + /* We don't set R1BIO_Degraded as that + * only applies if the disk is + * missing, so it might be re-added, + * and we want to know to recover this + * chunk. + * In this case the device is here, + * and the fact that this chunk is not + * in-sync is recorded in the bad + * block log + */ + continue; } - } else - r1_bio->bios[i] = NULL; + if (is_bad) { + int good_sectors = first_bad - r1_bio->sector; + if (good_sectors < max_sectors) + max_sectors = good_sectors; + } + } + r1_bio->bios[i] = bio; } rcu_read_unlock(); @@ -838,51 +1018,57 @@ static int make_request(mddev_t *mddev, struct bio * bio) for (j = 0; j < i; j++) if (r1_bio->bios[j]) rdev_dec_pending(conf->mirrors[j].rdev, mddev); - + r1_bio->state = 0; allow_barrier(conf); md_wait_for_blocked_rdev(blocked_rdev, mddev); wait_barrier(conf); goto retry_write; } - BUG_ON(targets == 0); /* we never fail the last device */ - - if (targets < conf->raid_disks) { - /* array is degraded, we will not clear the bitmap - * on I/O completion (see raid1_end_write_request) */ - set_bit(R1BIO_Degraded, &r1_bio->state); + if (max_sectors < r1_bio->sectors) { + /* We are splitting this write into multiple parts, so + * we need to prepare for allocating another r1_bio. + */ + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); } - - /* do behind I/O ? 
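Clipping max_sectors against every device's bad-block list, as the loop above does, leaves the largest prefix of the request that can be issued in one set of writes; a device that cannot be written at all only limits how much may be written elsewhere before its bad range is dealt with. A standalone sketch of that narrowing (illustrative data layout; the kernel walks rdevs under RCU):

#include <stdio.h>

typedef unsigned long long sector_t;

struct dev {
    int write_error_seen;
    sector_t first_bad;    /* valid only if bad_len > 0 */
    int bad_len;
};

static int narrow_for_writes(struct dev *devs, int ndevs,
                             sector_t start, int max_sectors)
{
    int i;
    for (i = 0; i < ndevs; i++) {
        struct dev *d = &devs[i];
        if (!d->write_error_seen || d->bad_len == 0)
            continue;
        if (d->first_bad <= start) {
            /* cannot write here at all: only write as much to the
             * other devices as this device's bad range is long */
            int bad = d->bad_len - (int)(start - d->first_bad);
            if (bad < max_sectors)
                max_sectors = bad;
        } else if (d->first_bad < start + max_sectors) {
            int good = (int)(d->first_bad - start);
            if (good < max_sectors)
                max_sectors = good;
        }
    }
    return max_sectors;
}

int main(void)
{
    struct dev devs[] = {
        { 1, 140, 8 },   /* bad range inside the request */
        { 0, 0, 0 },     /* healthy device, no constraint */
    };
    printf("%d\n", narrow_for_writes(devs, 2, 100, 64));  /* 40 */
    return 0;
}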
- * Not if there are too many, or cannot allocate memory, - * or a reader on WriteMostly is waiting for behind writes - * to flush */ - if (bitmap && - (atomic_read(&bitmap->behind_writes) - < mddev->bitmap_info.max_write_behind) && - !waitqueue_active(&bitmap->behind_wait)) - alloc_behind_pages(bio, r1_bio); + sectors_handled = r1_bio->sector + max_sectors - bio->bi_sector; atomic_set(&r1_bio->remaining, 1); atomic_set(&r1_bio->behind_remaining, 0); - bitmap_startwrite(bitmap, bio->bi_sector, r1_bio->sectors, - test_bit(R1BIO_BehindIO, &r1_bio->state)); + first_clone = 1; for (i = 0; i < disks; i++) { struct bio *mbio; if (!r1_bio->bios[i]) continue; mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); - r1_bio->bios[i] = mbio; - - mbio->bi_sector = r1_bio->sector + conf->mirrors[i].rdev->data_offset; - mbio->bi_bdev = conf->mirrors[i].rdev->bdev; - mbio->bi_end_io = raid1_end_write_request; - mbio->bi_rw = WRITE | do_flush_fua | do_sync; - mbio->bi_private = r1_bio; - - if (r1_bio->behind_pages) { + md_trim_bio(mbio, r1_bio->sector - bio->bi_sector, max_sectors); + + if (first_clone) { + /* do behind I/O ? + * Not if there are too many, or cannot + * allocate memory, or a reader on WriteMostly + * is waiting for behind writes to flush */ + if (bitmap && + (atomic_read(&bitmap->behind_writes) + < mddev->bitmap_info.max_write_behind) && + !waitqueue_active(&bitmap->behind_wait)) + alloc_behind_pages(mbio, r1_bio); + + bitmap_startwrite(bitmap, r1_bio->sector, + r1_bio->sectors, + test_bit(R1BIO_BehindIO, + &r1_bio->state)); + first_clone = 0; + } + if (r1_bio->behind_bvecs) { struct bio_vec *bvec; int j; @@ -894,11 +1080,20 @@ static int make_request(mddev_t *mddev, struct bio * bio) * them all */ __bio_for_each_segment(bvec, mbio, j, 0) - bvec->bv_page = r1_bio->behind_pages[j]; + bvec->bv_page = r1_bio->behind_bvecs[j].bv_page; if (test_bit(WriteMostly, &conf->mirrors[i].rdev->flags)) atomic_inc(&r1_bio->behind_remaining); } + r1_bio->bios[i] = mbio; + + mbio->bi_sector = (r1_bio->sector + + conf->mirrors[i].rdev->data_offset); + mbio->bi_bdev = conf->mirrors[i].rdev->bdev; + mbio->bi_end_io = raid1_end_write_request; + mbio->bi_rw = WRITE | do_flush_fua | do_sync; + mbio->bi_private = r1_bio; + atomic_inc(&r1_bio->remaining); spin_lock_irqsave(&conf->device_lock, flags); bio_list_add(&conf->pending_bio_list, mbio); @@ -909,6 +1104,19 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid1d snuck in to freeze_array */ wake_up(&conf->wait_barrier); + if (sectors_handled < (bio->bi_size >> 9)) { + /* We need another r1_bio. It has already been counted + * in bio->bi_phys_segments + */ + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + r1_bio->master_bio = bio; + r1_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + r1_bio->state = 0; + r1_bio->mddev = mddev; + r1_bio->sector = bio->bi_sector + sectors_handled; + goto retry_write; + } + if (do_sync || !bitmap || !plugged) md_wakeup_thread(mddev->thread); @@ -952,9 +1160,10 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * However don't try a recovery from this drive as * it is very likely to fail. 
*/ - mddev->recovery_disabled = 1; + conf->recovery_disabled = mddev->recovery_disabled; return; } + set_bit(Blocked, &rdev->flags); if (test_and_clear_bit(In_sync, &rdev->flags)) { unsigned long flags; spin_lock_irqsave(&conf->device_lock, flags); @@ -1027,7 +1236,7 @@ static int raid1_spare_active(mddev_t *mddev) && !test_bit(Faulty, &rdev->flags) && !test_and_set_bit(In_sync, &rdev->flags)) { count++; - sysfs_notify_dirent(rdev->sysfs_state); + sysfs_notify_dirent_safe(rdev->sysfs_state); } } spin_lock_irqsave(&conf->device_lock, flags); @@ -1048,6 +1257,9 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) int first = 0; int last = mddev->raid_disks - 1; + if (mddev->recovery_disabled == conf->recovery_disabled) + return -EBUSY; + if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; @@ -1103,7 +1315,7 @@ static int raid1_remove_disk(mddev_t *mddev, int number) * is not possible. */ if (!test_bit(Faulty, &rdev->flags) && - !mddev->recovery_disabled && + mddev->recovery_disabled != conf->recovery_disabled && mddev->degraded < conf->raid_disks) { err = -EBUSY; goto abort; @@ -1155,6 +1367,8 @@ static void end_sync_write(struct bio *bio, int error) conf_t *conf = mddev->private; int i; int mirror=0; + sector_t first_bad; + int bad_sectors; for (i = 0; i < conf->raid_disks; i++) if (r1_bio->bios[i] == bio) { @@ -1172,18 +1386,48 @@ static void end_sync_write(struct bio *bio, int error) s += sync_blocks; sectors_to_go -= sync_blocks; } while (sectors_to_go > 0); - md_error(mddev, conf->mirrors[mirror].rdev); - } + set_bit(WriteErrorSeen, + &conf->mirrors[mirror].rdev->flags); + set_bit(R1BIO_WriteError, &r1_bio->state); + } else if (is_badblock(conf->mirrors[mirror].rdev, + r1_bio->sector, + r1_bio->sectors, + &first_bad, &bad_sectors) && + !is_badblock(conf->mirrors[r1_bio->read_disk].rdev, + r1_bio->sector, + r1_bio->sectors, + &first_bad, &bad_sectors) + ) + set_bit(R1BIO_MadeGood, &r1_bio->state); update_head_pos(mirror, r1_bio); if (atomic_dec_and_test(&r1_bio->remaining)) { - sector_t s = r1_bio->sectors; - put_buf(r1_bio); - md_done_sync(mddev, s, uptodate); + int s = r1_bio->sectors; + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + reschedule_retry(r1_bio); + else { + put_buf(r1_bio); + md_done_sync(mddev, s, uptodate); + } } } +static int r1_sync_page_io(mdk_rdev_t *rdev, sector_t sector, + int sectors, struct page *page, int rw) +{ + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + /* success */ + return 1; + if (rw == WRITE) + set_bit(WriteErrorSeen, &rdev->flags); + /* need to record an error - either for the block or the device */ + if (!rdev_set_badblocks(rdev, sector, sectors, 0)) + md_error(rdev->mddev, rdev); + return 0; +} + static int fix_sync_read_error(r1bio_t *r1_bio) { /* Try some synchronous reads of other devices to get @@ -1193,6 +1437,9 @@ static int fix_sync_read_error(r1bio_t *r1_bio) * We don't need to freeze the array, because being in an * active sync request, there is no normal IO, and * no overlapping syncs. + * We don't need to check is_badblock() again as we + * made sure that anything with a bad block in range + * will have bi_end_io clear. 
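r1_sync_page_io() above encodes a three-step fallback: attempt the synchronous I/O, on failure try to remember it as a bad block, and only when even that is impossible fail the whole device. The same ladder as a runnable sketch with stubbed I/O (all names here are stand-ins, and the WriteErrorSeen bookkeeping is omitted):

#include <stdio.h>

/* stubs: device 0 succeeds, device 1 fails I/O, device 2 also fails
 * to record the bad block */
static int do_sync_io(int devnum)      { return devnum < 1; }
static int record_badblock(int devnum) { return devnum < 2; }
static void fail_device(int devnum)    { printf("dev %d: md_error\n", devnum); }

/* returns 1 on success, 0 on failure (bad block recorded or device failed) */
static int sync_io_policy(int devnum)
{
    if (do_sync_io(devnum))
        return 1;
    /* need to record an error - either for the block or the device */
    if (!record_badblock(devnum))
        fail_device(devnum);
    return 0;
}

int main(void)
{
    printf("dev0 -> %d\n", sync_io_policy(0));  /* 1: plain success */
    printf("dev1 -> %d\n", sync_io_policy(1));  /* 0: bad block logged */
    printf("dev2 -> %d\n", sync_io_policy(2));  /* 0: whole device failed */
    return 0;
}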
*/ mddev_t *mddev = r1_bio->mddev; conf_t *conf = mddev->private; @@ -1217,9 +1464,7 @@ static int fix_sync_read_error(r1bio_t *r1_bio) * active, and resync is currently active */ rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, + if (sync_page_io(rdev, sect, s<<9, bio->bi_io_vec[idx].bv_page, READ, false)) { success = 1; @@ -1233,16 +1478,36 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (!success) { char b[BDEVNAME_SIZE]; - /* Cannot read from anywhere, array is toast */ - md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + int abort = 0; + /* Cannot read from anywhere, this block is lost. + * Record a bad block on each device. If that doesn't + * work just disable and interrupt the recovery. + * Don't fail devices as that won't really help. + */ printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O read error" " for block %llu\n", mdname(mddev), bdevname(bio->bi_bdev, b), (unsigned long long)r1_bio->sector); - md_done_sync(mddev, r1_bio->sectors, 0); - put_buf(r1_bio); - return 0; + for (d = 0; d < conf->raid_disks; d++) { + rdev = conf->mirrors[d].rdev; + if (!rdev || test_bit(Faulty, &rdev->flags)) + continue; + if (!rdev_set_badblocks(rdev, sect, s, 0)) + abort = 1; + } + if (abort) { + mddev->recovery_disabled = 1; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_done_sync(mddev, r1_bio->sectors, 0); + put_buf(r1_bio); + return 0; + } + /* Try next page */ + sectors -= s; + sect += s; + idx++; + continue; } start = d; @@ -1254,16 +1519,12 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (r1_bio->bios[d]->bi_end_io != end_sync_read) continue; rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - WRITE, false) == 0) { + if (r1_sync_page_io(rdev, sect, s, + bio->bi_io_vec[idx].bv_page, + WRITE) == 0) { r1_bio->bios[d]->bi_end_io = NULL; rdev_dec_pending(rdev, mddev); - md_error(mddev, rdev); - } else - atomic_add(s, &rdev->corrected_errors); + } } d = start; while (d != r1_bio->read_disk) { @@ -1273,12 +1534,10 @@ static int fix_sync_read_error(r1bio_t *r1_bio) if (r1_bio->bios[d]->bi_end_io != end_sync_read) continue; rdev = conf->mirrors[d].rdev; - if (sync_page_io(rdev, - sect, - s<<9, - bio->bi_io_vec[idx].bv_page, - READ, false) == 0) - md_error(mddev, rdev); + if (r1_sync_page_io(rdev, sect, s, + bio->bi_io_vec[idx].bv_page, + READ) != 0) + atomic_add(s, &rdev->corrected_errors); } sectors -= s; sect += s; @@ -1420,7 +1679,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio) * * 1. Retries failed read operations on working mirrors. * 2. Updates the raid superblock when problems encounter. - * 3. Performs writes following reads for array syncronising. + * 3. Performs writes following reads for array synchronising. */ static void fix_read_error(conf_t *conf, int read_disk, @@ -1443,9 +1702,14 @@ static void fix_read_error(conf_t *conf, int read_disk, * which is the thread that might remove * a device. If raid1d ever becomes multi-threaded.... 
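When a resync read fails on every mirror, the block is simply lost, so the code above records it as bad on each device and aborts the recovery only if some device cannot store the record. The accumulate-and-abort pattern, reduced to a sketch with a stubbed bad-block store:

#include <stdio.h>

static int rdev_set_badblocks_stub(int dev) { return dev != 3; /* stub */ }

static int record_lost_block(int ndisks)
{
    int abort = 0, d;
    for (d = 0; d < ndisks; d++)
        if (!rdev_set_badblocks_stub(d))
            abort = 1;      /* cannot remember this bad block anywhere */
    return abort;           /* caller interrupts the resync if set */
}

int main(void)
{
    printf("abort=%d with 3 disks\n", record_lost_block(3)); /* 0 */
    printf("abort=%d with 4 disks\n", record_lost_block(4)); /* 1 */
    return 0;
}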
*/ + sector_t first_bad; + int bad_sectors; + rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags) && + is_badblock(rdev, sect, s, + &first_bad, &bad_sectors) == 0 && sync_page_io(rdev, sect, s<<9, conf->tmppage, READ, false)) success = 1; @@ -1457,8 +1721,10 @@ static void fix_read_error(conf_t *conf, int read_disk, } while (!success && d != read_disk); if (!success) { - /* Cannot read from anywhere -- bye bye array */ - md_error(mddev, conf->mirrors[read_disk].rdev); + /* Cannot read from anywhere - mark it bad */ + mdk_rdev_t *rdev = conf->mirrors[read_disk].rdev; + if (!rdev_set_badblocks(rdev, sect, s, 0)) + md_error(mddev, rdev); break; } /* write it back and re-read */ @@ -1469,13 +1735,9 @@ static void fix_read_error(conf_t *conf, int read_disk, d--; rdev = conf->mirrors[d].rdev; if (rdev && - test_bit(In_sync, &rdev->flags)) { - if (sync_page_io(rdev, sect, s<<9, - conf->tmppage, WRITE, false) - == 0) - /* Well, this device is dead */ - md_error(mddev, rdev); - } + test_bit(In_sync, &rdev->flags)) + r1_sync_page_io(rdev, sect, s, + conf->tmppage, WRITE); } d = start; while (d != read_disk) { @@ -1486,12 +1748,8 @@ static void fix_read_error(conf_t *conf, int read_disk, rdev = conf->mirrors[d].rdev; if (rdev && test_bit(In_sync, &rdev->flags)) { - if (sync_page_io(rdev, sect, s<<9, - conf->tmppage, READ, false) - == 0) - /* Well, this device is dead */ - md_error(mddev, rdev); - else { + if (r1_sync_page_io(rdev, sect, s, + conf->tmppage, READ)) { atomic_add(s, &rdev->corrected_errors); printk(KERN_INFO "md/raid1:%s: read error corrected " @@ -1508,21 +1766,255 @@ static void fix_read_error(conf_t *conf, int read_disk, } } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +static int submit_bio_wait(int rw, struct bio *bio) +{ + struct completion event; + rw |= REQ_SYNC; + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(rw, bio); + wait_for_completion(&event); + + return test_bit(BIO_UPTODATE, &bio->bi_flags); +} + +static int narrow_write_error(r1bio_t *r1_bio, int i) +{ + mddev_t *mddev = r1_bio->mddev; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev = conf->mirrors[i].rdev; + int vcnt, idx; + struct bio_vec *vec; + + /* bio has the data to be written to device 'i' where + * we just recently had a write error. + * We repeatedly clone the bio and trim down to one block, + * then try the write. Where the write fails we record + * a bad block. + * It is conceivable that the bio doesn't exactly align with + * blocks. We must handle this somehow. + * + * We currently own a reference on the rdev. 
+ */ + + int block_sectors; + sector_t sector; + int sectors; + int sect_to_write = r1_bio->sectors; + int ok = 1; + + if (rdev->badblocks.shift < 0) + return 0; + + block_sectors = 1 << rdev->badblocks.shift; + sector = r1_bio->sector; + sectors = ((sector + block_sectors) + & ~(sector_t)(block_sectors - 1)) + - sector; + + if (test_bit(R1BIO_BehindIO, &r1_bio->state)) { + vcnt = r1_bio->behind_page_count; + vec = r1_bio->behind_bvecs; + idx = 0; + while (vec[idx].bv_page == NULL) + idx++; + } else { + vcnt = r1_bio->master_bio->bi_vcnt; + vec = r1_bio->master_bio->bi_io_vec; + idx = r1_bio->master_bio->bi_idx; + } + while (sect_to_write) { + struct bio *wbio; + if (sectors > sect_to_write) + sectors = sect_to_write; + /* Write at 'sector' for 'sectors'*/ + + wbio = bio_alloc_mddev(GFP_NOIO, vcnt, mddev); + memcpy(wbio->bi_io_vec, vec, vcnt * sizeof(struct bio_vec)); + wbio->bi_sector = r1_bio->sector; + wbio->bi_rw = WRITE; + wbio->bi_vcnt = vcnt; + wbio->bi_size = r1_bio->sectors << 9; + wbio->bi_idx = idx; + + md_trim_bio(wbio, sector - r1_bio->sector, sectors); + wbio->bi_sector += rdev->data_offset; + wbio->bi_bdev = rdev->bdev; + if (submit_bio_wait(WRITE, wbio) == 0) + /* failure! */ + ok = rdev_set_badblocks(rdev, sector, + sectors, 0) + && ok; + + bio_put(wbio); + sect_to_write -= sectors; + sector += sectors; + sectors = block_sectors; + } + return ok; +} + +static void handle_sync_write_finished(conf_t *conf, r1bio_t *r1_bio) +{ + int m; + int s = r1_bio->sectors; + for (m = 0; m < conf->raid_disks ; m++) { + mdk_rdev_t *rdev = conf->mirrors[m].rdev; + struct bio *bio = r1_bio->bios[m]; + if (bio->bi_end_io == NULL) + continue; + if (test_bit(BIO_UPTODATE, &bio->bi_flags) && + test_bit(R1BIO_MadeGood, &r1_bio->state)) { + rdev_clear_badblocks(rdev, r1_bio->sector, s); + } + if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && + test_bit(R1BIO_WriteError, &r1_bio->state)) { + if (!rdev_set_badblocks(rdev, r1_bio->sector, s, 0)) + md_error(conf->mddev, rdev); + } + } + put_buf(r1_bio); + md_done_sync(conf->mddev, s, 1); +} + +static void handle_write_finished(conf_t *conf, r1bio_t *r1_bio) +{ + int m; + for (m = 0; m < conf->raid_disks ; m++) + if (r1_bio->bios[m] == IO_MADE_GOOD) { + mdk_rdev_t *rdev = conf->mirrors[m].rdev; + rdev_clear_badblocks(rdev, + r1_bio->sector, + r1_bio->sectors); + rdev_dec_pending(rdev, conf->mddev); + } else if (r1_bio->bios[m] != NULL) { + /* This drive got a write error. We need to + * narrow down and record precise write + * errors. + */ + if (!narrow_write_error(r1_bio, m)) { + md_error(conf->mddev, + conf->mirrors[m].rdev); + /* an I/O failed, we can't clear the bitmap */ + set_bit(R1BIO_Degraded, &r1_bio->state); + } + rdev_dec_pending(conf->mirrors[m].rdev, + conf->mddev); + } + if (test_bit(R1BIO_WriteError, &r1_bio->state)) + close_write(r1_bio); + raid_end_bio_io(r1_bio); +} + +static void handle_read_error(conf_t *conf, r1bio_t *r1_bio) +{ + int disk; + int max_sectors; + mddev_t *mddev = conf->mddev; + struct bio *bio; + char b[BDEVNAME_SIZE]; + mdk_rdev_t *rdev; + + clear_bit(R1BIO_ReadError, &r1_bio->state); + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. 
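The chunking in narrow_write_error() rounds the first write up to the next block_sectors boundary so that every subsequent chunk is block-aligned; a failed chunk then maps to exactly one bad-block record. The arithmetic as a standalone program (example values are hypothetical):

#include <stdio.h>

typedef unsigned long long sector_t;

static void narrow_chunks(sector_t sector, int total, int shift)
{
    int block_sectors = 1 << shift;
    /* distance from 'sector' up to the next aligned boundary */
    int sectors = (int)(((sector + block_sectors)
                         & ~(sector_t)(block_sectors - 1)) - sector);

    while (total) {
        if (sectors > total)
            sectors = total;
        printf("write %d sectors at %llu\n", sectors, sector);
        total -= sectors;
        sector += sectors;
        sectors = block_sectors;   /* all later chunks are full blocks */
    }
}

int main(void)
{
    /* 8-sector badblock granularity (shift 3), request starting
     * 3 sectors into a block: chunks of 5, 8 and 7 sectors */
    narrow_chunks(1027, 20, 3);
    return 0;
}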
+ * This is all done synchronously while the array is + * frozen + */ + if (mddev->ro == 0) { + freeze_array(conf); + fix_read_error(conf, r1_bio->read_disk, + r1_bio->sector, r1_bio->sectors); + unfreeze_array(conf); + } else + md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev); + + bio = r1_bio->bios[r1_bio->read_disk]; + bdevname(bio->bi_bdev, b); +read_more: + disk = read_balance(conf, r1_bio, &max_sectors); + if (disk == -1) { + printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" + " read error for block %llu\n", + mdname(mddev), b, (unsigned long long)r1_bio->sector); + raid_end_bio_io(r1_bio); + } else { + const unsigned long do_sync + = r1_bio->master_bio->bi_rw & REQ_SYNC; + if (bio) { + r1_bio->bios[r1_bio->read_disk] = + mddev->ro ? IO_BLOCKED : NULL; + bio_put(bio); + } + r1_bio->read_disk = disk; + bio = bio_clone_mddev(r1_bio->master_bio, GFP_NOIO, mddev); + md_trim_bio(bio, r1_bio->sector - bio->bi_sector, max_sectors); + r1_bio->bios[r1_bio->read_disk] = bio; + rdev = conf->mirrors[disk].rdev; + printk_ratelimited(KERN_ERR + "md/raid1:%s: redirecting sector %llu" + " to other mirror: %s\n", + mdname(mddev), + (unsigned long long)r1_bio->sector, + bdevname(rdev->bdev, b)); + bio->bi_sector = r1_bio->sector + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_end_io = raid1_end_read_request; + bio->bi_rw = READ | do_sync; + bio->bi_private = r1_bio; + if (max_sectors < r1_bio->sectors) { + /* Drat - have to split this up more */ + struct bio *mbio = r1_bio->master_bio; + int sectors_handled = (r1_bio->sector + max_sectors + - mbio->bi_sector); + r1_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (mbio->bi_phys_segments == 0) + mbio->bi_phys_segments = 2; + else + mbio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + generic_make_request(bio); + bio = NULL; + + r1_bio = mempool_alloc(conf->r1bio_pool, GFP_NOIO); + + r1_bio->master_bio = mbio; + r1_bio->sectors = (mbio->bi_size >> 9) + - sectors_handled; + r1_bio->state = 0; + set_bit(R1BIO_ReadError, &r1_bio->state); + r1_bio->mddev = mddev; + r1_bio->sector = mbio->bi_sector + sectors_handled; + + goto read_more; + } else + generic_make_request(bio); + } +} + static void raid1d(mddev_t *mddev) { r1bio_t *r1_bio; - struct bio *bio; unsigned long flags; conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; - mdk_rdev_t *rdev; struct blk_plug plug; md_check_recovery(mddev); blk_start_plug(&plug); for (;;) { - char b[BDEVNAME_SIZE]; if (atomic_read(&mddev->plug_cnt) == 0) flush_pending_writes(conf); @@ -1539,62 +2031,26 @@ static void raid1d(mddev_t *mddev) mddev = r1_bio->mddev; conf = mddev->private; - if (test_bit(R1BIO_IsSync, &r1_bio->state)) - sync_request_write(mddev, r1_bio); - else { - int disk; - - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. 
- * This is all done synchronously while the array is - * frozen + if (test_bit(R1BIO_IsSync, &r1_bio->state)) { + if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + handle_sync_write_finished(conf, r1_bio); + else + sync_request_write(mddev, r1_bio); + } else if (test_bit(R1BIO_MadeGood, &r1_bio->state) || + test_bit(R1BIO_WriteError, &r1_bio->state)) + handle_write_finished(conf, r1_bio); + else if (test_bit(R1BIO_ReadError, &r1_bio->state)) + handle_read_error(conf, r1_bio); + else + /* just a partial read to be scheduled from separate + * context */ - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, r1_bio->read_disk, - r1_bio->sector, - r1_bio->sectors); - unfreeze_array(conf); - } else - md_error(mddev, - conf->mirrors[r1_bio->read_disk].rdev); - - bio = r1_bio->bios[r1_bio->read_disk]; - if ((disk=read_balance(conf, r1_bio)) == -1) { - printk(KERN_ALERT "md/raid1:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev,b), - (unsigned long long)r1_bio->sector); - raid_end_bio_io(r1_bio); - } else { - const unsigned long do_sync = r1_bio->master_bio->bi_rw & REQ_SYNC; - r1_bio->bios[r1_bio->read_disk] = - mddev->ro ? IO_BLOCKED : NULL; - r1_bio->read_disk = disk; - bio_put(bio); - bio = bio_clone_mddev(r1_bio->master_bio, - GFP_NOIO, mddev); - r1_bio->bios[r1_bio->read_disk] = bio; - rdev = conf->mirrors[disk].rdev; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid1:%s: redirecting sector %llu to" - " other mirror: %s\n", - mdname(mddev), - (unsigned long long)r1_bio->sector, - bdevname(rdev->bdev,b)); - bio->bi_sector = r1_bio->sector + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_end_io = raid1_end_read_request; - bio->bi_rw = READ | do_sync; - bio->bi_private = r1_bio; - generic_make_request(bio); - } - } + generic_make_request(r1_bio->bios[r1_bio->read_disk]); + cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); } blk_finish_plug(&plug); } @@ -1636,6 +2092,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i int write_targets = 0, read_targets = 0; sector_t sync_blocks; int still_degraded = 0; + int good_sectors = RESYNC_SECTORS; + int min_bad = 0; /* number of sectors that are bad in all devices */ if (!conf->r1buf_pool) if (init_resync(conf)) @@ -1723,36 +2181,89 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i rdev = rcu_dereference(conf->mirrors[i].rdev); if (rdev == NULL || - test_bit(Faulty, &rdev->flags)) { + test_bit(Faulty, &rdev->flags)) { still_degraded = 1; - continue; } else if (!test_bit(In_sync, &rdev->flags)) { bio->bi_rw = WRITE; bio->bi_end_io = end_sync_write; write_targets ++; } else { /* may need to read from here */ - bio->bi_rw = READ; - bio->bi_end_io = end_sync_read; - if (test_bit(WriteMostly, &rdev->flags)) { - if (wonly < 0) - wonly = i; - } else { - if (disk < 0) - disk = i; + sector_t first_bad = MaxSector; + int bad_sectors; + + if (is_badblock(rdev, sector_nr, good_sectors, + &first_bad, &bad_sectors)) { + if (first_bad > sector_nr) + good_sectors = first_bad - sector_nr; + else { + bad_sectors -= (sector_nr - first_bad); + if (min_bad == 0 || + min_bad > bad_sectors) + min_bad = bad_sectors; + } + } + if (sector_nr < first_bad) { + if (test_bit(WriteMostly, &rdev->flags)) { + if (wonly < 0) + wonly = i; + } else { + if (disk < 0) + disk = i; + } + bio->bi_rw = READ; + bio->bi_end_io = end_sync_read; + read_targets++; } - 
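The rewritten raid1d loop above is a flag-driven dispatcher: sync requests split on MadeGood/WriteError, normal requests split on MadeGood/WriteError versus ReadError, and anything else is a deferred partial read. A compact model using the bit numbers from raid1.h, with the handlers stubbed as prints:

#include <stdio.h>

#define IS_SYNC      (1 << 1)   /* R1BIO_IsSync */
#define READ_ERROR   (1 << 4)   /* R1BIO_ReadError */
#define MADE_GOOD    (1 << 7)   /* R1BIO_MadeGood */
#define WRITE_ERROR  (1 << 8)   /* R1BIO_WriteError */

static void dispatch(unsigned long state)
{
    if (state & IS_SYNC) {
        if (state & (MADE_GOOD | WRITE_ERROR))
            printf("handle_sync_write_finished\n");
        else
            printf("sync_request_write\n");
    } else if (state & (MADE_GOOD | WRITE_ERROR))
        printf("handle_write_finished\n");
    else if (state & READ_ERROR)
        printf("handle_read_error\n");
    else
        printf("resubmit partial read\n");
}

int main(void)
{
    dispatch(IS_SYNC | MADE_GOOD);
    dispatch(WRITE_ERROR);
    dispatch(READ_ERROR);
    dispatch(0);
    return 0;
}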
read_targets++; } - atomic_inc(&rdev->nr_pending); - bio->bi_sector = sector_nr + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_private = r1_bio; + if (bio->bi_end_io) { + atomic_inc(&rdev->nr_pending); + bio->bi_sector = sector_nr + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_private = r1_bio; + } } rcu_read_unlock(); if (disk < 0) disk = wonly; r1_bio->read_disk = disk; + if (read_targets == 0 && min_bad > 0) { + /* These sectors are bad on all InSync devices, so we + * need to mark them bad on all write targets + */ + int ok = 1; + for (i = 0 ; i < conf->raid_disks ; i++) + if (r1_bio->bios[i]->bi_end_io == end_sync_write) { + mdk_rdev_t *rdev = + rcu_dereference(conf->mirrors[i].rdev); + ok = rdev_set_badblocks(rdev, sector_nr, + min_bad, 0 + ) && ok; + } + set_bit(MD_CHANGE_DEVS, &mddev->flags); + *skipped = 1; + put_buf(r1_bio); + + if (!ok) { + /* Cannot record the badblocks, so need to + * abort the resync. + * If there are multiple read targets, could just + * fail the really bad ones ??? + */ + conf->recovery_disabled = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + return 0; + } else + return min_bad; + + } + if (min_bad > 0 && min_bad < good_sectors) { + /* only resync enough to reach the next bad->good + * transition */ + good_sectors = min_bad; + } + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) && read_targets > 0) /* extra read targets are also write targets */ write_targets += read_targets-1; @@ -1769,6 +2280,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i if (max_sector > mddev->resync_max) max_sector = mddev->resync_max; /* Don't do IO beyond here */ + if (max_sector > sector_nr + good_sectors) + max_sector = sector_nr + good_sectors; nr_sectors = 0; sync_blocks = 0; do { @@ -2154,18 +2667,13 @@ static int raid1_reshape(mddev_t *mddev) for (d = d2 = 0; d < conf->raid_disks; d++) { mdk_rdev_t *rdev = conf->mirrors[d].rdev; if (rdev && rdev->raid_disk != d2) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = d2; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + sysfs_unlink_rdev(mddev, rdev); + if (sysfs_link_rdev(mddev, rdev)) printk(KERN_WARNING - "md/raid1:%s: cannot register " - "%s\n", - mdname(mddev), nm); + "md/raid1:%s: cannot register rd%d\n", + mdname(mddev), rdev->raid_disk); } if (rdev) newmirrors[d2++].rdev = rdev; diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e743a64fac4f..e0d676b48974 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -48,6 +48,12 @@ struct r1_private_data_s { * (fresh device added). * Cleared when a sync completes. */ + int recovery_disabled; /* when the same as + * mddev->recovery_disabled + * we don't allow recovery + * to be attempted as we + * expect a read error + */ wait_queue_head_t wait_barrier; @@ -95,7 +101,7 @@ struct r1bio_s { struct list_head retry_list; /* Next two are only valid when R1BIO_BehindIO is set */ - struct page **behind_pages; + struct bio_vec *behind_bvecs; int behind_page_count; /* * if the IO is in WRITE direction, then multiple bios are used. @@ -110,13 +116,24 @@ struct r1bio_s { * correct the read error. 
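The resync path above derives two numbers per window: good_sectors, how far it may sync before hitting the first bad range on some in-sync device, and min_bad, how deep into a bad range it already is on the worst device; when every readable copy is bad, the range is marked bad on the write targets and skipped. A standalone sketch of that computation over a toy device table:

#include <stdio.h>

typedef unsigned long long sector_t;

struct bb { sector_t first_bad; int bad_len; };   /* one range per device */

static void resync_window(sector_t sector_nr, int window,
                          struct bb *devs, int ndevs,
                          int *good_sectors, int *min_bad)
{
    int i;
    *good_sectors = window;
    *min_bad = 0;
    for (i = 0; i < ndevs; i++) {
        struct bb *b = &devs[i];
        if (b->bad_len == 0 ||
            b->first_bad >= sector_nr + *good_sectors)
            continue;
        if (b->first_bad > sector_nr)
            *good_sectors = (int)(b->first_bad - sector_nr);
        else {
            int bad = b->bad_len - (int)(sector_nr - b->first_bad);
            if (*min_bad == 0 || *min_bad > bad)
                *min_bad = bad;
        }
    }
}

int main(void)
{
    struct bb devs[] = { { 0, 0 }, { 1040, 16 } };
    int good, bad;
    resync_window(1024, 128, devs, 2, &good, &bad);
    printf("good=%d min_bad=%d\n", good, bad);   /* good=16 min_bad=0 */
    return 0;
}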
To keep track of bad blocks on a per-bio * level, we store IO_BLOCKED in the appropriate 'bios' pointer */ -#define IO_BLOCKED ((struct bio*)1) +#define IO_BLOCKED ((struct bio *)1) +/* When we successfully write to a known bad-block, we need to remove the + * bad-block marking which must be done from process context. So we record + * the success by setting bios[n] to IO_MADE_GOOD + */ +#define IO_MADE_GOOD ((struct bio *)2) + +#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) /* bits for r1bio.state */ #define R1BIO_Uptodate 0 #define R1BIO_IsSync 1 #define R1BIO_Degraded 2 #define R1BIO_BehindIO 3 +/* Set ReadError on bios that experience a readerror so that + * raid1d knows what to do with them. + */ +#define R1BIO_ReadError 4 /* For write-behind requests, we call bi_end_io when * the last non-write-behind device completes, providing * any write was successful. Otherwise we call when @@ -125,6 +142,11 @@ struct r1bio_s { * Record that bi_end_io was called with this flag... */ #define R1BIO_Returned 6 +/* If a write for this request means we can clear some + * known-bad-block records, we set this flag + */ +#define R1BIO_MadeGood 7 +#define R1BIO_WriteError 8 extern int md_raid1_congested(mddev_t *mddev, int bits); diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 6e846688962f..8b29cd4f01c8 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,6 +22,7 @@ #include <linux/delay.h> #include <linux/blkdev.h> #include <linux/seq_file.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid10.h" #include "raid0.h" @@ -123,7 +124,14 @@ static void * r10buf_pool_alloc(gfp_t gfp_flags, void *data) for (j = 0 ; j < nalloc; j++) { bio = r10_bio->devs[j].bio; for (i = 0; i < RESYNC_PAGES; i++) { - page = alloc_page(gfp_flags); + if (j == 1 && !test_bit(MD_RECOVERY_SYNC, + &conf->mddev->recovery)) { + /* we can share bv_page's during recovery */ + struct bio *rbio = r10_bio->devs[0].bio; + page = rbio->bi_io_vec[i].bv_page; + get_page(page); + } else + page = alloc_page(gfp_flags); if (unlikely(!page)) goto out_free_pages; @@ -173,7 +181,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio) for (i = 0; i < conf->copies; i++) { struct bio **bio = & r10_bio->devs[i].bio; - if (*bio && *bio != IO_BLOCKED) + if (!BIO_SPECIAL(*bio)) bio_put(*bio); *bio = NULL; } @@ -183,12 +191,6 @@ static void free_r10bio(r10bio_t *r10_bio) { conf_t *conf = r10_bio->mddev->private; - /* - * Wake up any possible resync thread that waits for the device - * to go idle. - */ - allow_barrier(conf); - put_all_bios(conf, r10_bio); mempool_free(r10_bio, conf->r10bio_pool); } @@ -227,9 +229,27 @@ static void reschedule_retry(r10bio_t *r10_bio) static void raid_end_bio_io(r10bio_t *r10_bio) { struct bio *bio = r10_bio->master_bio; + int done; + conf_t *conf = r10_bio->mddev->private; - bio_endio(bio, - test_bit(R10BIO_Uptodate, &r10_bio->state) ? 0 : -EIO); + if (bio->bi_phys_segments) { + unsigned long flags; + spin_lock_irqsave(&conf->device_lock, flags); + bio->bi_phys_segments--; + done = (bio->bi_phys_segments == 0); + spin_unlock_irqrestore(&conf->device_lock, flags); + } else + done = 1; + if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + if (done) { + bio_endio(bio, 0); + /* + * Wake up any possible resync thread that waits for the device + * to go idle. 
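IO_BLOCKED and IO_MADE_GOOD are sentinel pointers: tiny integers cast to struct bio * that can never collide with a real allocation, so per-device bio slots can carry state without extra fields, and BIO_SPECIAL() guards every dereference or bio_put(). The trick in isolation:

#include <stdio.h>

struct bio;   /* opaque here */

#define IO_BLOCKED   ((struct bio *)1)
#define IO_MADE_GOOD ((struct bio *)2)
#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

static void put_one(struct bio *b)
{
    if (!BIO_SPECIAL(b))
        printf("bio_put(real bio)\n");
    else if (b == IO_MADE_GOOD)
        printf("clear bad blocks for this device\n");
    else if (b == IO_BLOCKED)
        printf("device was skipped\n");
    else
        printf("no bio\n");            /* NULL is also 'special' */
}

int main(void)
{
    put_one(NULL);
    put_one(IO_BLOCKED);
    put_one(IO_MADE_GOOD);
    return 0;
}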
+ */ + allow_barrier(conf); + } free_r10bio(r10_bio); } @@ -244,6 +264,26 @@ static inline void update_head_pos(int slot, r10bio_t *r10_bio) r10_bio->devs[slot].addr + (r10_bio->sectors); } +/* + * Find the disk number which triggered given bio + */ +static int find_bio_disk(conf_t *conf, r10bio_t *r10_bio, + struct bio *bio, int *slotp) +{ + int slot; + + for (slot = 0; slot < conf->copies; slot++) + if (r10_bio->devs[slot].bio == bio) + break; + + BUG_ON(slot == conf->copies); + update_head_pos(slot, r10_bio); + + if (slotp) + *slotp = slot; + return r10_bio->devs[slot].devnum; +} + static void raid10_end_read_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -277,34 +317,45 @@ static void raid10_end_read_request(struct bio *bio, int error) * oops, read error - keep the refcount on the rdev */ char b[BDEVNAME_SIZE]; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid10:%s: %s: rescheduling sector %llu\n", - mdname(conf->mddev), - bdevname(conf->mirrors[dev].rdev->bdev,b), (unsigned long long)r10_bio->sector); + printk_ratelimited(KERN_ERR + "md/raid10:%s: %s: rescheduling sector %llu\n", + mdname(conf->mddev), + bdevname(conf->mirrors[dev].rdev->bdev, b), + (unsigned long long)r10_bio->sector); + set_bit(R10BIO_ReadError, &r10_bio->state); reschedule_retry(r10_bio); } } +static void close_write(r10bio_t *r10_bio) +{ + /* clear the bitmap if all writes complete successfully */ + bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, + r10_bio->sectors, + !test_bit(R10BIO_Degraded, &r10_bio->state), + 0); + md_write_end(r10_bio->mddev); +} + static void raid10_end_write_request(struct bio *bio, int error) { int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); r10bio_t *r10_bio = bio->bi_private; - int slot, dev; + int dev; + int dec_rdev = 1; conf_t *conf = r10_bio->mddev->private; + int slot; - for (slot = 0; slot < conf->copies; slot++) - if (r10_bio->devs[slot].bio == bio) - break; - dev = r10_bio->devs[slot].devnum; + dev = find_bio_disk(conf, r10_bio, bio, &slot); /* * this branch is our 'one mirror IO has finished' event handler: */ if (!uptodate) { - md_error(r10_bio->mddev, conf->mirrors[dev].rdev); - /* an I/O failed, we can't clear the bitmap */ - set_bit(R10BIO_Degraded, &r10_bio->state); - } else + set_bit(WriteErrorSeen, &conf->mirrors[dev].rdev->flags); + set_bit(R10BIO_WriteError, &r10_bio->state); + dec_rdev = 0; + } else { /* * Set R10BIO_Uptodate in our master bio, so that * we will return a good error code for to the higher @@ -314,9 +365,22 @@ static void raid10_end_write_request(struct bio *bio, int error) * user-side. So if something waits for IO, then it will * wait for the 'master' bio. */ + sector_t first_bad; + int bad_sectors; + set_bit(R10BIO_Uptodate, &r10_bio->state); - update_head_pos(slot, r10_bio); + /* Maybe we can clear some bad blocks. */ + if (is_badblock(conf->mirrors[dev].rdev, + r10_bio->devs[slot].addr, + r10_bio->sectors, + &first_bad, &bad_sectors)) { + bio_put(bio); + r10_bio->devs[slot].bio = IO_MADE_GOOD; + dec_rdev = 0; + set_bit(R10BIO_MadeGood, &r10_bio->state); + } + } /* * @@ -324,16 +388,18 @@ static void raid10_end_write_request(struct bio *bio, int error) * already. 
*/ if (atomic_dec_and_test(&r10_bio->remaining)) { - /* clear the bitmap if all writes complete successfully */ - bitmap_endwrite(r10_bio->mddev->bitmap, r10_bio->sector, - r10_bio->sectors, - !test_bit(R10BIO_Degraded, &r10_bio->state), - 0); - md_write_end(r10_bio->mddev); - raid_end_bio_io(r10_bio); + if (test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else { + close_write(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state)) + reschedule_retry(r10_bio); + else + raid_end_bio_io(r10_bio); + } } - - rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); + if (dec_rdev) + rdev_dec_pending(conf->mirrors[dev].rdev, conf->mddev); } @@ -484,11 +550,12 @@ static int raid10_mergeable_bvec(struct request_queue *q, * FIXME: possibly should rethink readbalancing and do it differently * depending on near_copies / far_copies geometry. */ -static int read_balance(conf_t *conf, r10bio_t *r10_bio) +static int read_balance(conf_t *conf, r10bio_t *r10_bio, int *max_sectors) { const sector_t this_sector = r10_bio->sector; int disk, slot; - const int sectors = r10_bio->sectors; + int sectors = r10_bio->sectors; + int best_good_sectors; sector_t new_distance, best_dist; mdk_rdev_t *rdev; int do_balance; @@ -497,8 +564,10 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio) raid10_find_phys(conf, r10_bio); rcu_read_lock(); retry: + sectors = r10_bio->sectors; best_slot = -1; best_dist = MaxSector; + best_good_sectors = 0; do_balance = 1; /* * Check if we can balance. We can balance on the whole @@ -511,6 +580,10 @@ retry: do_balance = 0; for (slot = 0; slot < conf->copies ; slot++) { + sector_t first_bad; + int bad_sectors; + sector_t dev_sector; + if (r10_bio->devs[slot].bio == IO_BLOCKED) continue; disk = r10_bio->devs[slot].devnum; @@ -520,6 +593,37 @@ retry: if (!test_bit(In_sync, &rdev->flags)) continue; + dev_sector = r10_bio->devs[slot].addr; + if (is_badblock(rdev, dev_sector, sectors, + &first_bad, &bad_sectors)) { + if (best_dist < MaxSector) + /* Already have a better slot */ + continue; + if (first_bad <= dev_sector) { + /* Cannot read here. If this is the + * 'primary' device, then we must not read + * beyond 'bad_sectors' from another device. + */ + bad_sectors -= (dev_sector - first_bad); + if (!do_balance && sectors > bad_sectors) + sectors = bad_sectors; + if (best_good_sectors > sectors) + best_good_sectors = sectors; + } else { + sector_t good_sectors = + first_bad - dev_sector; + if (good_sectors > best_good_sectors) { + best_good_sectors = good_sectors; + best_slot = slot; + } + if (!do_balance) + /* Must read from here */ + break; + } + continue; + } else + best_good_sectors = sectors; + if (!do_balance) break; @@ -561,6 +665,7 @@ retry: } else disk = -1; rcu_read_unlock(); + *max_sectors = best_good_sectors; return disk; } @@ -734,6 +839,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) unsigned long flags; mdk_rdev_t *blocked_rdev; int plugged; + int sectors_handled; + int max_sectors; if (unlikely(bio->bi_rw & REQ_FLUSH)) { md_flush_request(mddev, bio); @@ -808,12 +915,26 @@ static int make_request(mddev_t *mddev, struct bio * bio) r10_bio->sector = bio->bi_sector; r10_bio->state = 0; + /* We might need to issue multiple reads to different + * devices if there are bad blocks around, so we keep + * track of the number of reads in bio->bi_phys_segments. + * If this is 0, there is only one r10_bio and no locking + * will be needed when the request completes. If it is + * non-zero, then it is the number of not-completed requests. 
+ */ + bio->bi_phys_segments = 0; + clear_bit(BIO_SEG_VALID, &bio->bi_flags); + if (rw == READ) { /* * read balancing logic: */ - int disk = read_balance(conf, r10_bio); - int slot = r10_bio->read_slot; + int disk; + int slot; + +read_again: + disk = read_balance(conf, r10_bio, &max_sectors); + slot = r10_bio->read_slot; if (disk < 0) { raid_end_bio_io(r10_bio); return 0; @@ -821,6 +942,8 @@ static int make_request(mddev_t *mddev, struct bio * bio) mirror = conf->mirrors + disk; read_bio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(read_bio, r10_bio->sector - bio->bi_sector, + max_sectors); r10_bio->devs[slot].bio = read_bio; @@ -831,7 +954,37 @@ static int make_request(mddev_t *mddev, struct bio * bio) read_bio->bi_rw = READ | do_sync; read_bio->bi_private = r10_bio; - generic_make_request(read_bio); + if (max_sectors < r10_bio->sectors) { + /* Could not read all from this device, so we will + * need another r10_bio. + */ + sectors_handled = (r10_bio->sector + max_sectors + - bio->bi_sector); + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + /* Cannot call generic_make_request directly + * as that will be queued in __generic_make_request + * and subsequent mempool_alloc might block + * waiting for it. So hand bio over to raid10d. + */ + reschedule_retry(r10_bio); + + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = ((bio->bi_size >> 9) + - sectors_handled); + r10_bio->state = 0; + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_sector + sectors_handled; + goto read_again; + } else + generic_make_request(read_bio); return 0; } @@ -841,13 +994,22 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* first select target devices under rcu_lock and * inc refcount on their rdev. Record them by setting * bios[x] to bio + * If there are known/acknowledged bad blocks on any device + * on which we have seen a write error, we want to avoid + * writing to those blocks. This potentially requires several + * writes to write around the bad blocks. Each set of writes + * gets its own r10_bio with a set of bios attached. The number + * of r10_bios is recorded in bio->bi_phys_segments just as with + * the read case.
*/ plugged = mddev_check_plugged(mddev); raid10_find_phys(conf, r10_bio); - retry_write: +retry_write: blocked_rdev = NULL; rcu_read_lock(); + max_sectors = r10_bio->sectors; + for (i = 0; i < conf->copies; i++) { int d = r10_bio->devs[i].devnum; mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[d].rdev); @@ -856,13 +1018,55 @@ static int make_request(mddev_t *mddev, struct bio * bio) blocked_rdev = rdev; break; } - if (rdev && !test_bit(Faulty, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - r10_bio->devs[i].bio = bio; - } else { - r10_bio->devs[i].bio = NULL; + r10_bio->devs[i].bio = NULL; + if (!rdev || test_bit(Faulty, &rdev->flags)) { set_bit(R10BIO_Degraded, &r10_bio->state); + continue; } + if (test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + sector_t dev_sector = r10_bio->devs[i].addr; + int bad_sectors; + int is_bad; + + is_bad = is_badblock(rdev, dev_sector, + max_sectors, + &first_bad, &bad_sectors); + if (is_bad < 0) { + /* Mustn't write here until the bad block + * is acknowledged + */ + atomic_inc(&rdev->nr_pending); + set_bit(BlockedBadBlocks, &rdev->flags); + blocked_rdev = rdev; + break; + } + if (is_bad && first_bad <= dev_sector) { + /* Cannot write here at all */ + bad_sectors -= (dev_sector - first_bad); + if (bad_sectors < max_sectors) + /* Mustn't write more than bad_sectors + * to other devices yet + */ + max_sectors = bad_sectors; + /* We don't set R10BIO_Degraded as that + * only applies if the disk is missing, + * so it might be re-added, and we want to + * know to recover this chunk. + * In this case the device is here, and the + * fact that this chunk is not in-sync is + * recorded in the bad block log. + */ + continue; + } + if (is_bad) { + int good_sectors = first_bad - dev_sector; + if (good_sectors < max_sectors) + max_sectors = good_sectors; + } + } + r10_bio->devs[i].bio = bio; + atomic_inc(&rdev->nr_pending); } rcu_read_unlock(); @@ -882,8 +1086,22 @@ static int make_request(mddev_t *mddev, struct bio * bio) goto retry_write; } + if (max_sectors < r10_bio->sectors) { + /* We are splitting this into multiple parts, so + * we need to prepare for allocating another r10_bio. + */ + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (bio->bi_phys_segments == 0) + bio->bi_phys_segments = 2; + else + bio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + } + sectors_handled = r10_bio->sector + max_sectors - bio->bi_sector; + atomic_set(&r10_bio->remaining, 1); - bitmap_startwrite(mddev->bitmap, bio->bi_sector, r10_bio->sectors, 0); + bitmap_startwrite(mddev->bitmap, r10_bio->sector, r10_bio->sectors, 0); for (i = 0; i < conf->copies; i++) { struct bio *mbio; @@ -892,10 +1110,12 @@ static int make_request(mddev_t *mddev, struct bio * bio) continue; mbio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(mbio, r10_bio->sector - bio->bi_sector, + max_sectors); r10_bio->devs[i].bio = mbio; - mbio->bi_sector = r10_bio->devs[i].addr+ - conf->mirrors[d].rdev->data_offset; + mbio->bi_sector = (r10_bio->devs[i].addr+ + conf->mirrors[d].rdev->data_offset); mbio->bi_bdev = conf->mirrors[d].rdev->bdev; mbio->bi_end_io = raid10_end_write_request; mbio->bi_rw = WRITE | do_sync | do_fua; @@ -920,6 +1140,21 @@ static int make_request(mddev_t *mddev, struct bio * bio) /* In case raid10d snuck in to freeze_array */ wake_up(&conf->wait_barrier); + if (sectors_handled < (bio->bi_size >> 9)) { + /* We need another r10_bio. It has already been counted + * in bio->bi_phys_segments. 
+ */ + r10_bio = mempool_alloc(conf->r10bio_pool, GFP_NOIO); + + r10_bio->master_bio = bio; + r10_bio->sectors = (bio->bi_size >> 9) - sectors_handled; + + r10_bio->mddev = mddev; + r10_bio->sector = bio->bi_sector + sectors_handled; + r10_bio->state = 0; + goto retry_write; + } + if (do_sync || !mddev->bitmap || !plugged) md_wakeup_thread(mddev->thread); return 0; @@ -949,6 +1184,30 @@ static void status(struct seq_file *seq, mddev_t *mddev) seq_printf(seq, "]"); } +/* check if there are enough drives for + * every block to appear on at least one. + * Don't consider the device numbered 'ignore' + * as we might be about to remove it. + */ +static int enough(conf_t *conf, int ignore) +{ + int first = 0; + + do { + int n = conf->copies; + int cnt = 0; + while (n--) { + if (conf->mirrors[first].rdev && + first != ignore) + cnt++; + first = (first+1) % conf->raid_disks; + } + if (cnt == 0) + return 0; + } while (first != 0); + return 1; +} + static void error(mddev_t *mddev, mdk_rdev_t *rdev) { char b[BDEVNAME_SIZE]; @@ -961,13 +1220,9 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) * else mark the drive as failed */ if (test_bit(In_sync, &rdev->flags) - && conf->raid_disks-mddev->degraded == 1) + && !enough(conf, rdev->raid_disk)) /* * Don't fail the drive, just return an IO error. - * The test should really be more sophisticated than - * "working_disks == 1", but it isn't critical, and - * can wait until we do more sophisticated "is the drive - * really dead" tests... */ return; if (test_and_clear_bit(In_sync, &rdev->flags)) { @@ -980,6 +1235,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT @@ -1022,27 +1278,6 @@ static void close_sync(conf_t *conf) conf->r10buf_pool = NULL; } -/* check if there are enough drives for - * every block to appear on atleast one - */ -static int enough(conf_t *conf) -{ - int first = 0; - - do { - int n = conf->copies; - int cnt = 0; - while (n--) { - if (conf->mirrors[first].rdev) - cnt++; - first = (first+1) % conf->raid_disks; - } - if (cnt == 0) - return 0; - } while (first != 0); - return 1; -} - static int raid10_spare_active(mddev_t *mddev) { int i; @@ -1078,7 +1313,6 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) conf_t *conf = mddev->private; int err = -EEXIST; int mirror; - mirror_info_t *p; int first = 0; int last = conf->raid_disks - 1; @@ -1087,44 +1321,47 @@ static int raid10_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) * very different from resync */ return -EBUSY; - if (!enough(conf)) + if (!enough(conf, -1)) return -EINVAL; if (rdev->raid_disk >= 0) first = last = rdev->raid_disk; - if (rdev->saved_raid_disk >= 0 && - rdev->saved_raid_disk >= first && + if (rdev->saved_raid_disk >= first && conf->mirrors[rdev->saved_raid_disk].rdev == NULL) mirror = rdev->saved_raid_disk; else mirror = first; - for ( ; mirror <= last ; mirror++) - if ( !(p=conf->mirrors+mirror)->rdev) { - - disk_stack_limits(mddev->gendisk, rdev->bdev, - rdev->data_offset << 9); - /* as we don't honour merge_bvec_fn, we must - * never risk violating it, so limit - * ->max_segments to one lying with a single - * page, as a one page request is never in - * violation.
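The new enough(conf, ignore) walks the disks round-robin in groups of 'copies' and checks that each group keeps at least one working member when disk 'ignore' is treated as gone; this is what lets error() refuse to fail the last remaining copy of any block. A standalone model, with plain arrays in place of the mirror table:

#include <stdio.h>

static int enough(const int *present, int raid_disks, int copies,
                  int ignore)
{
    int first = 0;
    do {
        int n = copies;
        int cnt = 0;
        while (n--) {
            if (present[first] && first != ignore)
                cnt++;
            first = (first + 1) % raid_disks;
        }
        if (cnt == 0)
            return 0;          /* some block would lose all copies */
    } while (first != 0);
    return 1;
}

int main(void)
{
    int present[4] = { 1, 1, 0, 1 };   /* disk 2 already failed */
    /* 4 disks, 2 copies: can we also lose disk 3? */
    printf("%d\n", enough(present, 4, 2, 3));   /* 0: a block is lost */
    printf("%d\n", enough(present, 4, 2, 1));   /* 1: still covered */
    return 0;
}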
- */ - if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { - blk_queue_max_segments(mddev->queue, 1); - blk_queue_segment_boundary(mddev->queue, - PAGE_CACHE_SIZE - 1); - } + for ( ; mirror <= last ; mirror++) { + mirror_info_t *p = &conf->mirrors[mirror]; + if (p->recovery_disabled == mddev->recovery_disabled) + continue; + if (p->rdev) + continue; - p->head_position = 0; - rdev->raid_disk = mirror; - err = 0; - if (rdev->saved_raid_disk != mirror) - conf->fullsync = 1; - rcu_assign_pointer(p->rdev, rdev); - break; + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + /* as we don't honour merge_bvec_fn, we must + * never risk violating it, so limit + * ->max_segments to one lying with a single + * page, as a one page request is never in + * violation. + */ + if (rdev->bdev->bd_disk->queue->merge_bvec_fn) { + blk_queue_max_segments(mddev->queue, 1); + blk_queue_segment_boundary(mddev->queue, + PAGE_CACHE_SIZE - 1); } + p->head_position = 0; + rdev->raid_disk = mirror; + err = 0; + if (rdev->saved_raid_disk != mirror) + conf->fullsync = 1; + rcu_assign_pointer(p->rdev, rdev); + break; + } + md_integrity_add_rdev(rdev, mddev); print_conf(conf); return err; @@ -1149,7 +1386,8 @@ static int raid10_remove_disk(mddev_t *mddev, int number) * is not possible. */ if (!test_bit(Faulty, &rdev->flags) && - enough(conf)) { + mddev->recovery_disabled != p->recovery_disabled && + enough(conf, -1)) { err = -EBUSY; goto abort; } @@ -1174,24 +1412,18 @@ static void end_sync_read(struct bio *bio, int error) { r10bio_t *r10_bio = bio->bi_private; conf_t *conf = r10_bio->mddev->private; - int i,d; + int d; - for (i=0; i<conf->copies; i++) - if (r10_bio->devs[i].bio == bio) - break; - BUG_ON(i == conf->copies); - update_head_pos(i, r10_bio); - d = r10_bio->devs[i].devnum; + d = find_bio_disk(conf, r10_bio, bio, NULL); if (test_bit(BIO_UPTODATE, &bio->bi_flags)) set_bit(R10BIO_Uptodate, &r10_bio->state); - else { + else + /* The write handler will notice the lack of + * R10BIO_Uptodate and record any errors etc + */ atomic_add(r10_bio->sectors, &conf->mirrors[d].rdev->corrected_errors); - if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery)) - md_error(r10_bio->mddev, - conf->mirrors[d].rdev); - } /* for reconstruct, we always reschedule after a read.
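Slot selection in the reworked raid10_add_disk() prefers the disk's previous slot when it is free, then scans forward, skipping slots whose recovery has been disabled (a read error is expected there) and slots that are already occupied. Reduced to a sketch over plain arrays:

#include <stdio.h>

static int pick_slot(const int *occupied, const int *recov_disabled,
                     int ndisks, int saved)
{
    int first = 0, last = ndisks - 1, mirror;

    if (saved >= first && !occupied[saved])
        mirror = saved;            /* try the old home first */
    else
        mirror = first;

    for ( ; mirror <= last; mirror++) {
        if (recov_disabled[mirror])
            continue;              /* we expect read errors here */
        if (occupied[mirror])
            continue;
        return mirror;
    }
    return -1;
}

int main(void)
{
    int occupied[4]       = { 1, 1, 0, 0 };
    int recov_disabled[4] = { 0, 0, 1, 0 };
    printf("%d\n", pick_slot(occupied, recov_disabled, 4, 2)); /* 3 */
    return 0;
}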
* for resync, only after all reads @@ -1206,40 +1438,60 @@ static void end_sync_read(struct bio *bio, int error) } } -static void end_sync_write(struct bio *bio, int error) +static void end_sync_request(r10bio_t *r10_bio) { - int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); - r10bio_t *r10_bio = bio->bi_private; mddev_t *mddev = r10_bio->mddev; - conf_t *conf = mddev->private; - int i,d; - - for (i = 0; i < conf->copies; i++) - if (r10_bio->devs[i].bio == bio) - break; - d = r10_bio->devs[i].devnum; - if (!uptodate) - md_error(mddev, conf->mirrors[d].rdev); - - update_head_pos(i, r10_bio); - - rdev_dec_pending(conf->mirrors[d].rdev, mddev); while (atomic_dec_and_test(&r10_bio->remaining)) { if (r10_bio->master_bio == NULL) { /* the primary of several recovery bios */ sector_t s = r10_bio->sectors; - put_buf(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else + put_buf(r10_bio); md_done_sync(mddev, s, 1); break; } else { r10bio_t *r10_bio2 = (r10bio_t *)r10_bio->master_bio; - put_buf(r10_bio); + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + reschedule_retry(r10_bio); + else + put_buf(r10_bio); r10_bio = r10_bio2; } } } +static void end_sync_write(struct bio *bio, int error) +{ + int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); + r10bio_t *r10_bio = bio->bi_private; + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + int d; + sector_t first_bad; + int bad_sectors; + int slot; + + d = find_bio_disk(conf, r10_bio, bio, &slot); + + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->mirrors[d].rdev->flags); + set_bit(R10BIO_WriteError, &r10_bio->state); + } else if (is_badblock(conf->mirrors[d].rdev, + r10_bio->devs[slot].addr, + r10_bio->sectors, + &first_bad, &bad_sectors)) + set_bit(R10BIO_MadeGood, &r10_bio->state); + + rdev_dec_pending(conf->mirrors[d].rdev, mddev); + + end_sync_request(r10_bio); +} + /* * Note: sync and recover and handled very differently for raid10 * This code is for resync. @@ -1299,11 +1551,12 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio) if (j == vcnt) continue; mddev->resync_mismatches += r10_bio->sectors; + if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) + /* Don't fix anything. */ + continue; } - if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) - /* Don't fix anything. */ - continue; - /* Ok, we need to write this bio + /* Ok, we need to write this bio, either to correct an + * inconsistency or to correct an unreadable block. * First we need to fixup bv_offset, bv_len and * bi_vecs, as the read request might have corrupted these */ @@ -1355,32 +1608,107 @@ done: * The second for writing. * */ +static void fix_recovery_read_error(r10bio_t *r10_bio) +{ + /* We got a read error during recovery. + * We repeat the read in smaller page-sized sections. + * If a read succeeds, write it to the new device or record + * a bad block if we cannot. + * If a read fails, record a bad block on both old and + * new devices. 
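fix_recovery_read_error() re-drives the failed recovery read in page-sized pieces: each piece is read from the source and written to the spare, and a piece that cannot be copied is recorded as bad on both ends, aborting the recovery only if even the record cannot be stored. The control flow with all I/O stubbed out (sector counts only, no real devices):

#include <stdio.h>

#define PAGE_SECTORS 8   /* PAGE_SIZE >> 9 on 4KiB pages */

static int read_old(int idx)  { return idx != 2; /* stub: chunk 2 bad */ }
static int write_new(int idx) { (void)idx; return 1; /* stub */ }
static int set_badblocks(const char *dev, int idx)
{
    printf("bad block on %s, chunk %d\n", dev, idx);
    return 1;   /* stub: record always succeeds */
}

static void recover(int sectors)
{
    int idx = 0;
    while (sectors) {
        int s = sectors > PAGE_SECTORS ? PAGE_SECTORS : sectors;
        int ok = read_old(idx);
        if (ok)
            ok = write_new(idx);
        if (!ok) {
            /* the data is already lost; just remember that */
            set_badblocks("source", idx);
            if (!set_badblocks("dest", idx))
                return;          /* cannot even record: abort recovery */
        }
        sectors -= s;
        idx++;
    }
}

int main(void)
{
    recover(20);   /* chunks 0 and 1 copied; chunk 2 recorded bad */
    return 0;
}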
+ */ + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + struct bio *bio = r10_bio->devs[0].bio; + sector_t sect = 0; + int sectors = r10_bio->sectors; + int idx = 0; + int dr = r10_bio->devs[0].devnum; + int dw = r10_bio->devs[1].devnum; + + while (sectors) { + int s = sectors; + mdk_rdev_t *rdev; + sector_t addr; + int ok; + + if (s > (PAGE_SIZE>>9)) + s = PAGE_SIZE >> 9; + + rdev = conf->mirrors[dr].rdev; + addr = r10_bio->devs[0].addr + sect, + ok = sync_page_io(rdev, + addr, + s << 9, + bio->bi_io_vec[idx].bv_page, + READ, false); + if (ok) { + rdev = conf->mirrors[dw].rdev; + addr = r10_bio->devs[1].addr + sect; + ok = sync_page_io(rdev, + addr, + s << 9, + bio->bi_io_vec[idx].bv_page, + WRITE, false); + if (!ok) + set_bit(WriteErrorSeen, &rdev->flags); + } + if (!ok) { + /* We don't worry if we cannot set a bad block - + * it really is bad so there is no loss in not + * recording it yet + */ + rdev_set_badblocks(rdev, addr, s, 0); + + if (rdev != conf->mirrors[dw].rdev) { + /* need bad block on destination too */ + mdk_rdev_t *rdev2 = conf->mirrors[dw].rdev; + addr = r10_bio->devs[1].addr + sect; + ok = rdev_set_badblocks(rdev2, addr, s, 0); + if (!ok) { + /* just abort the recovery */ + printk(KERN_NOTICE + "md/raid10:%s: recovery aborted" + " due to read error\n", + mdname(mddev)); + + conf->mirrors[dw].recovery_disabled + = mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, + &mddev->recovery); + break; + } + } + } + + sectors -= s; + sect += s; + idx++; + } +} static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio) { conf_t *conf = mddev->private; - int i, d; - struct bio *bio, *wbio; + int d; + struct bio *wbio; + if (!test_bit(R10BIO_Uptodate, &r10_bio->state)) { + fix_recovery_read_error(r10_bio); + end_sync_request(r10_bio); + return; + } - /* move the pages across to the second bio + /* + * share the pages with the first bio * and submit the write request */ - bio = r10_bio->devs[0].bio; wbio = r10_bio->devs[1].bio; - for (i=0; i < wbio->bi_vcnt; i++) { - struct page *p = bio->bi_io_vec[i].bv_page; - bio->bi_io_vec[i].bv_page = wbio->bi_io_vec[i].bv_page; - wbio->bi_io_vec[i].bv_page = p; - } d = r10_bio->devs[1].devnum; atomic_inc(&conf->mirrors[d].rdev->nr_pending); md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9); - if (test_bit(R10BIO_Uptodate, &r10_bio->state)) - generic_make_request(wbio); - else - bio_endio(wbio, -EIO); + generic_make_request(wbio); } @@ -1421,6 +1749,26 @@ static void check_decay_read_errors(mddev_t *mddev, mdk_rdev_t *rdev) atomic_set(&rdev->read_errors, read_errors >> hours_since_last); } +static int r10_sync_page_io(mdk_rdev_t *rdev, sector_t sector, + int sectors, struct page *page, int rw) +{ + sector_t first_bad; + int bad_sectors; + + if (is_badblock(rdev, sector, sectors, &first_bad, &bad_sectors) + && (rw == READ || test_bit(WriteErrorSeen, &rdev->flags))) + return -1; + if (sync_page_io(rdev, sector, sectors << 9, page, rw, false)) + /* success */ + return 1; + if (rw == WRITE) + set_bit(WriteErrorSeen, &rdev->flags); + /* need to record an error - either for the block or the device */ + if (!rdev_set_badblocks(rdev, sector, sectors, 0)) + md_error(rdev->mddev, rdev); + return 0; +} + /* * This is a kernel thread which: * @@ -1476,10 +1824,15 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) rcu_read_lock(); do { + sector_t first_bad; + int bad_sectors; + d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); if (rdev && - test_bit(In_sync, 
&rdev->flags)) { + test_bit(In_sync, &rdev->flags) && + is_badblock(rdev, r10_bio->devs[sl].addr + sect, s, + &first_bad, &bad_sectors) == 0) { atomic_inc(&rdev->nr_pending); rcu_read_unlock(); success = sync_page_io(rdev, @@ -1499,9 +1852,19 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) rcu_read_unlock(); if (!success) { - /* Cannot read from anywhere -- bye bye array */ + /* Cannot read from anywhere, just mark the block + * as bad on the first device to discourage future + * reads. + */ int dn = r10_bio->devs[r10_bio->read_slot].devnum; - md_error(mddev, conf->mirrors[dn].rdev); + rdev = conf->mirrors[dn].rdev; + + if (!rdev_set_badblocks( + rdev, + r10_bio->devs[r10_bio->read_slot].addr + + sect, + s, 0)) + md_error(mddev, rdev); break; } @@ -1516,80 +1879,82 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) sl--; d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - test_bit(In_sync, &rdev->flags)) { - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - atomic_add(s, &rdev->corrected_errors); - if (sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s<<9, conf->tmppage, WRITE, false) - == 0) { - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: read correction " - "write failed" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing " - "drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - md_error(mddev, rdev); - } - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); + if (!rdev || + !test_bit(In_sync, &rdev->flags)) + continue; + + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + if (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, + s, conf->tmppage, WRITE) + == 0) { + /* Well, this device is dead */ + printk(KERN_NOTICE + "md/raid10:%s: read correction " + "write failed" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "md/raid10:%s: %s: failing " + "drive\n", + mdname(mddev), + bdevname(rdev->bdev, b)); } + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); } sl = start; while (sl != r10_bio->read_slot) { + char b[BDEVNAME_SIZE]; if (sl==0) sl = conf->copies; sl--; d = r10_bio->devs[sl].devnum; rdev = rcu_dereference(conf->mirrors[d].rdev); - if (rdev && - test_bit(In_sync, &rdev->flags)) { - char b[BDEVNAME_SIZE]; - atomic_inc(&rdev->nr_pending); - rcu_read_unlock(); - if (sync_page_io(rdev, - r10_bio->devs[sl].addr + - sect, - s<<9, conf->tmppage, - READ, false) == 0) { - /* Well, this device is dead */ - printk(KERN_NOTICE - "md/raid10:%s: unable to read back " - "corrected sectors" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - printk(KERN_NOTICE "md/raid10:%s: %s: failing drive\n", - mdname(mddev), - bdevname(rdev->bdev, b)); - - md_error(mddev, rdev); - } else { - printk(KERN_INFO - "md/raid10:%s: read error corrected" - " (%d sectors at %llu on %s)\n", - mdname(mddev), s, - (unsigned long long)( - sect + rdev->data_offset), - bdevname(rdev->bdev, b)); - } + if (!rdev || + !test_bit(In_sync, &rdev->flags)) + continue; - rdev_dec_pending(rdev, mddev); - rcu_read_lock(); + atomic_inc(&rdev->nr_pending); + rcu_read_unlock(); + switch (r10_sync_page_io(rdev, + r10_bio->devs[sl].addr + + sect, + s,
conf->tmppage, + READ)) { + case 0: + /* Well, this device is dead */ + printk(KERN_NOTICE + "md/raid10:%s: unable to read back " + "corrected sectors" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + printk(KERN_NOTICE "md/raid10:%s: %s: failing " + "drive\n", + mdname(mddev), + bdevname(rdev->bdev, b)); + break; + case 1: + printk(KERN_INFO + "md/raid10:%s: read error corrected" + " (%d sectors at %llu on %s)\n", + mdname(mddev), s, + (unsigned long long)( + sect + rdev->data_offset), + bdevname(rdev->bdev, b)); + atomic_add(s, &rdev->corrected_errors); } + + rdev_dec_pending(rdev, mddev); + rcu_read_lock(); } rcu_read_unlock(); @@ -1598,21 +1963,254 @@ static void fix_read_error(conf_t *conf, mddev_t *mddev, r10bio_t *r10_bio) } } +static void bi_complete(struct bio *bio, int error) +{ + complete((struct completion *)bio->bi_private); +} + +static int submit_bio_wait(int rw, struct bio *bio) +{ + struct completion event; + rw |= REQ_SYNC; + + init_completion(&event); + bio->bi_private = &event; + bio->bi_end_io = bi_complete; + submit_bio(rw, bio); + wait_for_completion(&event); + + return test_bit(BIO_UPTODATE, &bio->bi_flags); +} + +static int narrow_write_error(r10bio_t *r10_bio, int i) +{ + struct bio *bio = r10_bio->master_bio; + mddev_t *mddev = r10_bio->mddev; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev = conf->mirrors[r10_bio->devs[i].devnum].rdev; + /* bio has the data to be written to slot 'i' where + * we just recently had a write error. + * We repeatedly clone the bio and trim down to one block, + * then try the write. Where the write fails we record + * a bad block. + * It is conceivable that the bio doesn't exactly align with + * blocks. We must handle this. + * + * We currently own a reference to the rdev. + */ + + int block_sectors; + sector_t sector; + int sectors; + int sect_to_write = r10_bio->sectors; + int ok = 1; + + if (rdev->badblocks.shift < 0) + return 0; + + block_sectors = 1 << rdev->badblocks.shift; + sector = r10_bio->sector; + sectors = ((r10_bio->sector + block_sectors) + & ~(sector_t)(block_sectors - 1)) + - sector; + + while (sect_to_write) { + struct bio *wbio; + if (sectors > sect_to_write) + sectors = sect_to_write; + /* Write at 'sector' for 'sectors' */ + wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); + md_trim_bio(wbio, sector - bio->bi_sector, sectors); + wbio->bi_sector = (r10_bio->devs[i].addr+ + rdev->data_offset+ + (sector - r10_bio->sector)); + wbio->bi_bdev = rdev->bdev; + if (submit_bio_wait(WRITE, wbio) == 0) + /* Failure! */ + ok = rdev_set_badblocks(rdev, sector, + sectors, 0) + && ok; + + bio_put(wbio); + sect_to_write -= sectors; + sector += sectors; + sectors = block_sectors; + } + return ok; +} + +static void handle_read_error(mddev_t *mddev, r10bio_t *r10_bio) +{ + int slot = r10_bio->read_slot; + int mirror = r10_bio->devs[slot].devnum; + struct bio *bio; + conf_t *conf = mddev->private; + mdk_rdev_t *rdev; + char b[BDEVNAME_SIZE]; + unsigned long do_sync; + int max_sectors; + + /* we got a read error. Maybe the drive is bad. Maybe just + * the block and we can fix it. + * We freeze all other IO, and try reading the block from + * other devices. When we find one, we re-write + * and check it that fixes the read error. + * This is all done synchronously while the array is + * frozen. 
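narrow_write_error() above sizes the first sub-write so that every later one starts on a bad-block boundary. A standalone sketch of that arithmetic, with a hypothetical badblocks.shift of 3 (8-sector granularity) and an unaligned 20-sector request:

	#include <stdio.h>

	typedef unsigned long long sector_t;

	int main(void)
	{
		int shift = 3;			/* hypothetical badblocks.shift */
		sector_t block_sectors = 1ULL << shift;
		sector_t sector = 21;		/* r10_bio->sector */
		sector_t sect_to_write = 20;	/* r10_bio->sectors */
		/* first write runs only up to the next block boundary */
		sector_t sectors = ((sector + block_sectors)
				    & ~(block_sectors - 1)) - sector;

		while (sect_to_write) {
			if (sectors > sect_to_write)
				sectors = sect_to_write;
			printf("write %llu sectors at %llu\n", sectors, sector);
			sect_to_write -= sectors;
			sector += sectors;
			sectors = block_sectors;	/* later writes: whole blocks */
		}
		return 0;
	}

The writes come out as 3, 8, 8 and 1 sectors, so any individual failure can be recorded as whole bad-block entries.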
+ */ + if (mddev->ro == 0) { + freeze_array(conf); + fix_read_error(conf, mddev, r10_bio); + unfreeze_array(conf); + } + rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); + + bio = r10_bio->devs[slot].bio; + bdevname(bio->bi_bdev, b); + r10_bio->devs[slot].bio = + mddev->ro ? IO_BLOCKED : NULL; +read_more: + mirror = read_balance(conf, r10_bio, &max_sectors); + if (mirror == -1) { + printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" + " read error for block %llu\n", + mdname(mddev), b, + (unsigned long long)r10_bio->sector); + raid_end_bio_io(r10_bio); + bio_put(bio); + return; + } + + do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); + if (bio) + bio_put(bio); + slot = r10_bio->read_slot; + rdev = conf->mirrors[mirror].rdev; + printk_ratelimited( + KERN_ERR + "md/raid10:%s: %s: redirecting " + "sector %llu to another mirror\n", + mdname(mddev), + bdevname(rdev->bdev, b), + (unsigned long long)r10_bio->sector); + bio = bio_clone_mddev(r10_bio->master_bio, + GFP_NOIO, mddev); + md_trim_bio(bio, + r10_bio->sector - bio->bi_sector, + max_sectors); + r10_bio->devs[slot].bio = bio; + bio->bi_sector = r10_bio->devs[slot].addr + + rdev->data_offset; + bio->bi_bdev = rdev->bdev; + bio->bi_rw = READ | do_sync; + bio->bi_private = r10_bio; + bio->bi_end_io = raid10_end_read_request; + if (max_sectors < r10_bio->sectors) { + /* Drat - have to split this up more */ + struct bio *mbio = r10_bio->master_bio; + int sectors_handled = + r10_bio->sector + max_sectors + - mbio->bi_sector; + r10_bio->sectors = max_sectors; + spin_lock_irq(&conf->device_lock); + if (mbio->bi_phys_segments == 0) + mbio->bi_phys_segments = 2; + else + mbio->bi_phys_segments++; + spin_unlock_irq(&conf->device_lock); + generic_make_request(bio); + bio = NULL; + + r10_bio = mempool_alloc(conf->r10bio_pool, + GFP_NOIO); + r10_bio->master_bio = mbio; + r10_bio->sectors = (mbio->bi_size >> 9) + - sectors_handled; + r10_bio->state = 0; + set_bit(R10BIO_ReadError, + &r10_bio->state); + r10_bio->mddev = mddev; + r10_bio->sector = mbio->bi_sector + + sectors_handled; + + goto read_more; + } else + generic_make_request(bio); +} + +static void handle_write_completed(conf_t *conf, r10bio_t *r10_bio) +{ + /* Some sort of write request has finished and it + * succeeded in writing where we thought there was a + * bad block. So forget the bad block. + * Or possibly it failed and we need to record + * a bad block.
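When read_balance() can serve only part of the failed range, handle_read_error() above submits that much, bumps bi_phys_segments on the master bio, and loops back to read_more with a fresh r10_bio. A sketch of that accounting, with read_balance() replaced by a hypothetical per-pass cap:

	#include <stdio.h>

	typedef unsigned long long sector_t;

	/* stand-in for read_balance(): how much the chosen mirror can serve */
	static int mirror_cap(int pass)
	{
		static const int caps[] = { 24, 8, 64 };
		return caps[pass % 3];
	}

	int main(void)
	{
		sector_t sector = 1000;		/* master bio start */
		int remaining = 40;		/* master bio sectors */
		int phys_segments = 0, pass = 0;

		while (remaining) {
			int max_sectors = mirror_cap(pass++);

			if (max_sectors > remaining)
				max_sectors = remaining;
			printf("read %d sectors at %llu\n", max_sectors, sector);
			if (max_sectors < remaining)
				/* another r10_bio will follow: 0 -> 2, then ++ */
				phys_segments = phys_segments ? phys_segments + 1 : 2;
			sector += max_sectors;
			remaining -= max_sectors;
		}
		printf("segments counted: %d\n", phys_segments);
		return 0;
	}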
+ */ + int m; + mdk_rdev_t *rdev; + + if (test_bit(R10BIO_IsSync, &r10_bio->state) || + test_bit(R10BIO_IsRecover, &r10_bio->state)) { + for (m = 0; m < conf->copies; m++) { + int dev = r10_bio->devs[m].devnum; + rdev = conf->mirrors[dev].rdev; + if (r10_bio->devs[m].bio == NULL) + continue; + if (test_bit(BIO_UPTODATE, + &r10_bio->devs[m].bio->bi_flags)) { + rdev_clear_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors); + } else { + if (!rdev_set_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors, 0)) + md_error(conf->mddev, rdev); + } + } + put_buf(r10_bio); + } else { + for (m = 0; m < conf->copies; m++) { + int dev = r10_bio->devs[m].devnum; + struct bio *bio = r10_bio->devs[m].bio; + rdev = conf->mirrors[dev].rdev; + if (bio == IO_MADE_GOOD) { + rdev_clear_badblocks( + rdev, + r10_bio->devs[m].addr, + r10_bio->sectors); + rdev_dec_pending(rdev, conf->mddev); + } else if (bio != NULL && + !test_bit(BIO_UPTODATE, &bio->bi_flags)) { + if (!narrow_write_error(r10_bio, m)) { + md_error(conf->mddev, rdev); + set_bit(R10BIO_Degraded, + &r10_bio->state); + } + rdev_dec_pending(rdev, conf->mddev); + } + } + if (test_bit(R10BIO_WriteError, + &r10_bio->state)) + close_write(r10_bio); + raid_end_bio_io(r10_bio); + } +} + static void raid10d(mddev_t *mddev) { r10bio_t *r10_bio; - struct bio *bio; unsigned long flags; conf_t *conf = mddev->private; struct list_head *head = &conf->retry_list; - mdk_rdev_t *rdev; struct blk_plug plug; md_check_recovery(mddev); blk_start_plug(&plug); for (;;) { - char b[BDEVNAME_SIZE]; flush_pending_writes(conf); @@ -1628,64 +2226,26 @@ static void raid10d(mddev_t *mddev) mddev = r10_bio->mddev; conf = mddev->private; - if (test_bit(R10BIO_IsSync, &r10_bio->state)) + if (test_bit(R10BIO_MadeGood, &r10_bio->state) || + test_bit(R10BIO_WriteError, &r10_bio->state)) + handle_write_completed(conf, r10_bio); + else if (test_bit(R10BIO_IsSync, &r10_bio->state)) sync_request_write(mddev, r10_bio); else if (test_bit(R10BIO_IsRecover, &r10_bio->state)) recovery_request_write(mddev, r10_bio); + else if (test_bit(R10BIO_ReadError, &r10_bio->state)) + handle_read_error(mddev, r10_bio); else { - int slot = r10_bio->read_slot; - int mirror = r10_bio->devs[slot].devnum; - /* we got a read error. Maybe the drive is bad. Maybe just - * the block and we can fix it. - * We freeze all other IO, and try reading the block from - * other devices. When we find one, we re-write - * and check it that fixes the read error. - * This is all done synchronously while the array is - * frozen. + /* just a partial read to be scheduled from a + * separate context */ - if (mddev->ro == 0) { - freeze_array(conf); - fix_read_error(conf, mddev, r10_bio); - unfreeze_array(conf); - } - rdev_dec_pending(conf->mirrors[mirror].rdev, mddev); - - bio = r10_bio->devs[slot].bio; - r10_bio->devs[slot].bio = - mddev->ro ? 
IO_BLOCKED : NULL; - mirror = read_balance(conf, r10_bio); - if (mirror == -1) { - printk(KERN_ALERT "md/raid10:%s: %s: unrecoverable I/O" - " read error for block %llu\n", - mdname(mddev), - bdevname(bio->bi_bdev,b), - (unsigned long long)r10_bio->sector); - raid_end_bio_io(r10_bio); - bio_put(bio); - } else { - const unsigned long do_sync = (r10_bio->master_bio->bi_rw & REQ_SYNC); - bio_put(bio); - slot = r10_bio->read_slot; - rdev = conf->mirrors[mirror].rdev; - if (printk_ratelimit()) - printk(KERN_ERR "md/raid10:%s: %s: redirecting sector %llu to" - " another mirror\n", - mdname(mddev), - bdevname(rdev->bdev,b), - (unsigned long long)r10_bio->sector); - bio = bio_clone_mddev(r10_bio->master_bio, - GFP_NOIO, mddev); - r10_bio->devs[slot].bio = bio; - bio->bi_sector = r10_bio->devs[slot].addr - + rdev->data_offset; - bio->bi_bdev = rdev->bdev; - bio->bi_rw = READ | do_sync; - bio->bi_private = r10_bio; - bio->bi_end_io = raid10_end_read_request; - generic_make_request(bio); - } + int slot = r10_bio->read_slot; + generic_make_request(r10_bio->devs[slot].bio); } + cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); } blk_finish_plug(&plug); } @@ -1746,7 +2306,6 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int i; int max_sync; sector_t sync_blocks; - sector_t sectors_skipped = 0; int chunks_skipped = 0; @@ -1828,7 +2387,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, max_sync = RESYNC_PAGES << (PAGE_SHIFT-9); if (!test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { /* recovery... the complicated one */ - int j, k; + int j; r10_bio = NULL; for (i=0 ; i<conf->raid_disks; i++) { @@ -1836,6 +2395,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, r10bio_t *rb2; sector_t sect; int must_sync; + int any_working; if (conf->mirrors[i].rdev == NULL || test_bit(In_sync, &conf->mirrors[i].rdev->flags)) @@ -1887,19 +2447,42 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, must_sync = bitmap_start_sync(mddev->bitmap, sect, &sync_blocks, still_degraded); + any_working = 0; for (j=0; j<conf->copies;j++) { + int k; int d = r10_bio->devs[j].devnum; + sector_t from_addr, to_addr; + mdk_rdev_t *rdev; + sector_t sector, first_bad; + int bad_sectors; if (!conf->mirrors[d].rdev || !test_bit(In_sync, &conf->mirrors[d].rdev->flags)) continue; /* This is where we read from */ + any_working = 1; + rdev = conf->mirrors[d].rdev; + sector = r10_bio->devs[j].addr; + + if (is_badblock(rdev, sector, max_sync, + &first_bad, &bad_sectors)) { + if (first_bad > sector) + max_sync = first_bad - sector; + else { + bad_sectors -= (sector + - first_bad); + if (max_sync > bad_sectors) + max_sync = bad_sectors; + continue; + } + } bio = r10_bio->devs[0].bio; bio->bi_next = biolist; biolist = bio; bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio->bi_rw = READ; - bio->bi_sector = r10_bio->devs[j].addr + + from_addr = r10_bio->devs[j].addr; + bio->bi_sector = from_addr + conf->mirrors[d].rdev->data_offset; bio->bi_bdev = conf->mirrors[d].rdev->bdev; atomic_inc(&conf->mirrors[d].rdev->nr_pending); @@ -1916,26 +2499,48 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, bio->bi_private = r10_bio; bio->bi_end_io = end_sync_write; bio->bi_rw = WRITE; - bio->bi_sector = r10_bio->devs[k].addr + + to_addr = r10_bio->devs[k].addr; + bio->bi_sector = to_addr + conf->mirrors[i].rdev->data_offset; bio->bi_bdev = conf->mirrors[i].rdev->bdev; r10_bio->devs[0].devnum = d; + r10_bio->devs[0].addr = 
from_addr; + r10_bio->devs[1].devnum = i; + r10_bio->devs[1].addr = to_addr; break; } if (j == conf->copies) { - /* Cannot recover, so abort the recovery */ + /* Cannot recover, so abort the recovery or + * record a bad block */ put_buf(r10_bio); if (rb2) atomic_dec(&rb2->remaining); r10_bio = rb2; - if (!test_and_set_bit(MD_RECOVERY_INTR, - &mddev->recovery)) - printk(KERN_INFO "md/raid10:%s: insufficient " - "working devices for recovery.\n", - mdname(mddev)); + if (any_working) { + /* problem is that there are bad blocks + * on other device(s) + */ + int k; + for (k = 0; k < conf->copies; k++) + if (r10_bio->devs[k].devnum == i) + break; + if (!rdev_set_badblocks( + conf->mirrors[i].rdev, + r10_bio->devs[k].addr, + max_sync, 0)) + any_working = 0; + } + if (!any_working) { + if (!test_and_set_bit(MD_RECOVERY_INTR, + &mddev->recovery)) + printk(KERN_INFO "md/raid10:%s: insufficient " + "working devices for recovery.\n", + mdname(mddev)); + conf->mirrors[i].recovery_disabled + = mddev->recovery_disabled; + } break; } } @@ -1979,12 +2584,28 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, for (i=0; i<conf->copies; i++) { int d = r10_bio->devs[i].devnum; + sector_t first_bad, sector; + int bad_sectors; + bio = r10_bio->devs[i].bio; bio->bi_end_io = NULL; clear_bit(BIO_UPTODATE, &bio->bi_flags); if (conf->mirrors[d].rdev == NULL || test_bit(Faulty, &conf->mirrors[d].rdev->flags)) continue; + sector = r10_bio->devs[i].addr; + if (is_badblock(conf->mirrors[d].rdev, + sector, max_sync, + &first_bad, &bad_sectors)) { + if (first_bad > sector) + max_sync = first_bad - sector; + else { + bad_sectors -= (sector - first_bad); + if (max_sync > bad_sectors) + max_sync = bad_sectors; + continue; + } + } atomic_inc(&conf->mirrors[d].rdev->nr_pending); atomic_inc(&r10_bio->remaining); bio->bi_next = biolist; @@ -1992,7 +2613,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, bio->bi_private = r10_bio; bio->bi_end_io = end_sync_read; bio->bi_rw = READ; - bio->bi_sector = r10_bio->devs[i].addr + + bio->bi_sector = sector + conf->mirrors[d].rdev->data_offset; bio->bi_bdev = conf->mirrors[d].rdev->bdev; count++; @@ -2079,7 +2700,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, return sectors_skipped + nr_sectors; giveup: /* There is nowhere to write, so all non-sync - * drives must be failed, so try the next chunk... + * drives must be failed or in resync, or all drives + * have a bad block, so try the next chunk... */ if (sector_nr + max_sync < max_sector) max_sector = sector_nr + max_sync; @@ -2249,6 +2871,7 @@ static int run(mddev_t *mddev) (conf->raid_disks / conf->near_copies)); list_for_each_entry(rdev, &mddev->disks, same_set) { + disk_idx = rdev->raid_disk; if (disk_idx >= conf->raid_disks || disk_idx < 0) @@ -2271,7 +2894,7 @@ static int run(mddev_t *mddev) disk->head_position = 0; } /* need to check that every block has at least one working mirror */ - if (!enough(conf)) { + if (!enough(conf, -1)) { printk(KERN_ERR "md/raid10:%s: not enough operational mirrors.\n", mdname(mddev)); goto out_free_conf; diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 944b1104d3b4..79cb52a0d4a2 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -6,6 +6,11 @@ typedef struct mirror_info mirror_info_t; struct mirror_info { mdk_rdev_t *rdev; sector_t head_position; + int recovery_disabled; /* matches + * mddev->recovery_disabled + * when we shouldn't try + * recovering this device.
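Both resync paths above clip the sync window against the result of is_badblock() in the same way. A standalone sketch of that clipping, with one hypothetical bad range:

	#include <stdio.h>

	typedef unsigned long long sector_t;

	/* Clip a window [sector, sector+max_sync) against a bad range
	 * [first_bad, first_bad+bad_sectors). If the window starts clean,
	 * shorten it to stop before the bad range; if it starts inside,
	 * shrink it to just the bad remainder (which the caller skips).
	 */
	static int clip_max_sync(sector_t sector, int max_sync,
				 sector_t first_bad, int bad_sectors)
	{
		if (first_bad > sector)
			return (int)(first_bad - sector);
		bad_sectors -= (int)(sector - first_bad);
		if (max_sync > bad_sectors)
			max_sync = bad_sectors;
		return max_sync;
	}

	int main(void)
	{
		printf("%d\n", clip_max_sync(100, 64, 120, 16));	/* 20 */
		printf("%d\n", clip_max_sync(130, 64, 120, 16));	/* 6 */
		return 0;
	}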
+ */ }; typedef struct r10bio_s r10bio_t; @@ -113,10 +118,26 @@ struct r10bio_s { * level, we store IO_BLOCKED in the appropriate 'bios' pointer */ #define IO_BLOCKED ((struct bio*)1) +/* When we successfully write to a known bad-block, we need to remove the + * bad-block marking which must be done from process context. So we record + * the success by setting devs[n].bio to IO_MADE_GOOD + */ +#define IO_MADE_GOOD ((struct bio *)2) + +#define BIO_SPECIAL(bio) ((unsigned long)bio <= 2) /* bits for r10bio.state */ #define R10BIO_Uptodate 0 #define R10BIO_IsSync 1 #define R10BIO_IsRecover 2 #define R10BIO_Degraded 3 +/* Set ReadError on bios that experience a read error + * so that raid10d knows what to do with them. + */ +#define R10BIO_ReadError 4 +/* If a write for this request means we can clear some + * known-bad-block records, we set this flag. + */ +#define R10BIO_MadeGood 5 +#define R10BIO_WriteError 6 #endif diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index b72edf35ec54..dbae459fb02d 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -51,6 +51,7 @@ #include <linux/seq_file.h> #include <linux/cpu.h> #include <linux/slab.h> +#include <linux/ratelimit.h> #include "md.h" #include "raid5.h" #include "raid0.h" @@ -96,8 +97,6 @@ #define __inline__ #endif -#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) - /* * We maintain a biased count of active stripes in the bottom 16 bits of * bi_phys_segments, and a count of processed stripes in the upper 16 bits @@ -341,7 +340,7 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int previous) (unsigned long long)sh->sector, i, dev->toread, dev->read, dev->towrite, dev->written, test_bit(R5_LOCKED, &dev->flags)); - BUG(); + WARN_ON(1); } dev->flags = 0; raid5_build_block(sh, i, previous); @@ -527,6 +526,36 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) atomic_inc(&rdev->nr_pending); rcu_read_unlock(); + /* We have already checked bad blocks for reads. Now + * need to check for writes. 
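The raid10.h hunk above encodes per-slot status in the bio pointer itself: small integers that can never be valid kernel pointers. The same trick as a userspace sketch:

	#include <stdio.h>

	struct bio { int dummy; };

	#define IO_BLOCKED	((struct bio *)1)
	#define IO_MADE_GOOD	((struct bio *)2)
	#define BIO_SPECIAL(bio) ((unsigned long)(bio) <= 2)

	static void classify(struct bio *bio)
	{
		if (!BIO_SPECIAL(bio))
			printf("real bio at %p\n", (void *)bio);
		else if (bio == NULL)
			printf("no request on this slot\n");
		else if (bio == IO_BLOCKED)
			printf("failed earlier, do not retry here\n");
		else	/* IO_MADE_GOOD */
			printf("write succeeded over a recorded bad block\n");
	}

	int main(void)
	{
		struct bio real;

		classify(NULL);
		classify(IO_BLOCKED);
		classify(IO_MADE_GOOD);
		classify(&real);
		return 0;
	}

Note that NULL also tests as "special", which is why callers check for NULL and the sentinels explicitly before dereferencing anything.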
+ */ + while ((rw & WRITE) && rdev && + test_bit(WriteErrorSeen, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + int bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors); + if (!bad) + break; + + if (bad < 0) { + set_bit(BlockedBadBlocks, &rdev->flags); + if (!conf->mddev->external && + conf->mddev->flags) { + /* It is very unlikely, but we might + * still need to write out the + * bad block log - better give it + * a chance*/ + md_check_recovery(conf->mddev); + } + md_wait_for_blocked_rdev(rdev, conf->mddev); + } else { + /* Acknowledged bad block - skip the write */ + rdev_dec_pending(rdev, conf->mddev); + rdev = NULL; + } + } + if (rdev) { if (s->syncing || s->expanding || s->expanded) md_sync_acct(rdev->bdev, STRIPE_SECTORS); @@ -548,10 +577,6 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) bi->bi_io_vec[0].bv_offset = 0; bi->bi_size = STRIPE_SIZE; bi->bi_next = NULL; - if ((rw & WRITE) && - test_bit(R5_ReWrite, &sh->dev[i].flags)) - atomic_add(STRIPE_SECTORS, - &rdev->corrected_errors); generic_make_request(bi); } else { if (rw & WRITE) @@ -1020,12 +1045,12 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) if (test_and_clear_bit(R5_Wantdrain, &dev->flags)) { struct bio *wbi; - spin_lock(&sh->lock); + spin_lock_irq(&sh->raid_conf->device_lock); chosen = dev->towrite; dev->towrite = NULL; BUG_ON(dev->written); wbi = dev->written = chosen; - spin_unlock(&sh->lock); + spin_unlock_irq(&sh->raid_conf->device_lock); while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { @@ -1315,12 +1340,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) static int grow_one_stripe(raid5_conf_t *conf) { struct stripe_head *sh; - sh = kmem_cache_alloc(conf->slab_cache, GFP_KERNEL); + sh = kmem_cache_zalloc(conf->slab_cache, GFP_KERNEL); if (!sh) return 0; - memset(sh, 0, sizeof(*sh) + (conf->pool_size-1)*sizeof(struct r5dev)); + sh->raid_conf = conf; - spin_lock_init(&sh->lock); #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&sh->ops.wait_for_ops); #endif @@ -1435,14 +1459,11 @@ static int resize_stripes(raid5_conf_t *conf, int newsize) return -ENOMEM; for (i = conf->max_nr_stripes; i; i--) { - nsh = kmem_cache_alloc(sc, GFP_KERNEL); + nsh = kmem_cache_zalloc(sc, GFP_KERNEL); if (!nsh) break; - memset(nsh, 0, sizeof(*nsh) + (newsize-1)*sizeof(struct r5dev)); - nsh->raid_conf = conf; - spin_lock_init(&nsh->lock); #ifdef CONFIG_MULTICORE_RAID456 init_waitqueue_head(&nsh->ops.wait_for_ops); #endif @@ -1587,12 +1608,15 @@ static void raid5_end_read_request(struct bio * bi, int error) set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; - printk_rl(KERN_INFO "md/raid:%s: read error corrected" - " (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, - (unsigned long long)(sh->sector - + rdev->data_offset), - bdevname(rdev->bdev, b)); + printk_ratelimited( + KERN_INFO + "md/raid:%s: read error corrected" + " (%lu sectors at %llu on %s)\n", + mdname(conf->mddev), STRIPE_SECTORS, + (unsigned long long)(sh->sector + + rdev->data_offset), + bdevname(rdev->bdev, b)); + atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } @@ -1606,22 +1630,24 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); if (conf->mddev->degraded >= 
conf->max_degraded) - printk_rl(KERN_WARNING - "md/raid:%s: read error not correctable " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); + printk_ratelimited( + KERN_WARNING + "md/raid:%s: read error not correctable " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ - printk_rl(KERN_WARNING - "md/raid:%s: read error NOT corrected!! " - "(sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector - + rdev->data_offset), - bdn); + printk_ratelimited( + KERN_WARNING + "md/raid:%s: read error NOT corrected!! " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING @@ -1649,6 +1675,8 @@ static void raid5_end_write_request(struct bio *bi, int error) raid5_conf_t *conf = sh->raid_conf; int disks = sh->disks, i; int uptodate = test_bit(BIO_UPTODATE, &bi->bi_flags); + sector_t first_bad; + int bad_sectors; for (i=0 ; i<disks; i++) if (bi == &sh->dev[i].req) @@ -1662,8 +1690,12 @@ static void raid5_end_write_request(struct bio *bi, int error) return; } - if (!uptodate) - md_error(conf->mddev, conf->disks[i].rdev); + if (!uptodate) { + set_bit(WriteErrorSeen, &conf->disks[i].rdev->flags); + set_bit(R5_WriteError, &sh->dev[i].flags); + } else if (is_badblock(conf->disks[i].rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors)) + set_bit(R5_MadeGood, &sh->dev[i].flags); rdev_dec_pending(conf->disks[i].rdev, conf->mddev); @@ -1710,6 +1742,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); } + set_bit(Blocked, &rdev->flags); set_bit(Faulty, &rdev->flags); set_bit(MD_CHANGE_DEVS, &mddev->flags); printk(KERN_ALERT @@ -1760,7 +1793,7 @@ static sector_t raid5_compute_sector(raid5_conf_t *conf, sector_t r_sector, /* * Select the parity disk based on the user selected algorithm. 
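A sketch of what selecting the parity disk means for one classic layout, RAID5 left-asymmetric, with sector_div() reduced to a plain modulo; the disk count is illustrative:

	#include <stdio.h>

	/* parity walks backwards from the last disk; data slots at or
	 * past the parity disk shift up by one
	 */
	static void layout(unsigned long long stripe, int raid_disks,
			   int dd_in, int *pd_idx, int *dd_idx)
	{
		int data_disks = raid_disks - 1;

		*pd_idx = data_disks - (int)(stripe % raid_disks);
		*dd_idx = dd_in;
		if (*dd_idx >= *pd_idx)
			(*dd_idx)++;
	}

	int main(void)
	{
		unsigned long long stripe;

		for (stripe = 0; stripe < 4; stripe++) {
			int pd, dd;

			layout(stripe, 4, 1, &pd, &dd);
			printf("stripe %llu: parity on %d, data block 1 on %d\n",
			       stripe, pd, dd);
		}
		return 0;
	}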
*/ - pd_idx = qd_idx = ~0; + pd_idx = qd_idx = -1; switch(conf->level) { case 4: pd_idx = data_disks; @@ -2143,12 +2176,11 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in raid5_conf_t *conf = sh->raid_conf; int firstwrite=0; - pr_debug("adding bh b#%llu to stripe s#%llu\n", + pr_debug("adding bi b#%llu to stripe s#%llu\n", (unsigned long long)bi->bi_sector, (unsigned long long)sh->sector); - spin_lock(&sh->lock); spin_lock_irq(&conf->device_lock); if (forwrite) { bip = &sh->dev[dd_idx].towrite; @@ -2169,19 +2201,6 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in bi->bi_next = *bip; *bip = bi; bi->bi_phys_segments++; - spin_unlock_irq(&conf->device_lock); - spin_unlock(&sh->lock); - - pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", - (unsigned long long)bi->bi_sector, - (unsigned long long)sh->sector, dd_idx); - - if (conf->mddev->bitmap && firstwrite) { - bitmap_startwrite(conf->mddev->bitmap, sh->sector, - STRIPE_SECTORS, 0); - sh->bm_seq = conf->seq_flush+1; - set_bit(STRIPE_BIT_DELAY, &sh->state); - } if (forwrite) { /* check if page is covered */ @@ -2196,12 +2215,23 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS) set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags); } + spin_unlock_irq(&conf->device_lock); + + pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", + (unsigned long long)(*bip)->bi_sector, + (unsigned long long)sh->sector, dd_idx); + + if (conf->mddev->bitmap && firstwrite) { + bitmap_startwrite(conf->mddev->bitmap, sh->sector, + STRIPE_SECTORS, 0); + sh->bm_seq = conf->seq_flush+1; + set_bit(STRIPE_BIT_DELAY, &sh->state); + } return 1; overlap: set_bit(R5_Overlap, &sh->dev[dd_idx].flags); spin_unlock_irq(&conf->device_lock); - spin_unlock(&sh->lock); return 0; } @@ -2238,9 +2268,18 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, rcu_read_lock(); rdev = rcu_dereference(conf->disks[i].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) - /* multiple read failures in one stripe */ - md_error(conf->mddev, rdev); + atomic_inc(&rdev->nr_pending); + else + rdev = NULL; rcu_read_unlock(); + if (rdev) { + if (!rdev_set_badblocks( + rdev, + sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } } spin_lock_irq(&conf->device_lock); /* fail all writes first */ @@ -2308,6 +2347,10 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, if (bitmap_end) bitmap_endwrite(conf->mddev->bitmap, sh->sector, STRIPE_SECTORS, 0, 0); + /* If we were in the middle of a write the parity block might + * still be locked - so just clear all R5_LOCKED flags + */ + clear_bit(R5_LOCKED, &sh->dev[i].flags); } if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state)) @@ -2315,109 +2358,73 @@ handle_failed_stripe(raid5_conf_t *conf, struct stripe_head *sh, md_wakeup_thread(conf->mddev->thread); } -/* fetch_block5 - checks the given member device to see if its data needs - * to be read or computed to satisfy a request. - * - * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill5 to continue - */ -static int fetch_block5(struct stripe_head *sh, struct stripe_head_state *s, - int disk_idx, int disks) -{ - struct r5dev *dev = &sh->dev[disk_idx]; - struct r5dev *failed_dev = &sh->dev[s->failed_num]; - - /* is the data in this block needed, and can we get it? 
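add_stripe_bio() above keeps each device's toread/towrite chain sorted by start sector and rejects any overlapping bio. The same walk over a toy interval list, as a self-contained sketch:

	#include <stdio.h>
	#include <stddef.h>

	struct req {
		unsigned long long start, len;	/* sectors */
		struct req *next;
	};

	/* insert r into the sorted chain at *head, refusing overlaps */
	static int insert_req(struct req **head, struct req *r)
	{
		struct req **bip = head;

		while (*bip && (*bip)->start < r->start) {
			if ((*bip)->start + (*bip)->len > r->start)
				return 0;	/* overlap */
			bip = &(*bip)->next;
		}
		if (*bip && r->start + r->len > (*bip)->start)
			return 0;		/* overlap */
		r->next = *bip;
		*bip = r;
		return 1;
	}

	int main(void)
	{
		struct req a = { 0, 8 }, b = { 16, 8 }, c = { 4, 8 };
		struct req *head = NULL;

		printf("%d %d %d\n", insert_req(&head, &a),
		       insert_req(&head, &b), insert_req(&head, &c));
		return 0;
	}

The third insert fails (prints 1 1 0) because [4,12) overlaps [0,8); in the driver that is the overlap: path, which sets R5_Overlap and makes the caller wait.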
*/ - if (!test_bit(R5_LOCKED, &dev->flags) && - !test_bit(R5_UPTODATE, &dev->flags) && - (dev->toread || - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || - s->syncing || s->expanding || - (s->failed && - (failed_dev->toread || - (failed_dev->towrite && - !test_bit(R5_OVERWRITE, &failed_dev->flags)))))) { - /* We would like to get this block, possibly by computing it, - * otherwise read it if the backing disk is insync - */ - if ((s->uptodate == disks - 1) && - (s->failed && disk_idx == s->failed_num)) { - set_bit(STRIPE_COMPUTE_RUN, &sh->state); - set_bit(STRIPE_OP_COMPUTE_BLK, &s->ops_request); - set_bit(R5_Wantcompute, &dev->flags); - sh->ops.target = disk_idx; - sh->ops.target2 = -1; - s->req_compute = 1; - /* Careful: from this point on 'uptodate' is in the eye - * of raid_run_ops which services 'compute' operations - * before writes. R5_Wantcompute flags a block that will - * be R5_UPTODATE by the time it is needed for a - * subsequent operation. - */ - s->uptodate++; - return 1; /* uptodate + compute == disks */ - } else if (test_bit(R5_Insync, &dev->flags)) { - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - pr_debug("Reading block %d (sync=%d)\n", disk_idx, - s->syncing); - } - } - - return 0; -} - -/** - * handle_stripe_fill5 - read or compute data to satisfy pending requests. - */ -static void handle_stripe_fill5(struct stripe_head *sh, - struct stripe_head_state *s, int disks) +static void +handle_failed_sync(raid5_conf_t *conf, struct stripe_head *sh, + struct stripe_head_state *s) { + int abort = 0; int i; - /* look for blocks to read/compute, skip this if a compute - * is already in flight, or if the stripe contents are in the - * midst of changing due to a write + md_done_sync(conf->mddev, STRIPE_SECTORS, 0); + clear_bit(STRIPE_SYNCING, &sh->state); + s->syncing = 0; + /* There is nothing more to do for sync/check/repair. + * For recover we need to record a bad block on all + * non-sync devices, or abort the recovery */ - if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && - !sh->reconstruct_state) - for (i = disks; i--; ) - if (fetch_block5(sh, s, i, disks)) - break; - set_bit(STRIPE_HANDLE, &sh->state); + if (!test_bit(MD_RECOVERY_RECOVER, &conf->mddev->recovery)) + return; + /* During recovery devices cannot be removed, so locking and + * refcounting of rdevs is not needed + */ + for (i = 0; i < conf->raid_disks; i++) { + mdk_rdev_t *rdev = conf->disks[i].rdev; + if (!rdev + || test_bit(Faulty, &rdev->flags) + || test_bit(In_sync, &rdev->flags)) + continue; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + abort = 1; + } + if (abort) { + conf->recovery_disabled = conf->mddev->recovery_disabled; + set_bit(MD_RECOVERY_INTR, &conf->mddev->recovery); + } } -/* fetch_block6 - checks the given member device to see if its data needs +/* fetch_block - checks the given member device to see if its data needs * to be read or computed to satisfy a request. 
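The merged fetch_block() below folds the old RAID5 and RAID6 tests into one predicate; only the last two arms depend on the level. A sketch with the stripe state reduced to plain flags (the names are illustrative, and the syncing/expanding arms are omitted):

	#include <stdio.h>

	struct devstate {
		int locked, uptodate, toread, towrite, overwrite;
	};

	/* does dev need to be read or computed? fdev is the first
	 * failed device, to_write counts pending writes on the stripe
	 */
	static int need_fetch(int level, int failed, int to_write,
			      struct devstate *dev, struct devstate *fdev)
	{
		return !dev->locked && !dev->uptodate &&
		       (dev->toread ||
			(dev->towrite && !dev->overwrite) ||
			(failed >= 1 && fdev->toread) ||
			(level <= 5 && failed && fdev->towrite &&
			 !fdev->overwrite) ||
			(level == 6 && failed && to_write));
	}

	int main(void)
	{
		struct devstate dev = { 0 }, fdev = { 0 };

		/* a write pending elsewhere on a degraded stripe */
		printf("raid5: %d, raid6: %d\n",
		       need_fetch(5, 1, 1, &dev, &fdev),
		       need_fetch(6, 1, 1, &dev, &fdev));
		return 0;
	}

RAID6 answers 1 because its reconstruct-write needs every block; RAID5 answers 0 unless the failed device itself holds a partial write.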
* * Returns 1 when no more member devices need to be checked, otherwise returns - * 0 to tell the loop in handle_stripe_fill6 to continue + * 0 to tell the loop in handle_stripe_fill to continue */ -static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disk_idx, int disks) +static int fetch_block(struct stripe_head *sh, struct stripe_head_state *s, + int disk_idx, int disks) { struct r5dev *dev = &sh->dev[disk_idx]; - struct r5dev *fdev[2] = { &sh->dev[r6s->failed_num[0]], - &sh->dev[r6s->failed_num[1]] }; + struct r5dev *fdev[2] = { &sh->dev[s->failed_num[0]], + &sh->dev[s->failed_num[1]] }; + /* is the data in this block needed, and can we get it? */ if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || s->syncing || s->expanding || - (s->failed >= 1 && - (fdev[0]->toread || s->to_write)) || - (s->failed >= 2 && - (fdev[1]->toread || s->to_write)))) { + (s->failed >= 1 && fdev[0]->toread) || + (s->failed >= 2 && fdev[1]->toread) || + (sh->raid_conf->level <= 5 && s->failed && fdev[0]->towrite && + !test_bit(R5_OVERWRITE, &fdev[0]->flags)) || + (sh->raid_conf->level == 6 && s->failed && s->to_write))) { /* we would like to get this block, possibly by computing it, * otherwise read it if the backing disk is insync */ BUG_ON(test_bit(R5_Wantcompute, &dev->flags)); BUG_ON(test_bit(R5_Wantread, &dev->flags)); if ((s->uptodate == disks - 1) && - (s->failed && (disk_idx == r6s->failed_num[0] || - disk_idx == r6s->failed_num[1]))) { + (s->failed && (disk_idx == s->failed_num[0] || + disk_idx == s->failed_num[1]))) { /* have disk failed, and we're requested to fetch it; * do compute it */ @@ -2429,6 +2436,12 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, sh->ops.target = disk_idx; sh->ops.target2 = -1; /* no 2nd target */ s->req_compute = 1; + /* Careful: from this point on 'uptodate' is in the eye + * of raid_run_ops which services 'compute' operations + * before writes. R5_Wantcompute flags a block that will + * be R5_UPTODATE by the time it is needed for a + * subsequent operation. + */ s->uptodate++; return 1; } else if (s->uptodate == disks-2 && s->failed >= 2) { @@ -2469,11 +2482,11 @@ static int fetch_block6(struct stripe_head *sh, struct stripe_head_state *s, } /** - * handle_stripe_fill6 - read or compute data to satisfy pending requests. + * handle_stripe_fill - read or compute data to satisfy pending requests. 
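handle_stripe_dirtying() further below chooses between read-modify-write and reconstruct-write by counting the reads each would need (RAID6 is short-circuited to rcw). The core of that counting over a toy five-disk stripe with one dirtied block:

	#include <stdio.h>

	#define DISKS 5

	int main(void)
	{
		int pd_idx = 4;				/* parity disk */
		int towrite[DISKS]   = { 1, 0, 0, 0, 0 };
		int overwrite[DISKS] = { 1, 0, 0, 0, 0 };
		int uptodate[DISKS]  = { 0 };
		int rmw = 0, rcw = 0, i;

		for (i = 0; i < DISKS; i++) {
			/* rmw must read old data and old parity */
			if ((towrite[i] || i == pd_idx) && !uptodate[i])
				rmw++;
			/* rcw must read everything it will not overwrite */
			if (!overwrite[i] && i != pd_idx && !uptodate[i])
				rcw++;
		}
		printf("rmw needs %d reads, rcw needs %d: choose %s\n",
		       rmw, rcw, rmw < rcw ? "rmw" : "rcw");
		return 0;
	}

Here rmw wins (2 reads against 3); when most of the stripe is being overwritten the balance tips the other way.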
*/ -static void handle_stripe_fill6(struct stripe_head *sh, - struct stripe_head_state *s, struct r6_state *r6s, - int disks) +static void handle_stripe_fill(struct stripe_head *sh, + struct stripe_head_state *s, + int disks) { int i; @@ -2484,7 +2497,7 @@ static void handle_stripe_fill6(struct stripe_head *sh, if (!test_bit(STRIPE_COMPUTE_RUN, &sh->state) && !sh->check_state && !sh->reconstruct_state) for (i = disks; i--; ) - if (fetch_block6(sh, s, r6s, i, disks)) + if (fetch_block(sh, s, i, disks)) break; set_bit(STRIPE_HANDLE, &sh->state); } @@ -2540,11 +2553,19 @@ static void handle_stripe_clean_event(raid5_conf_t *conf, md_wakeup_thread(conf->mddev->thread); } -static void handle_stripe_dirtying5(raid5_conf_t *conf, - struct stripe_head *sh, struct stripe_head_state *s, int disks) +static void handle_stripe_dirtying(raid5_conf_t *conf, + struct stripe_head *sh, + struct stripe_head_state *s, + int disks) { int rmw = 0, rcw = 0, i; - for (i = disks; i--; ) { + if (conf->max_degraded == 2) { + /* RAID6 requires 'rcw' in current implementation + * Calculate the real rcw later - for now fake it + * look like rcw is cheaper + */ + rcw = 1; rmw = 2; + } else for (i = disks; i--; ) { /* would I have to read this buffer for read_modify_write */ struct r5dev *dev = &sh->dev[i]; if ((dev->towrite || i == sh->pd_idx) && @@ -2591,16 +2612,19 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, } } } - if (rcw <= rmw && rcw > 0) + if (rcw <= rmw && rcw > 0) { /* want reconstruct write, but need to get some data */ + rcw = 0; for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; if (!test_bit(R5_OVERWRITE, &dev->flags) && - i != sh->pd_idx && + i != sh->pd_idx && i != sh->qd_idx && !test_bit(R5_LOCKED, &dev->flags) && !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags)) && - test_bit(R5_Insync, &dev->flags)) { + test_bit(R5_Wantcompute, &dev->flags))) { + rcw++; + if (!test_bit(R5_Insync, &dev->flags)) + continue; /* it's a failed drive */ if ( test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { pr_debug("Read_old block " @@ -2614,6 +2638,7 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, } } } + } /* now if nothing is locked, and if we have enough data, * we can start a write request */ @@ -2630,53 +2655,6 @@ static void handle_stripe_dirtying5(raid5_conf_t *conf, schedule_reconstruction(sh, s, rcw == 0, 0); } -static void handle_stripe_dirtying6(raid5_conf_t *conf, - struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disks) -{ - int rcw = 0, pd_idx = sh->pd_idx, i; - int qd_idx = sh->qd_idx; - - set_bit(STRIPE_HANDLE, &sh->state); - for (i = disks; i--; ) { - struct r5dev *dev = &sh->dev[i]; - /* check if we haven't enough data */ - if (!test_bit(R5_OVERWRITE, &dev->flags) && - i != pd_idx && i != qd_idx && - !test_bit(R5_LOCKED, &dev->flags) && - !(test_bit(R5_UPTODATE, &dev->flags) || - test_bit(R5_Wantcompute, &dev->flags))) { - rcw++; - if (!test_bit(R5_Insync, &dev->flags)) - continue; /* it's a failed drive */ - - if ( - test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { - pr_debug("Read_old stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(R5_LOCKED, &dev->flags); - set_bit(R5_Wantread, &dev->flags); - s->locked++; - } else { - pr_debug("Request delayed stripe %llu " - "block %d for Reconstruct\n", - (unsigned long long)sh->sector, i); - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - } - } - } - /* now if nothing is locked, and if we have enough 
data, we can start a - * write request - */ - if ((s->req_compute || !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) && - s->locked == 0 && rcw == 0 && - !test_bit(STRIPE_BIT_DELAY, &sh->state)) { - schedule_reconstruction(sh, s, 1, 0); - } -} - static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, int disks) { @@ -2695,7 +2673,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, s->uptodate--; break; } - dev = &sh->dev[s->failed_num]; + dev = &sh->dev[s->failed_num[0]]; /* fall through */ case check_state_compute_result: sh->check_state = check_state_idle; @@ -2767,7 +2745,7 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, int disks) + int disks) { int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; @@ -2786,14 +2764,14 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, switch (sh->check_state) { case check_state_idle: /* start a new check operation if there are < 2 failures */ - if (s->failed == r6s->q_failed) { + if (s->failed == s->q_failed) { /* The only possible failed device holds Q, so it * makes sense to check P (If anything else were failed, * we would have used P to recreate it). */ sh->check_state = check_state_run; } - if (!r6s->q_failed && s->failed < 2) { + if (!s->q_failed && s->failed < 2) { /* Q is not failed, and we didn't use it to generate * anything, so it makes sense to check it */ @@ -2835,13 +2813,13 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, */ BUG_ON(s->uptodate < disks - 1); /* We don't need Q to recover */ if (s->failed == 2) { - dev = &sh->dev[r6s->failed_num[1]]; + dev = &sh->dev[s->failed_num[1]]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); } if (s->failed >= 1) { - dev = &sh->dev[r6s->failed_num[0]]; + dev = &sh->dev[s->failed_num[0]]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); @@ -2928,8 +2906,7 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, } } -static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, - struct r6_state *r6s) +static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh) { int i; @@ -2971,7 +2948,7 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); for (j = 0; j < conf->raid_disks; j++) if (j != sh2->pd_idx && - (!r6s || j != sh2->qd_idx) && + j != sh2->qd_idx && !test_bit(R5_Expanded, &sh2->dev[j].flags)) break; if (j == conf->raid_disks) { @@ -3006,43 +2983,35 @@ static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, * */ -static void handle_stripe5(struct stripe_head *sh) +static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) { raid5_conf_t *conf = sh->raid_conf; - int disks = sh->disks, i; - struct bio *return_bi = NULL; - struct stripe_head_state s; + int disks = sh->disks; struct r5dev *dev; - mdk_rdev_t *blocked_rdev = NULL; - int prexor; - int dec_preread_active = 0; + int i; - memset(&s, 0, sizeof(s)); - pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d " - "reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), sh->pd_idx, sh->check_state, - sh->reconstruct_state); + memset(s, 0, sizeof(*s)); - 
spin_lock(&sh->lock); - clear_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - - s.syncing = test_bit(STRIPE_SYNCING, &sh->state); - s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + s->syncing = test_bit(STRIPE_SYNCING, &sh->state); + s->expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); + s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); + s->failed_num[0] = -1; + s->failed_num[1] = -1; /* Now to look around and see what can be done */ rcu_read_lock(); + spin_lock_irq(&conf->device_lock); for (i=disks; i--; ) { mdk_rdev_t *rdev; + sector_t first_bad; + int bad_sectors; + int is_bad = 0; dev = &sh->dev[i]; - pr_debug("check %d: state 0x%lx toread %p read %p write %p " - "written %p\n", i, dev->flags, dev->toread, dev->read, - dev->towrite, dev->written); - - /* maybe we can request a biofill operation + pr_debug("check %d: state 0x%lx read %p write %p written %p\n", + i, dev->flags, dev->toread, dev->towrite, dev->written); + /* maybe we can reply to a read * * new wantfill requests are only permitted while * ops_complete_biofill is guaranteed to be inactive @@ -3052,37 +3021,74 @@ static void handle_stripe5(struct stripe_head *sh) set_bit(R5_Wantfill, &dev->flags); /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; - if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; + if (test_bit(R5_LOCKED, &dev->flags)) + s->locked++; + if (test_bit(R5_UPTODATE, &dev->flags)) + s->uptodate++; + if (test_bit(R5_Wantcompute, &dev->flags)) { + s->compute++; + BUG_ON(s->compute > 2); + } if (test_bit(R5_Wantfill, &dev->flags)) - s.to_fill++; + s->to_fill++; else if (dev->toread) - s.to_read++; + s->to_read++; if (dev->towrite) { - s.to_write++; + s->to_write++; if (!test_bit(R5_OVERWRITE, &dev->flags)) - s.non_overwrite++; + s->non_overwrite++; } if (dev->written) - s.written++; + s->written++; rdev = rcu_dereference(conf->disks[i].rdev); - if (blocked_rdev == NULL && - rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); + if (rdev) { + is_bad = is_badblock(rdev, sh->sector, STRIPE_SECTORS, + &first_bad, &bad_sectors); + if (s->blocked_rdev == NULL + && (test_bit(Blocked, &rdev->flags) + || is_bad < 0)) { + if (is_bad < 0) + set_bit(BlockedBadBlocks, + &rdev->flags); + s->blocked_rdev = rdev; + atomic_inc(&rdev->nr_pending); + } } clear_bit(R5_Insync, &dev->flags); if (!rdev) /* Not in-sync */; - else if (test_bit(In_sync, &rdev->flags)) + else if (is_bad) { + /* also not in-sync */ + if (!test_bit(WriteErrorSeen, &rdev->flags)) { + /* treat as in-sync, but with a read error + * which we can now try to correct + */ + set_bit(R5_Insync, &dev->flags); + set_bit(R5_ReadError, &dev->flags); + } + } else if (test_bit(In_sync, &rdev->flags)) set_bit(R5_Insync, &dev->flags); else { - /* could be in-sync depending on recovery/reshape status */ + /* in sync if before recovery_offset */ if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) set_bit(R5_Insync, &dev->flags); } + if (test_bit(R5_WriteError, &dev->flags)) { + clear_bit(R5_Insync, &dev->flags); + if (!test_bit(Faulty, &rdev->flags)) { + s->handle_bad_blocks = 1; + atomic_inc(&rdev->nr_pending); + } else + clear_bit(R5_WriteError, &dev->flags); + } + if (test_bit(R5_MadeGood, &dev->flags)) { + if (!test_bit(Faulty, &rdev->flags)) { + s->handle_bad_blocks = 1; + atomic_inc(&rdev->nr_pending); + } else + 
clear_bit(R5_MadeGood, &dev->flags); + } if (!test_bit(R5_Insync, &dev->flags)) { /* The ReadError flag will just be confusing now */ clear_bit(R5_ReadError, &dev->flags); @@ -3091,313 +3097,60 @@ static void handle_stripe5(struct stripe_head *sh) if (test_bit(R5_ReadError, &dev->flags)) clear_bit(R5_Insync, &dev->flags); if (!test_bit(R5_Insync, &dev->flags)) { - s.failed++; - s.failed_num = i; + if (s->failed < 2) + s->failed_num[s->failed] = i; + s->failed++; } } + spin_unlock_irq(&conf->device_lock); rcu_read_unlock(); - - if (unlikely(blocked_rdev)) { - if (s.syncing || s.expanding || s.expanded || - s.to_write || s.written) { - set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; - } - /* There is nothing for the blocked_rdev to block */ - rdev_dec_pending(blocked_rdev, conf->mddev); - blocked_rdev = NULL; - } - - if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { - set_bit(STRIPE_OP_BIOFILL, &s.ops_request); - set_bit(STRIPE_BIOFILL_RUN, &sh->state); - } - - pr_debug("locked=%d uptodate=%d to_read=%d" - " to_write=%d failed=%d failed_num=%d\n", - s.locked, s.uptodate, s.to_read, s.to_write, - s.failed, s.failed_num); - /* check if the array has lost two devices and, if so, some requests might - * need to be failed - */ - if (s.failed > 1 && s.to_read+s.to_write+s.written) - handle_failed_stripe(conf, sh, &s, disks, &return_bi); - if (s.failed > 1 && s.syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS,0); - clear_bit(STRIPE_SYNCING, &sh->state); - s.syncing = 0; - } - - /* might be able to return some write requests if the parity block - * is safe, or on a failed drive - */ - dev = &sh->dev[sh->pd_idx]; - if ( s.written && - ((test_bit(R5_Insync, &dev->flags) && - !test_bit(R5_LOCKED, &dev->flags) && - test_bit(R5_UPTODATE, &dev->flags)) || - (s.failed == 1 && s.failed_num == sh->pd_idx))) - handle_stripe_clean_event(conf, sh, disks, &return_bi); - - /* Now we might consider reading some blocks, either to check/generate - * parity, or to satisfy requests - * or to load a block that is being partially written. - */ - if (s.to_read || s.non_overwrite || - (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) - handle_stripe_fill5(sh, &s, disks); - - /* Now we check to see if any write operations have recently - * completed - */ - prexor = 0; - if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) - prexor = 1; - if (sh->reconstruct_state == reconstruct_state_drain_result || - sh->reconstruct_state == reconstruct_state_prexor_drain_result) { - sh->reconstruct_state = reconstruct_state_idle; - - /* All the 'written' buffers and the parity block are ready to - * be written back to disk - */ - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); - for (i = disks; i--; ) { - dev = &sh->dev[i]; - if (test_bit(R5_LOCKED, &dev->flags) && - (i == sh->pd_idx || dev->written)) { - pr_debug("Writing block %d\n", i); - set_bit(R5_Wantwrite, &dev->flags); - if (prexor) - continue; - if (!test_bit(R5_Insync, &dev->flags) || - (i == sh->pd_idx && s.failed == 0)) - set_bit(STRIPE_INSYNC, &sh->state); - } - } - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - dec_preread_active = 1; - } - - /* Now to consider new write requests and what else, if anything - * should be read. We do not handle new writes when: - * 1/ A 'write' operation (copy+xor) is already in flight. - * 2/ A 'check' operation is in flight, as it may clobber the parity - * block. 
- */ - if (s.to_write && !sh->reconstruct_state && !sh->check_state) - handle_stripe_dirtying5(conf, sh, &s, disks); - - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough - * data is available. The parity check is held off while parity - * dependent operations are in flight. - */ - if (sh->check_state || - (s.syncing && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && - !test_bit(STRIPE_INSYNC, &sh->state))) - handle_parity_checks5(conf, sh, &s, disks); - - if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS,1); - clear_bit(STRIPE_SYNCING, &sh->state); - } - - /* If the failed drive is just a ReadError, then we might need to progress - * the repair/check process - */ - if (s.failed == 1 && !conf->mddev->ro && - test_bit(R5_ReadError, &sh->dev[s.failed_num].flags) - && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags) - && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags) - ) { - dev = &sh->dev[s.failed_num]; - if (!test_bit(R5_ReWrite, &dev->flags)) { - set_bit(R5_Wantwrite, &dev->flags); - set_bit(R5_ReWrite, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } else { - /* let's read it back */ - set_bit(R5_Wantread, &dev->flags); - set_bit(R5_LOCKED, &dev->flags); - s.locked++; - } - } - - /* Finish reconstruct operations initiated by the expansion process */ - if (sh->reconstruct_state == reconstruct_state_result) { - struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1, 1); - if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { - /* sh cannot be written until sh2 has been read. - * so arrange for sh to be delayed a little - */ - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, - &sh2->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh2); - goto unlock; - } - if (sh2) - release_stripe(sh2); - - sh->reconstruct_state = reconstruct_state_idle; - clear_bit(STRIPE_EXPANDING, &sh->state); - for (i = conf->raid_disks; i--; ) { - set_bit(R5_Wantwrite, &sh->dev[i].flags); - set_bit(R5_LOCKED, &sh->dev[i].flags); - s.locked++; - } - } - - if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && - !sh->reconstruct_state) { - /* Need to write out all blocks after computing parity */ - sh->disks = conf->raid_disks; - stripe_set_idx(sh->sector, conf, 0, sh); - schedule_reconstruction(sh, &s, 1, 1); - } else if (s.expanded && !sh->reconstruct_state && s.locked == 0) { - clear_bit(STRIPE_EXPAND_READY, &sh->state); - atomic_dec(&conf->reshape_stripes); - wake_up(&conf->wait_for_overlap); - md_done_sync(conf->mddev, STRIPE_SECTORS, 1); - } - - if (s.expanding && s.locked == 0 && - !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) - handle_stripe_expansion(conf, sh, NULL); - - unlock: - spin_unlock(&sh->lock); - - /* wait for this device to become unblocked */ - if (unlikely(blocked_rdev)) - md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); - - if (s.ops_request) - raid_run_ops(sh, s.ops_request); - - ops_run_io(sh, &s); - - if (dec_preread_active) { - /* We delay this until after ops_run_io so that if make_request - * is waiting on a flush, it won't continue until the writes - * have actually been submitted. 
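The unified handle_stripe() below serializes itself per stripe with a test-and-set on STRIPE_ACTIVE, re-arming STRIPE_HANDLE when it loses the race. The same idiom as a runnable C11 sketch:

	#include <stdio.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	struct stripe {
		atomic_flag active;	/* STRIPE_ACTIVE */
		atomic_bool handle;	/* STRIPE_HANDLE: look again later */
	};

	static void handle_stripe(struct stripe *sh)
	{
		atomic_store(&sh->handle, false);
		if (atomic_flag_test_and_set(&sh->active)) {
			/* already being handled; make sure the new
			 * state gets looked at when the owner finishes
			 */
			atomic_store(&sh->handle, true);
			return;
		}
		printf("handling stripe\n");
		atomic_flag_clear(&sh->active);
	}

	int main(void)
	{
		struct stripe sh = { ATOMIC_FLAG_INIT, false };

		handle_stripe(&sh);			/* runs */
		atomic_flag_test_and_set(&sh.active);	/* simulate an owner */
		handle_stripe(&sh);			/* defers instead */
		printf("re-armed: %d\n", (int)atomic_load(&sh.handle));
		return 0;
	}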
- */ - atomic_dec(&conf->preread_active_stripes); - if (atomic_read(&conf->preread_active_stripes) < - IO_THRESHOLD) - md_wakeup_thread(conf->mddev->thread); - } - return_io(return_bi); } -static void handle_stripe6(struct stripe_head *sh) +static void handle_stripe(struct stripe_head *sh) { + struct stripe_head_state s; raid5_conf_t *conf = sh->raid_conf; + int i; + int prexor; int disks = sh->disks; - struct bio *return_bi = NULL; - int i, pd_idx = sh->pd_idx, qd_idx = sh->qd_idx; - struct stripe_head_state s; - struct r6_state r6s; - struct r5dev *dev, *pdev, *qdev; - mdk_rdev_t *blocked_rdev = NULL; - int dec_preread_active = 0; + struct r5dev *pdev, *qdev; + + clear_bit(STRIPE_HANDLE, &sh->state); + if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) { + /* already being handled, ensure it gets handled + * again when current action finishes */ + set_bit(STRIPE_HANDLE, &sh->state); + return; + } + + if (test_and_clear_bit(STRIPE_SYNC_REQUESTED, &sh->state)) { + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); + } + clear_bit(STRIPE_DELAYED, &sh->state); pr_debug("handling stripe %llu, state=%#lx cnt=%d, " "pd_idx=%d, qd_idx=%d\n, check:%d, reconstruct:%d\n", (unsigned long long)sh->sector, sh->state, - atomic_read(&sh->count), pd_idx, qd_idx, + atomic_read(&sh->count), sh->pd_idx, sh->qd_idx, sh->check_state, sh->reconstruct_state); - memset(&s, 0, sizeof(s)); - - spin_lock(&sh->lock); - clear_bit(STRIPE_HANDLE, &sh->state); - clear_bit(STRIPE_DELAYED, &sh->state); - - s.syncing = test_bit(STRIPE_SYNCING, &sh->state); - s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); - s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); - /* Now to look around and see what can be done */ - - rcu_read_lock(); - for (i=disks; i--; ) { - mdk_rdev_t *rdev; - dev = &sh->dev[i]; - pr_debug("check %d: state 0x%lx read %p write %p written %p\n", - i, dev->flags, dev->toread, dev->towrite, dev->written); - /* maybe we can reply to a read - * - * new wantfill requests are only permitted while - * ops_complete_biofill is guaranteed to be inactive - */ - if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && - !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) - set_bit(R5_Wantfill, &dev->flags); + analyse_stripe(sh, &s); - /* now count some things */ - if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; - if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; - if (test_bit(R5_Wantcompute, &dev->flags)) { - s.compute++; - BUG_ON(s.compute > 2); - } - - if (test_bit(R5_Wantfill, &dev->flags)) { - s.to_fill++; - } else if (dev->toread) - s.to_read++; - if (dev->towrite) { - s.to_write++; - if (!test_bit(R5_OVERWRITE, &dev->flags)) - s.non_overwrite++; - } - if (dev->written) - s.written++; - rdev = rcu_dereference(conf->disks[i].rdev); - if (blocked_rdev == NULL && - rdev && unlikely(test_bit(Blocked, &rdev->flags))) { - blocked_rdev = rdev; - atomic_inc(&rdev->nr_pending); - } - clear_bit(R5_Insync, &dev->flags); - if (!rdev) - /* Not in-sync */; - else if (test_bit(In_sync, &rdev->flags)) - set_bit(R5_Insync, &dev->flags); - else { - /* in sync if before recovery_offset */ - if (sh->sector + STRIPE_SECTORS <= rdev->recovery_offset) - set_bit(R5_Insync, &dev->flags); - } - if (!test_bit(R5_Insync, &dev->flags)) { - /* The ReadError flag will just be confusing now */ - clear_bit(R5_ReadError, &dev->flags); - clear_bit(R5_ReWrite, &dev->flags); - } - if (test_bit(R5_ReadError, &dev->flags)) - clear_bit(R5_Insync, &dev->flags); - if (!test_bit(R5_Insync, &dev->flags)) { - if (s.failed < 
2) - r6s.failed_num[s.failed] = i; - s.failed++; - } + if (s.handle_bad_blocks) { + set_bit(STRIPE_HANDLE, &sh->state); + goto finish; } - rcu_read_unlock(); - if (unlikely(blocked_rdev)) { + if (unlikely(s.blocked_rdev)) { if (s.syncing || s.expanding || s.expanded || s.to_write || s.written) { set_bit(STRIPE_HANDLE, &sh->state); - goto unlock; + goto finish; } /* There is nothing for the blocked_rdev to block */ - rdev_dec_pending(blocked_rdev, conf->mddev); - blocked_rdev = NULL; + rdev_dec_pending(s.blocked_rdev, conf->mddev); + s.blocked_rdev = NULL; } if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) { @@ -3408,83 +3161,88 @@ static void handle_stripe6(struct stripe_head *sh) pr_debug("locked=%d uptodate=%d to_read=%d" " to_write=%d failed=%d failed_num=%d,%d\n", s.locked, s.uptodate, s.to_read, s.to_write, s.failed, - r6s.failed_num[0], r6s.failed_num[1]); - /* check if the array has lost >2 devices and, if so, some requests - * might need to be failed + s.failed_num[0], s.failed_num[1]); + /* check if the array has lost more than max_degraded devices and, + * if so, some requests might need to be failed. */ - if (s.failed > 2 && s.to_read+s.to_write+s.written) - handle_failed_stripe(conf, sh, &s, disks, &return_bi); - if (s.failed > 2 && s.syncing) { - md_done_sync(conf->mddev, STRIPE_SECTORS,0); - clear_bit(STRIPE_SYNCING, &sh->state); - s.syncing = 0; - } + if (s.failed > conf->max_degraded && s.to_read+s.to_write+s.written) + handle_failed_stripe(conf, sh, &s, disks, &s.return_bi); + if (s.failed > conf->max_degraded && s.syncing) + handle_failed_sync(conf, sh, &s); /* * might be able to return some write requests if the parity blocks * are safe, or on a failed drive */ - pdev = &sh->dev[pd_idx]; - r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) - || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); - qdev = &sh->dev[qd_idx]; - r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == qd_idx) - || (s.failed >= 2 && r6s.failed_num[1] == qd_idx); - - if ( s.written && - ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) + pdev = &sh->dev[sh->pd_idx]; + s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx); + qdev = &sh->dev[sh->qd_idx]; + s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx) + || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx) + || conf->level < 6; + + if (s.written && + (s.p_failed || ((test_bit(R5_Insync, &pdev->flags) && !test_bit(R5_LOCKED, &pdev->flags) && test_bit(R5_UPTODATE, &pdev->flags)))) && - ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) + (s.q_failed || ((test_bit(R5_Insync, &qdev->flags) && !test_bit(R5_LOCKED, &qdev->flags) && test_bit(R5_UPTODATE, &qdev->flags))))) - handle_stripe_clean_event(conf, sh, disks, &return_bi); + handle_stripe_clean_event(conf, sh, disks, &s.return_bi); /* Now we might consider reading some blocks, either to check/generate * parity, or to satisfy requests * or to load a block that is being partially written. 
*/ - if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || - (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) - handle_stripe_fill6(sh, &s, &r6s, disks); + if (s.to_read || s.non_overwrite + || (conf->level == 6 && s.to_write && s.failed) + || (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding) + handle_stripe_fill(sh, &s, disks); /* Now we check to see if any write operations have recently * completed */ - if (sh->reconstruct_state == reconstruct_state_drain_result) { - + prexor = 0; + if (sh->reconstruct_state == reconstruct_state_prexor_drain_result) + prexor = 1; + if (sh->reconstruct_state == reconstruct_state_drain_result || + sh->reconstruct_state == reconstruct_state_prexor_drain_result) { sh->reconstruct_state = reconstruct_state_idle; - /* All the 'written' buffers and the parity blocks are ready to + + /* All the 'written' buffers and the parity block are ready to * be written back to disk */ BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[qd_idx].flags)); + BUG_ON(sh->qd_idx >= 0 && + !test_bit(R5_UPTODATE, &sh->dev[sh->qd_idx].flags)); for (i = disks; i--; ) { - dev = &sh->dev[i]; + struct r5dev *dev = &sh->dev[i]; if (test_bit(R5_LOCKED, &dev->flags) && - (i == sh->pd_idx || i == qd_idx || - dev->written)) { + (i == sh->pd_idx || i == sh->qd_idx || + dev->written)) { pr_debug("Writing block %d\n", i); - BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); set_bit(R5_Wantwrite, &dev->flags); + if (prexor) + continue; if (!test_bit(R5_Insync, &dev->flags) || - ((i == sh->pd_idx || i == qd_idx) && - s.failed == 0)) + ((i == sh->pd_idx || i == sh->qd_idx) && + s.failed == 0)) set_bit(STRIPE_INSYNC, &sh->state); } } if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) - dec_preread_active = 1; + s.dec_preread_active = 1; } /* Now to consider new write requests and what else, if anything * should be read. We do not handle new writes when: - * 1/ A 'write' operation (copy+gen_syndrome) is already in flight. + * 1/ A 'write' operation (copy+xor) is already in flight. * 2/ A 'check' operation is in flight, as it may clobber the parity * block. 
*/ if (s.to_write && !sh->reconstruct_state && !sh->check_state) - handle_stripe_dirtying6(conf, sh, &s, &r6s, disks); + handle_stripe_dirtying(conf, sh, &s, disks); /* maybe we need to check and possibly fix the parity for this stripe * Any reads will already have been scheduled, so we just see if enough @@ -3494,20 +3252,24 @@ static void handle_stripe6(struct stripe_head *sh) if (sh->check_state || (s.syncing && s.locked == 0 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state) && - !test_bit(STRIPE_INSYNC, &sh->state))) - handle_parity_checks6(conf, sh, &s, &r6s, disks); + !test_bit(STRIPE_INSYNC, &sh->state))) { + if (conf->level == 6) + handle_parity_checks6(conf, sh, &s, disks); + else + handle_parity_checks5(conf, sh, &s, disks); + } if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { - md_done_sync(conf->mddev, STRIPE_SECTORS,1); + md_done_sync(conf->mddev, STRIPE_SECTORS, 1); clear_bit(STRIPE_SYNCING, &sh->state); } /* If the failed drives are just a ReadError, then we might need * to progress the repair/check process */ - if (s.failed <= 2 && !conf->mddev->ro) + if (s.failed <= conf->max_degraded && !conf->mddev->ro) for (i = 0; i < s.failed; i++) { - dev = &sh->dev[r6s.failed_num[i]]; + struct r5dev *dev = &sh->dev[s.failed_num[i]]; if (test_bit(R5_ReadError, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && test_bit(R5_UPTODATE, &dev->flags) @@ -3526,8 +3288,26 @@ static void handle_stripe6(struct stripe_head *sh) } } + /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { + struct stripe_head *sh_src + = get_active_stripe(conf, sh->sector, 1, 1, 1); + if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { + /* sh cannot be written until sh_src has been read. + * so arrange for sh to be delayed a little + */ + set_bit(STRIPE_DELAYED, &sh->state); + set_bit(STRIPE_HANDLE, &sh->state); + if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, + &sh_src->state)) + atomic_inc(&conf->preread_active_stripes); + release_stripe(sh_src); + goto finish; + } + if (sh_src) + release_stripe(sh_src); + sh->reconstruct_state = reconstruct_state_idle; clear_bit(STRIPE_EXPANDING, &sh->state); for (i = conf->raid_disks; i--; ) { @@ -3539,24 +3319,7 @@ static void handle_stripe6(struct stripe_head *sh) if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && !sh->reconstruct_state) { - struct stripe_head *sh2 - = get_active_stripe(conf, sh->sector, 1, 1, 1); - if (sh2 && test_bit(STRIPE_EXPAND_SOURCE, &sh2->state)) { - /* sh cannot be written until sh2 has been read. 
- * so arrange for sh to be delayed a little - */ - set_bit(STRIPE_DELAYED, &sh->state); - set_bit(STRIPE_HANDLE, &sh->state); - if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, - &sh2->state)) - atomic_inc(&conf->preread_active_stripes); - release_stripe(sh2); - goto unlock; - } - if (sh2) - release_stripe(sh2); - - /* Need to write out all blocks after computing P&Q */ + /* Need to write out all blocks after computing parity */ sh->disks = conf->raid_disks; stripe_set_idx(sh->sector, conf, 0, sh); schedule_reconstruction(sh, &s, 1, 1); @@ -3569,22 +3332,39 @@ static void handle_stripe6(struct stripe_head *sh) if (s.expanding && s.locked == 0 && !test_bit(STRIPE_COMPUTE_RUN, &sh->state)) - handle_stripe_expansion(conf, sh, &r6s); - - unlock: - spin_unlock(&sh->lock); + handle_stripe_expansion(conf, sh); +finish: /* wait for this device to become unblocked */ - if (unlikely(blocked_rdev)) - md_wait_for_blocked_rdev(blocked_rdev, conf->mddev); + if (unlikely(s.blocked_rdev)) + md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev); + + if (s.handle_bad_blocks) + for (i = disks; i--; ) { + mdk_rdev_t *rdev; + struct r5dev *dev = &sh->dev[i]; + if (test_and_clear_bit(R5_WriteError, &dev->flags)) { + /* We own a safe reference to the rdev */ + rdev = conf->disks[i].rdev; + if (!rdev_set_badblocks(rdev, sh->sector, + STRIPE_SECTORS, 0)) + md_error(conf->mddev, rdev); + rdev_dec_pending(rdev, conf->mddev); + } + if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { + rdev = conf->disks[i].rdev; + rdev_clear_badblocks(rdev, sh->sector, + STRIPE_SECTORS); + rdev_dec_pending(rdev, conf->mddev); + } + } if (s.ops_request) raid_run_ops(sh, s.ops_request); ops_run_io(sh, &s); - - if (dec_preread_active) { + if (s.dec_preread_active) { /* We delay this until after ops_run_io so that if make_request * is waiting on a flush, it won't continue until the writes * have actually been submitted. 
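The STRIPE_ACTIVE bit introduced in this hunk replaces the old per-stripe spinlock: the caller that wins test_and_set_bit() owns the stripe state machine, and a caller that loses simply re-arms STRIPE_HANDLE so the owner runs it again. A minimal sketch of that hand-off, assuming only generic kernel bitops; handle_stripe_sketch and its elided body are illustrative stand-ins, not functions from this patch:

/* Ownership hand-off via an atomic bit, as in the new handle_stripe(). */
static void handle_stripe_sketch(struct stripe_head *sh)
{
	clear_bit(STRIPE_HANDLE, &sh->state);
	if (test_and_set_bit(STRIPE_ACTIVE, &sh->state)) {
		/* Already being handled: request a re-run and back off. */
		set_bit(STRIPE_HANDLE, &sh->state);
		return;
	}
	/* ... analyse_stripe() and the per-stripe actions run here ... */
	clear_bit(STRIPE_ACTIVE, &sh->state);	/* release ownership */
}

Because the bit is cleared only after all pending actions have been recorded, a concurrent caller can never observe a half-updated stripe, which is what previously required sh->lock.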
@@ -3595,15 +3375,9 @@ static void handle_stripe6(struct stripe_head *sh) md_wakeup_thread(conf->mddev->thread); } - return_io(return_bi); -} + return_io(s.return_bi); -static void handle_stripe(struct stripe_head *sh) -{ - if (sh->raid_conf->level == 6) - handle_stripe6(sh); - else - handle_stripe5(sh); + clear_bit(STRIPE_ACTIVE, &sh->state); } static void raid5_activate_delayed(raid5_conf_t *conf) @@ -3833,6 +3607,9 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) rcu_read_lock(); rdev = rcu_dereference(conf->disks[dd_idx].rdev); if (rdev && test_bit(In_sync, &rdev->flags)) { + sector_t first_bad; + int bad_sectors; + atomic_inc(&rdev->nr_pending); rcu_read_unlock(); raid_bio->bi_next = (void*)rdev; @@ -3840,8 +3617,10 @@ static int chunk_aligned_read(mddev_t *mddev, struct bio * raid_bio) align_bi->bi_flags &= ~(1 << BIO_SEG_VALID); align_bi->bi_sector += rdev->data_offset; - if (!bio_fits_rdev(align_bi)) { - /* too big in some way */ + if (!bio_fits_rdev(align_bi) || + is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9, + &first_bad, &bad_sectors)) { + /* too big in some way, or has a known bad block */ bio_put(align_bi); rdev_dec_pending(rdev, mddev); return 0; @@ -4016,7 +3795,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) } } - if (bio_data_dir(bi) == WRITE && + if (rw == WRITE && logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { release_stripe(sh); @@ -4034,7 +3813,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) } if (test_bit(STRIPE_EXPANDING, &sh->state) || - !add_stripe_bio(sh, bi, dd_idx, (bi->bi_rw&RW_MASK))) { + !add_stripe_bio(sh, bi, dd_idx, rw)) { /* Stripe is busy expanding or * add failed due to overlap. Flush everything * and wait a while @@ -4375,10 +4154,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded); - spin_lock(&sh->lock); - set_bit(STRIPE_SYNCING, &sh->state); - clear_bit(STRIPE_INSYNC, &sh->state); - spin_unlock(&sh->lock); + set_bit(STRIPE_SYNC_REQUESTED, &sh->state); handle_stripe(sh); release_stripe(sh); @@ -4509,6 +4285,9 @@ static void raid5d(mddev_t *mddev) release_stripe(sh); cond_resched(); + if (mddev->flags & ~(1<<MD_CHANGE_PENDING)) + md_check_recovery(mddev); + spin_lock_irq(&conf->device_lock); } pr_debug("%d stripes handled\n", handled); @@ -5313,6 +5092,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number) * isn't possible. 
*/ if (!test_bit(Faulty, &rdev->flags) && + mddev->recovery_disabled != conf->recovery_disabled && !has_failed(conf) && number < conf->raid_disks) { err = -EBUSY; @@ -5341,6 +5121,9 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev) int first = 0; int last = conf->raid_disks - 1; + if (mddev->recovery_disabled == conf->recovery_disabled) + return -EBUSY; + if (has_failed(conf)) /* no point adding a device */ return -EINVAL; @@ -5519,16 +5302,14 @@ static int raid5_start_reshape(mddev_t *mddev) if (rdev->raid_disk < 0 && !test_bit(Faulty, &rdev->flags)) { if (raid5_add_disk(mddev, rdev) == 0) { - char nm[20]; if (rdev->raid_disk >= conf->previous_raid_disks) { set_bit(In_sync, &rdev->flags); added_devices++; } else rdev->recovery_offset = 0; - sprintf(nm, "rd%d", rdev->raid_disk); - if (sysfs_create_link(&mddev->kobj, - &rdev->kobj, nm)) + + if (sysfs_link_rdev(mddev, rdev)) /* Failure here is OK */; } } else if (rdev->raid_disk >= conf->previous_raid_disks @@ -5624,9 +5405,7 @@ static void raid5_finish_reshape(mddev_t *mddev) d++) { mdk_rdev_t *rdev = conf->disks[d].rdev; if (rdev && raid5_remove_disk(mddev, d) == 0) { - char nm[20]; - sprintf(nm, "rd%d", rdev->raid_disk); - sysfs_remove_link(&mddev->kobj, nm); + sysfs_unlink_rdev(mddev, rdev); rdev->raid_disk = -1; } } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 3ca77a2613ba..11b9566184b2 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -6,11 +6,11 @@ /* * - * Each stripe contains one buffer per disc. Each buffer can be in + * Each stripe contains one buffer per device. Each buffer can be in * one of a number of states stored in "flags". Changes between - * these states happen *almost* exclusively under a per-stripe - * spinlock. Some very specific changes can happen in bi_end_io, and - * these are not protected by the spin lock. + * these states happen *almost* exclusively under the protection of the + * STRIPE_ACTIVE flag. Some very specific changes can happen in bi_end_io, and + * these are not protected by STRIPE_ACTIVE. * * The flag bits that are used to represent these states are: * R5_UPTODATE and R5_LOCKED @@ -76,12 +76,10 @@ * block and the cached buffer are successfully written, any buffer on * a written list can be returned with b_end_io. * - * The write list and read list both act as fifos. The read list is - * protected by the device_lock. The write and written lists are - * protected by the stripe lock. The device_lock, which can be - * claimed while the stipe lock is held, is only for list - * manipulations and will only be held for a very short time. It can - * be claimed from interrupts. + * The write list and read list both act as fifos. The read list, + * write list and written list are protected by the device_lock. + * The device_lock is only for list manipulations and will only be + * held for a very short time. It can be claimed from interrupts. * * * Stripes in the stripe cache can be on one of two lists (or on @@ -96,7 +94,6 @@ * * The inactive_list, handle_list and hash bucket lists are all protected by the * device_lock. - * - stripes on the inactive_list never have their stripe_lock held. * - stripes have a reference counter. If count==0, they are on a list. * - If a stripe might need handling, STRIPE_HANDLE is set. * - When refcount reaches zero, then if STRIPE_HANDLE it is put on @@ -116,10 +113,10 @@ * attach a request to an active stripe (add_stripe_bh()) * lockdev attach-buffer unlockdev * handle a stripe (handle_stripe()) - * lockstripe clrSTRIPE_HANDLE ... 
+ * setSTRIPE_ACTIVE, clrSTRIPE_HANDLE ... * (lockdev check-buffers unlockdev) .. * change-state .. - * record io/ops needed unlockstripe schedule io/ops + * record io/ops needed clearSTRIPE_ACTIVE schedule io/ops * release an active stripe (release_stripe()) * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev * @@ -128,8 +125,7 @@ * on a cached buffer, and plus one if the stripe is undergoing stripe * operations. * - * Stripe operations are performed outside the stripe lock, - * the stripe operations are: + * The stripe operations are: * -copying data between the stripe cache and user application buffers * -computing blocks to save a disk access, or to recover a missing block * -updating the parity on a write operation (reconstruct write and @@ -159,7 +155,8 @@ */ /* - * Operations state - intermediate states that are visible outside of sh->lock + * Operations state - intermediate states that are visible outside of + * STRIPE_ACTIVE. * In general _idle indicates nothing is running, _run indicates a data * processing operation is active, and _result means the data processing result * is stable and can be acted upon. For simple operations like biofill and @@ -209,7 +206,6 @@ struct stripe_head { short ddf_layout;/* use DDF ordering to calculate Q */ unsigned long state; /* state flags */ atomic_t count; /* nr of active thread/requests */ - spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ enum check_states check_state; @@ -240,19 +236,20 @@ struct stripe_head { }; /* stripe_head_state - collects and tracks the dynamic state of a stripe_head - * for handle_stripe. It is only valid under spin_lock(sh->lock); + * for handle_stripe. */ struct stripe_head_state { int syncing, expanding, expanded; int locked, uptodate, to_read, to_write, failed, written; int to_fill, compute, req_compute, non_overwrite; - int failed_num; + int failed_num[2]; + int p_failed, q_failed; + int dec_preread_active; unsigned long ops_request; -}; -/* r6_state - extra state data only relevant to r6 */ -struct r6_state { - int p_failed, q_failed, failed_num[2]; + struct bio *return_bi; + mdk_rdev_t *blocked_rdev; + int handle_bad_blocks; }; /* Flags */ @@ -268,14 +265,16 @@ struct r6_state { #define R5_ReWrite 9 /* have tried to over-write the readerror */ #define R5_Expanded 10 /* This block now has post-expand data */ -#define R5_Wantcompute 11 /* compute_block in progress treat as - * uptodate - */ -#define R5_Wantfill 12 /* dev->toread contains a bio that needs - * filling - */ -#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ -#define R5_WantFUA 14 /* Write should be FUA */ +#define R5_Wantcompute 11 /* compute_block in progress treat as + * uptodate + */ +#define R5_Wantfill 12 /* dev->toread contains a bio that needs + * filling + */ +#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ +#define R5_WantFUA 14 /* Write should be FUA */ +#define R5_WriteError 15 /* got a write error - need to record it */ +#define R5_MadeGood 16 /* A bad block has been fixed by writing to it*/ /* * Write method */ @@ -289,21 +288,25 @@ struct r6_state { /* * Stripe state */ -#define STRIPE_HANDLE 2 -#define STRIPE_SYNCING 3 -#define STRIPE_INSYNC 4 -#define STRIPE_PREREAD_ACTIVE 5 -#define STRIPE_DELAYED 6 -#define STRIPE_DEGRADED 7 -#define STRIPE_BIT_DELAY 8 -#define STRIPE_EXPANDING 9 -#define STRIPE_EXPAND_SOURCE 10 -#define STRIPE_EXPAND_READY 11 -#define STRIPE_IO_STARTED 12 /* do not count towards 
'bypass_count' */ -#define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ -#define STRIPE_BIOFILL_RUN 14 -#define STRIPE_COMPUTE_RUN 15 -#define STRIPE_OPS_REQ_PENDING 16 +enum { + STRIPE_ACTIVE, + STRIPE_HANDLE, + STRIPE_SYNC_REQUESTED, + STRIPE_SYNCING, + STRIPE_INSYNC, + STRIPE_PREREAD_ACTIVE, + STRIPE_DELAYED, + STRIPE_DEGRADED, + STRIPE_BIT_DELAY, + STRIPE_EXPANDING, + STRIPE_EXPAND_SOURCE, + STRIPE_EXPAND_READY, + STRIPE_IO_STARTED, /* do not count towards 'bypass_count' */ + STRIPE_FULL_WRITE, /* all blocks are set to be overwritten */ + STRIPE_BIOFILL_RUN, + STRIPE_COMPUTE_RUN, + STRIPE_OPS_REQ_PENDING, +}; /* * Operation request flags @@ -336,7 +339,7 @@ struct r6_state { * PREREAD_ACTIVE. * In stripe_handle, if we find pre-reading is necessary, we do it if * PREREAD_ACTIVE is set, else we set DELAYED which will send it to the delayed queue. - * HANDLE gets cleared if stripe_handle leave nothing locked. + * HANDLE gets cleared if stripe_handle leaves nothing locked. */ @@ -399,7 +402,7 @@ struct raid5_private_data { * (fresh device added). * Cleared when a sync completes. */ - + int recovery_disabled; /* per cpu variables */ struct raid5_percpu { struct page *spare_page; /* Used when checking P/Q in raid6 */ diff --git a/drivers/net/Makefile b/drivers/net/Makefile index b7622c3745fa..e1eca2ab505e 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -282,6 +282,7 @@ obj-$(CONFIG_USB_HSO) += usb/ obj-$(CONFIG_USB_USBNET) += usb/ obj-$(CONFIG_USB_ZD1201) += usb/ obj-$(CONFIG_USB_IPHETH) += usb/ +obj-$(CONFIG_USB_CDC_PHONET) += usb/ obj-$(CONFIG_WLAN) += wireless/ obj-$(CONFIG_NET_TULIP) += tulip/ diff --git a/drivers/net/acenic.c b/drivers/net/acenic.c index 536038b22710..31798f5f5d06 100644 --- a/drivers/net/acenic.c +++ b/drivers/net/acenic.c @@ -1502,13 +1502,13 @@ static int __devinit ace_init(struct net_device *dev) * firmware to wipe the ring without re-initializing it. 
*/ if (!test_and_set_bit(0, &ap->std_refill_busy)) - ace_load_std_rx_ring(ap, RX_RING_SIZE); + ace_load_std_rx_ring(dev, RX_RING_SIZE); else printk(KERN_ERR "%s: Someone is busy refilling the RX ring\n", ap->name); if (ap->version >= 2) { if (!test_and_set_bit(0, &ap->mini_refill_busy)) - ace_load_mini_rx_ring(ap, RX_MINI_SIZE); + ace_load_mini_rx_ring(dev, RX_MINI_SIZE); else printk(KERN_ERR "%s: Someone is busy refilling " "the RX mini ring\n", ap->name); @@ -1584,9 +1584,10 @@ static void ace_watchdog(struct net_device *data) } -static void ace_tasklet(unsigned long dev) +static void ace_tasklet(unsigned long arg) { - struct ace_private *ap = netdev_priv((struct net_device *)dev); + struct net_device *dev = (struct net_device *) arg; + struct ace_private *ap = netdev_priv(dev); int cur_size; cur_size = atomic_read(&ap->cur_rx_bufs); @@ -1595,7 +1596,7 @@ static void ace_tasklet(unsigned long dev) #ifdef DEBUG printk("refilling buffers (current %i)\n", cur_size); #endif - ace_load_std_rx_ring(ap, RX_RING_SIZE - cur_size); + ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } if (ap->version >= 2) { @@ -1606,7 +1607,7 @@ static void ace_tasklet(unsigned long dev) printk("refilling mini buffers (current %i)\n", cur_size); #endif - ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size); + ace_load_mini_rx_ring(dev, RX_MINI_SIZE - cur_size); } } @@ -1616,7 +1617,7 @@ static void ace_tasklet(unsigned long dev) #ifdef DEBUG printk("refilling jumbo buffers (current %i)\n", cur_size); #endif - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE - cur_size); } ap->tasklet_pending = 0; } @@ -1642,8 +1643,9 @@ static void ace_dump_trace(struct ace_private *ap) * done only before the device is enabled, thus no interrupts are * generated and by the interrupt handler/tasklet handler. */ -static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1657,11 +1659,10 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_STD_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_STD_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_STD_BUFSIZE, @@ -1705,8 +1706,9 @@ static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs) } -static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1718,11 +1720,10 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_MINI_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_MINI_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_MINI_BUFSIZE, @@ -1762,8 +1763,9 @@ static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs) * Load the jumbo rx ring, this may happen at any time if the MTU * is changed to a value > 1500. 
*/ -static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs) +static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs) { + struct ace_private *ap = netdev_priv(dev); struct ace_regs __iomem *regs = ap->regs; short i, idx; @@ -1774,11 +1776,10 @@ static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs) struct rx_desc *rd; dma_addr_t mapping; - skb = dev_alloc_skb(ACE_JUMBO_BUFSIZE + NET_IP_ALIGN); + skb = netdev_alloc_skb_ip_align(dev, ACE_JUMBO_BUFSIZE); if (!skb) break; - skb_reserve(skb, NET_IP_ALIGN); mapping = pci_map_page(ap->pdev, virt_to_page(skb->data), offset_in_page(skb->data), ACE_JUMBO_BUFSIZE, @@ -2196,7 +2197,7 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) #ifdef DEBUG printk("low on std buffers %i\n", cur_size); #endif - ace_load_std_rx_ring(ap, + ace_load_std_rx_ring(dev, RX_RING_SIZE - cur_size); } else run_tasklet = 1; @@ -2212,7 +2213,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) printk("low on mini buffers %i\n", cur_size); #endif - ace_load_mini_rx_ring(ap, RX_MINI_SIZE - cur_size); + ace_load_mini_rx_ring(dev, + RX_MINI_SIZE - cur_size); } else run_tasklet = 1; } @@ -2228,7 +2230,8 @@ static irqreturn_t ace_interrupt(int irq, void *dev_id) printk("low on jumbo buffers %i\n", cur_size); #endif - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE - cur_size); + ace_load_jumbo_rx_ring(dev, + RX_JUMBO_SIZE - cur_size); } else run_tasklet = 1; } @@ -2267,7 +2270,7 @@ static int ace_open(struct net_device *dev) if (ap->jumbo && !test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); if (dev->flags & IFF_PROMISC) { cmd.evt = C_SET_PROMISC_MODE; @@ -2575,7 +2578,7 @@ static int ace_change_mtu(struct net_device *dev, int new_mtu) "support\n", dev->name); ap->jumbo = 1; if (!test_and_set_bit(0, &ap->jumbo_refill_busy)) - ace_load_jumbo_rx_ring(ap, RX_JUMBO_SIZE); + ace_load_jumbo_rx_ring(dev, RX_JUMBO_SIZE); ace_set_rxtx_parms(dev, 1); } } else { diff --git a/drivers/net/acenic.h b/drivers/net/acenic.h index f67dc9b0eb80..51c486cfbb8c 100644 --- a/drivers/net/acenic.h +++ b/drivers/net/acenic.h @@ -766,9 +766,9 @@ static inline void ace_unmask_irq(struct net_device *dev) * Prototypes */ static int ace_init(struct net_device *dev); -static void ace_load_std_rx_ring(struct ace_private *ap, int nr_bufs); -static void ace_load_mini_rx_ring(struct ace_private *ap, int nr_bufs); -static void ace_load_jumbo_rx_ring(struct ace_private *ap, int nr_bufs); +static void ace_load_std_rx_ring(struct net_device *dev, int nr_bufs); +static void ace_load_mini_rx_ring(struct net_device *dev, int nr_bufs); +static void ace_load_jumbo_rx_ring(struct net_device *dev, int nr_bufs); static irqreturn_t ace_interrupt(int irq, void *dev_id); static int ace_load_firmware(struct net_device *dev); static int ace_open(struct net_device *dev); diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 02842d05c11f..38a83acd502e 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1557,8 +1557,10 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (slave_dev->type != ARPHRD_ETHER) bond_setup_by_slave(bond_dev, slave_dev); - else + else { ether_setup(bond_dev); + bond_dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } netdev_bonding_change(bond_dev, NETDEV_POST_TYPE_CHANGE); @@ -4330,7 +4332,7 @@ static void bond_setup(struct net_device *bond_dev) bond_dev->tx_queue_len = 0; 
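	/*
	 * Illustrative aside, not part of this hunk: the IFF_TX_SKB_SHARING
	 * edits that recur through this series (bonding, ifb, macvlan, tun,
	 * veth, hdlc_fr, airo, hostap, ath6kl) all follow one pattern.
	 * ether_setup() now sets the flag to declare that transmitted skbs
	 * may be shared, so any device that clones, queues or requeues skbs
	 * must opt back out right after calling it:
	 *
	 *	ether_setup(dev);                        (sets IFF_TX_SKB_SHARING)
	 *	dev->priv_flags &= ~IFF_TX_SKB_SHARING;  (device may hold skbs)
	 */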
bond_dev->flags |= IFF_MASTER|IFF_MULTICAST; bond_dev->priv_flags |= IFF_BONDING; - bond_dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + bond_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); /* At first, we block adding VLANs. That's the only way to * prevent problems that occur when adding VLANs over an @@ -4691,7 +4693,7 @@ static int bond_check_params(struct bond_params *params) /* miimon and arp_interval not set, we need one so things * work as expected, see bonding.txt for details */ - pr_warning("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); + pr_debug("Warning: either miimon or arp_interval and arp_ip_target module parameters must be specified, otherwise bonding will not detect link failures! see bonding.txt for details.\n"); } if (primary && !USES_PRIMARY(bond_mode)) { diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index b60835f58650..2dfb4bf90087 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -1025,6 +1025,7 @@ static ssize_t bonding_store_primary(struct device *d, int i; struct slave *slave; struct bonding *bond = to_bond(d); + char ifname[IFNAMSIZ]; if (!rtnl_trylock()) return restart_syscall(); @@ -1035,32 +1036,33 @@ static ssize_t bonding_store_primary(struct device *d, if (!USES_PRIMARY(bond->params.mode)) { pr_info("%s: Unable to set primary slave; %s is in mode %d\n", bond->dev->name, bond->dev->name, bond->params.mode); - } else { - bond_for_each_slave(bond, slave, i) { - if (strnicmp - (slave->dev->name, buf, - strlen(slave->dev->name)) == 0) { - pr_info("%s: Setting %s as primary slave.\n", - bond->dev->name, slave->dev->name); - bond->primary_slave = slave; - strcpy(bond->params.primary, slave->dev->name); - bond_select_active_slave(bond); - goto out; - } - } + goto out; + } - /* if we got here, then we didn't match the name of any slave */ + sscanf(buf, "%16s", ifname); /* IFNAMSIZ */ - if (strlen(buf) == 0 || buf[0] == '\n') { - pr_info("%s: Setting primary slave to None.\n", - bond->dev->name); - bond->primary_slave = NULL; - bond_select_active_slave(bond); - } else { - pr_info("%s: Unable to set %.*s as primary slave as it is not a slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); + /* check to see if we are clearing primary */ + if (!strlen(ifname) || buf[0] == '\n') { + pr_info("%s: Setting primary slave to None.\n", + bond->dev->name); + bond->primary_slave = NULL; + bond_select_active_slave(bond); + goto out; + } + + bond_for_each_slave(bond, slave, i) { + if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { + pr_info("%s: Setting %s as primary slave.\n", + bond->dev->name, slave->dev->name); + bond->primary_slave = slave; + strcpy(bond->params.primary, slave->dev->name); + bond_select_active_slave(bond); + goto out; } } + + pr_info("%s: Unable to set %.*s as primary slave.\n", + bond->dev->name, (int)strlen(buf) - 1, buf); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); @@ -1195,6 +1197,7 @@ static ssize_t bonding_store_active_slave(struct device *d, struct slave *old_active = NULL; struct slave *new_active = NULL; struct bonding *bond = to_bond(d); + char ifname[IFNAMSIZ]; if (!rtnl_trylock()) return restart_syscall(); @@ -1203,56 +1206,62 @@ static ssize_t bonding_store_active_slave(struct device *d, read_lock(&bond->lock); write_lock_bh(&bond->curr_slave_lock); - if (!USES_PRIMARY(bond->params.mode)) + if 
(!USES_PRIMARY(bond->params.mode)) { pr_info("%s: Unable to change active slave; %s is in mode %d\n", bond->dev->name, bond->dev->name, bond->params.mode); - else { - bond_for_each_slave(bond, slave, i) { - if (strnicmp - (slave->dev->name, buf, - strlen(slave->dev->name)) == 0) { - old_active = bond->curr_active_slave; - new_active = slave; - if (new_active == old_active) { - /* do nothing */ - pr_info("%s: %s is already the current active slave.\n", + goto out; + } + + sscanf(buf, "%16s", ifname); /* IFNAMSIZ */ + + /* check to see if we are clearing active */ + if (!strlen(ifname) || buf[0] == '\n') { + pr_info("%s: Clearing current active slave.\n", + bond->dev->name); + bond->curr_active_slave = NULL; + bond_select_active_slave(bond); + goto out; + } + + bond_for_each_slave(bond, slave, i) { + if (strncmp(slave->dev->name, ifname, IFNAMSIZ) == 0) { + old_active = bond->curr_active_slave; + new_active = slave; + if (new_active == old_active) { + /* do nothing */ + pr_info("%s: %s is already the current" + " active slave.\n", + bond->dev->name, + slave->dev->name); + goto out; + } + else { + if ((new_active) && + (old_active) && + (new_active->link == BOND_LINK_UP) && + IS_UP(new_active->dev)) { + pr_info("%s: Setting %s as active" + " slave.\n", bond->dev->name, slave->dev->name); - goto out; + bond_change_active_slave(bond, + new_active); } else { - if ((new_active) && - (old_active) && - (new_active->link == BOND_LINK_UP) && - IS_UP(new_active->dev)) { - pr_info("%s: Setting %s as active slave.\n", - bond->dev->name, - slave->dev->name); - bond_change_active_slave(bond, new_active); - } - else { - pr_info("%s: Could not set %s as active slave; either %s is down or the link is down.\n", - bond->dev->name, - slave->dev->name, - slave->dev->name); - } - goto out; + pr_info("%s: Could not set %s as" + " active slave; either %s is" + " down or the link is down.\n", + bond->dev->name, + slave->dev->name, + slave->dev->name); } + goto out; } } - - /* if we got here, then we didn't match the name of any slave */ - - if (strlen(buf) == 0 || buf[0] == '\n') { - pr_info("%s: Setting active slave to None.\n", - bond->dev->name); - bond->primary_slave = NULL; - bond_select_active_slave(bond); - } else { - pr_info("%s: Unable to set %.*s as active slave as it is not a slave.\n", - bond->dev->name, (int)strlen(buf) - 1, buf); - } } + + pr_info("%s: Unable to set %.*s as active slave.\n", + bond->dev->name, (int)strlen(buf) - 1, buf); out: write_unlock_bh(&bond->curr_slave_lock); read_unlock(&bond->lock); diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c index e64cd9ceac3f..e55df308a3af 100644 --- a/drivers/net/forcedeth.c +++ b/drivers/net/forcedeth.c @@ -2764,7 +2764,14 @@ static int nv_rx_process_optimized(struct net_device *dev, int limit) prefetch(skb->data); vlanflags = le32_to_cpu(np->get_rx.ex->buflow); - if (vlanflags & NV_RX3_VLAN_TAG_PRESENT) { + + /* + * There's need to check for NETIF_F_HW_VLAN_RX here. + * Even if vlan rx accel is disabled, + * NV_RX3_VLAN_TAG_PRESENT is pseudo randomly set. 
+ */ + if (dev->features & NETIF_F_HW_VLAN_RX && + vlanflags & NV_RX3_VLAN_TAG_PRESENT) { u16 vid = vlanflags & NV_RX3_VLAN_TAG_MASK; __vlan_hwaccel_put_tag(skb, vid); @@ -5331,15 +5338,16 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK; dev->hw_features |= NETIF_F_IP_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_RXCSUM; - dev->features |= dev->hw_features; } np->vlanctl_bits = 0; if (id->driver_data & DEV_HAS_VLAN) { np->vlanctl_bits = NVREG_VLANCONTROL_ENABLE; - dev->features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; + dev->hw_features |= NETIF_F_HW_VLAN_RX | NETIF_F_HW_VLAN_TX; } + dev->features |= dev->hw_features; + np->pause_flags = NV_PAUSEFRAME_RX_CAPABLE | NV_PAUSEFRAME_RX_REQ | NV_PAUSEFRAME_AUTONEG; if ((id->driver_data & DEV_HAS_PAUSEFRAME_TX_V1) || (id->driver_data & DEV_HAS_PAUSEFRAME_TX_V2) || @@ -5607,6 +5615,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i goto out_error; } + nv_vlan_mode(dev, dev->features); + netif_carrier_off(dev); dev_info(&pci_dev->dev, "ifname %s, PHY OUI 0x%x @ %d, addr %pM\n", diff --git a/drivers/net/gianfar.c index 835cd2588148..2659daad783d 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -388,12 +388,8 @@ static void gfar_init_mac(struct net_device *ndev) if (priv->hwts_rx_en) rctrl |= RCTRL_PRSDEP_INIT | RCTRL_TS_ENABLE; - /* keep vlan related bits if it's enabled */ - if (ndev->features & NETIF_F_HW_VLAN_TX) - rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; - if (ndev->features & NETIF_F_HW_VLAN_RX) - tctrl |= TCTRL_VLINS; + rctrl |= RCTRL_VLEX | RCTRL_PRSDEP_INIT; /* Init rctrl based on our settings */ gfar_write(&regs->rctrl, rctrl); diff --git a/drivers/net/ifb.c index 6e82dd32e806..46b5f5fd686b 100644 --- a/drivers/net/ifb.c +++ b/drivers/net/ifb.c @@ -183,7 +183,7 @@ static void ifb_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->flags &= ~IFF_MULTICAST; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); random_ether_addr(dev->dev_addr); } diff --git a/drivers/net/macvlan.c index ba631fcece34..05172c39a0ce 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -572,7 +572,7 @@ void macvlan_common_setup(struct net_device *dev) { ether_setup(dev); - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->netdev_ops = &macvlan_netdev_ops; dev->destructor = free_netdev; dev->header_ops = &macvlan_hard_header_ops, diff --git a/drivers/net/tg3.c index 803576568154..dc3fbf61910b 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -190,6 +190,7 @@ static inline void _tg3_flag_clear(enum TG3_FLAGS flag, unsigned long *bits) /* minimum number of free TX descriptors required to wake up TX process */ #define TG3_TX_WAKEUP_THRESH(tnapi) ((tnapi)->tx_pending / 4) +#define TG3_TX_BD_DMA_MAX 4096 #define TG3_RAW_IP_ALIGN 2 @@ -4824,7 +4825,7 @@ static void tg3_tx(struct tg3_napi *tnapi) txq = netdev_get_tx_queue(tp->dev, index); while (sw_idx != hw_idx) { - struct ring_info *ri = &tnapi->tx_buffers[sw_idx]; + struct tg3_tx_ring_info *ri = &tnapi->tx_buffers[sw_idx]; struct sk_buff *skb = ri->skb; int i, tx_bug = 0; @@ -4840,6 +4841,12 @@ static void tg3_tx(struct tg3_napi *tnapi) ri->skb = NULL; + while (ri->fragmented) { + ri->fragmented = false; + sw_idx = NEXT_TX(sw_idx); + ri = 
&tnapi->tx_buffers[sw_idx]; + } + sw_idx = NEXT_TX(sw_idx); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { @@ -4851,6 +4858,13 @@ static void tg3_tx(struct tg3_napi *tnapi) dma_unmap_addr(ri, mapping), skb_shinfo(skb)->frags[i].size, PCI_DMA_TODEVICE); + + while (ri->fragmented) { + ri->fragmented = false; + sw_idx = NEXT_TX(sw_idx); + ri = &tnapi->tx_buffers[sw_idx]; + } + sw_idx = NEXT_TX(sw_idx); } @@ -5901,40 +5915,100 @@ static inline int tg3_40bit_overflow_test(struct tg3 *tp, dma_addr_t mapping, #endif } -static void tg3_set_txd(struct tg3_napi *tnapi, int entry, - dma_addr_t mapping, int len, u32 flags, - u32 mss_and_is_end) +static inline void tg3_tx_set_bd(struct tg3_tx_buffer_desc *txbd, + dma_addr_t mapping, u32 len, u32 flags, + u32 mss, u32 vlan) +{ + txbd->addr_hi = ((u64) mapping >> 32); + txbd->addr_lo = ((u64) mapping & 0xffffffff); + txbd->len_flags = (len << TXD_LEN_SHIFT) | (flags & 0x0000ffff); + txbd->vlan_tag = (mss << TXD_MSS_SHIFT) | (vlan << TXD_VLAN_TAG_SHIFT); +} + +static bool tg3_tx_frag_set(struct tg3_napi *tnapi, u32 *entry, u32 *budget, + dma_addr_t map, u32 len, u32 flags, + u32 mss, u32 vlan) { - struct tg3_tx_buffer_desc *txd = &tnapi->tx_ring[entry]; - int is_end = (mss_and_is_end & 0x1); - u32 mss = (mss_and_is_end >> 1); - u32 vlan_tag = 0; + struct tg3 *tp = tnapi->tp; + bool hwbug = false; + + if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8) + hwbug = 1; + + if (tg3_4g_overflow_test(map, len)) + hwbug = 1; + + if (tg3_40bit_overflow_test(tp, map, len)) + hwbug = 1; + + if (tg3_flag(tp, 4K_FIFO_LIMIT)) { + u32 tmp_flag = flags & ~TXD_FLAG_END; + while (len > TG3_TX_BD_DMA_MAX) { + u32 frag_len = TG3_TX_BD_DMA_MAX; + len -= TG3_TX_BD_DMA_MAX; + + if (len) { + tnapi->tx_buffers[*entry].fragmented = true; + /* Avoid the 8byte DMA problem */ + if (len <= 8) { + len += TG3_TX_BD_DMA_MAX / 2; + frag_len = TG3_TX_BD_DMA_MAX / 2; + } + } else + tmp_flag = flags; + + if (*budget) { + tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + frag_len, tmp_flag, mss, vlan); + (*budget)--; + *entry = NEXT_TX(*entry); + } else { + hwbug = 1; + break; + } + + map += frag_len; + } - if (is_end) - flags |= TXD_FLAG_END; - if (flags & TXD_FLAG_VLAN) { - vlan_tag = flags >> 16; - flags &= 0xffff; + if (len) { + if (*budget) { + tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + len, flags, mss, vlan); + (*budget)--; + *entry = NEXT_TX(*entry); + } else { + hwbug = 1; + } + } + } else { + tg3_tx_set_bd(&tnapi->tx_ring[*entry], map, + len, flags, mss, vlan); + *entry = NEXT_TX(*entry); } - vlan_tag |= (mss << TXD_MSS_SHIFT); - txd->addr_hi = ((u64) mapping >> 32); - txd->addr_lo = ((u64) mapping & 0xffffffff); - txd->len_flags = (len << TXD_LEN_SHIFT) | flags; - txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT; + return hwbug; } -static void tg3_skb_error_unmap(struct tg3_napi *tnapi, - struct sk_buff *skb, int last) +static void tg3_tx_skb_unmap(struct tg3_napi *tnapi, u32 entry, int last) { int i; - u32 entry = tnapi->tx_prod; - struct ring_info *txb = &tnapi->tx_buffers[entry]; + struct sk_buff *skb; + struct tg3_tx_ring_info *txb = &tnapi->tx_buffers[entry]; + + skb = txb->skb; + txb->skb = NULL; pci_unmap_single(tnapi->tp->pdev, dma_unmap_addr(txb, mapping), skb_headlen(skb), PCI_DMA_TODEVICE); + + while (txb->fragmented) { + txb->fragmented = false; + entry = NEXT_TX(entry); + txb = &tnapi->tx_buffers[entry]; + } + for (i = 0; i < last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -5944,18 +6018,24 @@ static void tg3_skb_error_unmap(struct tg3_napi *tnapi, 
pci_unmap_page(tnapi->tp->pdev, dma_unmap_addr(txb, mapping), frag->size, PCI_DMA_TODEVICE); + + while (txb->fragmented) { + txb->fragmented = false; + entry = NEXT_TX(entry); + txb = &tnapi->tx_buffers[entry]; + } } } /* Workaround 4GB and 40-bit hardware DMA bugs. */ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, struct sk_buff *skb, - u32 base_flags, u32 mss) + u32 *entry, u32 *budget, + u32 base_flags, u32 mss, u32 vlan) { struct tg3 *tp = tnapi->tp; struct sk_buff *new_skb; dma_addr_t new_addr = 0; - u32 entry = tnapi->tx_prod; int ret = 0; if (GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5701) @@ -5976,24 +6056,22 @@ static int tigon3_dma_hwbug_workaround(struct tg3_napi *tnapi, PCI_DMA_TODEVICE); /* Make sure the mapping succeeded */ if (pci_dma_mapping_error(tp->pdev, new_addr)) { - ret = -1; dev_kfree_skb(new_skb); - - /* Make sure new skb does not cross any 4G boundaries. - * Drop the packet if it does. - */ - } else if (tg3_4g_overflow_test(new_addr, new_skb->len)) { - pci_unmap_single(tp->pdev, new_addr, new_skb->len, - PCI_DMA_TODEVICE); ret = -1; - dev_kfree_skb(new_skb); } else { - tnapi->tx_buffers[entry].skb = new_skb; - dma_unmap_addr_set(&tnapi->tx_buffers[entry], + base_flags |= TXD_FLAG_END; + + tnapi->tx_buffers[*entry].skb = new_skb; + dma_unmap_addr_set(&tnapi->tx_buffers[*entry], mapping, new_addr); - tg3_set_txd(tnapi, entry, new_addr, new_skb->len, - base_flags, 1 | (mss << 1)); + if (tg3_tx_frag_set(tnapi, entry, budget, new_addr, + new_skb->len, base_flags, + mss, vlan)) { + tg3_tx_skb_unmap(tnapi, *entry, 0); + dev_kfree_skb(new_skb); + ret = -1; + } } } @@ -6051,7 +6129,8 @@ tg3_tso_bug_end: static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct tg3 *tp = netdev_priv(dev); - u32 len, entry, base_flags, mss; + u32 len, entry, base_flags, mss, vlan = 0; + u32 budget; int i = -1, would_hit_hwbug; dma_addr_t mapping; struct tg3_napi *tnapi; @@ -6063,12 +6142,14 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (tg3_flag(tp, ENABLE_TSS)) tnapi++; + budget = tg3_tx_avail(tnapi); + /* We are running in BH disabled context with netif_tx_lock * and TX reclaim runs via tp->napi.poll inside of a software * interrupt. Furthermore, IRQ processing runs lockless so we have * no IRQ context deadlocks to worry about either. Rejoice! 
*/ - if (unlikely(tg3_tx_avail(tnapi) <= (skb_shinfo(skb)->nr_frags + 1))) { + if (unlikely(budget <= (skb_shinfo(skb)->nr_frags + 1))) { if (!netif_tx_queue_stopped(txq)) { netif_tx_stop_queue(txq); @@ -6153,9 +6234,12 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) } } - if (vlan_tx_tag_present(skb)) - base_flags |= (TXD_FLAG_VLAN | - (vlan_tx_tag_get(skb) << 16)); +#ifdef BCM_KERNEL_SUPPORTS_8021Q + if (vlan_tx_tag_present(skb)) { + base_flags |= TXD_FLAG_VLAN; + vlan = vlan_tx_tag_get(skb); + } +#endif if (tg3_flag(tp, USE_JUMBO_BDFLAG) && !mss && skb->len > VLAN_ETH_FRAME_LEN) @@ -6174,25 +6258,23 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) would_hit_hwbug = 0; - if (tg3_flag(tp, SHORT_DMA_BUG) && len <= 8) - would_hit_hwbug = 1; - - if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; - - if (tg3_40bit_overflow_test(tp, mapping, len)) - would_hit_hwbug = 1; - if (tg3_flag(tp, 5701_DMA_BUG)) would_hit_hwbug = 1; - tg3_set_txd(tnapi, entry, mapping, len, base_flags, - (skb_shinfo(skb)->nr_frags == 0) | (mss << 1)); - - entry = NEXT_TX(entry); + if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, len, base_flags | + ((skb_shinfo(skb)->nr_frags == 0) ? TXD_FLAG_END : 0), + mss, vlan)) + would_hit_hwbug = 1; /* Now loop through additional data fragments, and queue them. */ if (skb_shinfo(skb)->nr_frags > 0) { + u32 tmp_mss = mss; + + if (!tg3_flag(tp, HW_TSO_1) && + !tg3_flag(tp, HW_TSO_2) && + !tg3_flag(tp, HW_TSO_3)) + tmp_mss = 0; + last = skb_shinfo(skb)->nr_frags - 1; for (i = 0; i <= last; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; @@ -6209,39 +6291,25 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) if (pci_dma_mapping_error(tp->pdev, mapping)) goto dma_error; - if (tg3_flag(tp, SHORT_DMA_BUG) && - len <= 8) + if (tg3_tx_frag_set(tnapi, &entry, &budget, mapping, + len, base_flags | + ((i == last) ? TXD_FLAG_END : 0), + tmp_mss, vlan)) would_hit_hwbug = 1; - - if (tg3_4g_overflow_test(mapping, len)) - would_hit_hwbug = 1; - - if (tg3_40bit_overflow_test(tp, mapping, len)) - would_hit_hwbug = 1; - - if (tg3_flag(tp, HW_TSO_1) || - tg3_flag(tp, HW_TSO_2) || - tg3_flag(tp, HW_TSO_3)) - tg3_set_txd(tnapi, entry, mapping, len, - base_flags, (i == last)|(mss << 1)); - else - tg3_set_txd(tnapi, entry, mapping, len, - base_flags, (i == last)); - - entry = NEXT_TX(entry); } } if (would_hit_hwbug) { - tg3_skb_error_unmap(tnapi, skb, i); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); /* If the workaround fails due to memory/mapping * failure, silently drop this packet. 
*/ - if (tigon3_dma_hwbug_workaround(tnapi, skb, base_flags, mss)) + entry = tnapi->tx_prod; + budget = tg3_tx_avail(tnapi); + if (tigon3_dma_hwbug_workaround(tnapi, skb, &entry, &budget, + base_flags, mss, vlan)) goto out_unlock; - - entry = NEXT_TX(tnapi->tx_prod); } skb_tx_timestamp(skb); @@ -6269,7 +6337,7 @@ out_unlock: return NETDEV_TX_OK; dma_error: - tg3_skb_error_unmap(tnapi, skb, i); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod, i); dev_kfree_skb(skb); tnapi->tx_buffers[tnapi->tx_prod].skb = NULL; return NETDEV_TX_OK; @@ -6602,35 +6670,13 @@ static void tg3_free_rings(struct tg3 *tp) if (!tnapi->tx_buffers) continue; - for (i = 0; i < TG3_TX_RING_SIZE; ) { - struct ring_info *txp; - struct sk_buff *skb; - unsigned int k; - - txp = &tnapi->tx_buffers[i]; - skb = txp->skb; + for (i = 0; i < TG3_TX_RING_SIZE; i++) { + struct sk_buff *skb = tnapi->tx_buffers[i].skb; - if (skb == NULL) { - i++; + if (!skb) continue; - } - - pci_unmap_single(tp->pdev, - dma_unmap_addr(txp, mapping), - skb_headlen(skb), - PCI_DMA_TODEVICE); - txp->skb = NULL; - i++; - - for (k = 0; k < skb_shinfo(skb)->nr_frags; k++) { - txp = &tnapi->tx_buffers[i & (TG3_TX_RING_SIZE - 1)]; - pci_unmap_page(tp->pdev, - dma_unmap_addr(txp, mapping), - skb_shinfo(skb)->frags[k].size, - PCI_DMA_TODEVICE); - i++; - } + tg3_tx_skb_unmap(tnapi, i, skb_shinfo(skb)->nr_frags); dev_kfree_skb_any(skb); } @@ -6762,9 +6808,9 @@ static int tg3_alloc_consistent(struct tg3 *tp) */ if ((!i && !tg3_flag(tp, ENABLE_TSS)) || (i && tg3_flag(tp, ENABLE_TSS))) { - tnapi->tx_buffers = kzalloc(sizeof(struct ring_info) * - TG3_TX_RING_SIZE, - GFP_KERNEL); + tnapi->tx_buffers = kzalloc( + sizeof(struct tg3_tx_ring_info) * + TG3_TX_RING_SIZE, GFP_KERNEL); if (!tnapi->tx_buffers) goto err_out; @@ -8360,7 +8406,7 @@ static int tg3_reset_hw(struct tg3 *tp, int reset_phy) /* Program the jumbo buffer descriptor ring control * blocks on those devices that have them. 
*/ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719 || + if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0 || (tg3_flag(tp, JUMBO_CAPABLE) && !tg3_flag(tp, 5780_CLASS))) { if (tg3_flag(tp, JUMBO_RING_ENABLE)) { @@ -11204,6 +11250,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) { u32 mac_mode, rx_start_idx, rx_idx, tx_idx, opaque_key; u32 base_flags = 0, mss = 0, desc_idx, coal_now, data_off, val; + u32 budget; struct sk_buff *skb, *rx_skb; u8 *tx_data; dma_addr_t map; @@ -11363,6 +11410,10 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) return -EIO; } + val = tnapi->tx_prod; + tnapi->tx_buffers[val].skb = skb; + dma_unmap_addr_set(&tnapi->tx_buffers[val], mapping, map); + tw32_f(HOSTCC_MODE, tp->coalesce_mode | HOSTCC_MODE_ENABLE | rnapi->coal_now); @@ -11370,8 +11421,13 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) rx_start_idx = rnapi->hw_status->idx[0].rx_producer; - tg3_set_txd(tnapi, tnapi->tx_prod, map, tx_len, - base_flags, (mss << 1) | 1); + budget = tg3_tx_avail(tnapi); + if (tg3_tx_frag_set(tnapi, &val, &budget, map, tx_len, + base_flags | TXD_FLAG_END, mss, 0)) { + tnapi->tx_buffers[val].skb = NULL; + dev_kfree_skb(skb); + return -EIO; + } tnapi->tx_prod++; @@ -11394,7 +11450,7 @@ static int tg3_run_loopback(struct tg3 *tp, u32 pktsz, int loopback_mode) break; } - pci_unmap_single(tp->pdev, map, tx_len, PCI_DMA_TODEVICE); + tg3_tx_skb_unmap(tnapi, tnapi->tx_prod - 1, 0); dev_kfree_skb(skb); if (tx_idx != tnapi->tx_prod) @@ -13817,7 +13873,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) tg3_flag_set(tp, 5705_PLUS); /* Determine TSO capabilities */ - if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) + if (tp->pci_chip_rev_id == CHIPREV_ID_5719_A0) ; /* Do nothing. HW bug. */ else if (tg3_flag(tp, 57765_PLUS)) tg3_flag_set(tp, HW_TSO_3); @@ -13880,11 +13936,14 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) if (tg3_flag(tp, 5755_PLUS)) tg3_flag_set(tp, SHORT_DMA_BUG); + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5719) + tg3_flag_set(tp, 4K_FIFO_LIMIT); + if (tg3_flag(tp, 5717_PLUS)) tg3_flag_set(tp, LRG_PROD_RING_CAP); if (tg3_flag(tp, 57765_PLUS) && - GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5719) + tp->pci_chip_rev_id != CHIPREV_ID_5719_A0) tg3_flag_set(tp, USE_JUMBO_BDFLAG); if (!tg3_flag(tp, 5705_PLUS) || diff --git a/drivers/net/tg3.h b/drivers/net/tg3.h index 691539ba17b3..2ea456dd5880 100644 --- a/drivers/net/tg3.h +++ b/drivers/net/tg3.h @@ -2652,6 +2652,12 @@ struct ring_info { DEFINE_DMA_UNMAP_ADDR(mapping); }; +struct tg3_tx_ring_info { + struct sk_buff *skb; + DEFINE_DMA_UNMAP_ADDR(mapping); + bool fragmented; +}; + struct tg3_link_config { /* Describes what we're trying to get. 
*/ u32 advertising; @@ -2816,7 +2822,7 @@ struct tg3_napi { u32 last_tx_cons; u32 prodmbox; struct tg3_tx_buffer_desc *tx_ring; - struct ring_info *tx_buffers; + struct tg3_tx_ring_info *tx_buffers; dma_addr_t status_mapping; dma_addr_t rx_rcb_mapping; @@ -2899,6 +2905,7 @@ enum TG3_FLAGS { TG3_FLAG_57765_PLUS, TG3_FLAG_APE_HAS_NCSI, TG3_FLAG_5717_PLUS, + TG3_FLAG_4K_FIFO_LIMIT, /* Add new flags before this comment and TG3_FLAG_NUMBER_OF_FLAGS */ TG3_FLAG_NUMBER_OF_FLAGS, /* Last entry in enum TG3_FLAGS */ diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 9a6b3824da14..71f3d1a35b74 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -528,6 +528,7 @@ static void tun_net_init(struct net_device *dev) dev->netdev_ops = &tap_netdev_ops; /* Ethernet TAP Device */ ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; random_ether_addr(dev->dev_addr); diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c index 52502883523e..c5c4b4def7fb 100644 --- a/drivers/net/usb/asix.c +++ b/drivers/net/usb/asix.c @@ -314,12 +314,11 @@ static int asix_rx_fixup(struct usbnet *dev, struct sk_buff *skb) skb_pull(skb, 4); while (skb->len > 0) { - if ((short)(header & 0x0000ffff) != - ~((short)((header & 0xffff0000) >> 16))) { + if ((header & 0x07ff) != ((~header >> 16) & 0x07ff)) netdev_err(dev->net, "asix_rx_fixup() Bad Header Length\n"); - } + /* get the packet length */ - size = (u16) (header & 0x0000ffff); + size = (u16) (header & 0x000007ff); if ((skb->len) - ((size + 1) & 0xfffe) == 0) { u8 alignment = (unsigned long)skb->data & 0x3; diff --git a/drivers/net/veth.c b/drivers/net/veth.c index 7f78db7bd68d..5b23767ea817 100644 --- a/drivers/net/veth.c +++ b/drivers/net/veth.c @@ -263,6 +263,8 @@ static void veth_setup(struct net_device *dev) { ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->netdev_ops = &veth_netdev_ops; dev->ethtool_ops = &veth_ethtool_ops; dev->features |= NETIF_F_LLTX; diff --git a/drivers/net/wan/hdlc_fr.c b/drivers/net/wan/hdlc_fr.c index b25c9229a6a9..eb2028187fbe 100644 --- a/drivers/net/wan/hdlc_fr.c +++ b/drivers/net/wan/hdlc_fr.c @@ -1074,9 +1074,10 @@ static int fr_add_pvc(struct net_device *frad, unsigned int dlci, int type) used = pvc_is_used(pvc); - if (type == ARPHRD_ETHER) + if (type == ARPHRD_ETHER) { dev = alloc_netdev(0, "pvceth%d", ether_setup); - else + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + } else dev = alloc_netdev(0, "pvc%d", pvc_setup); if (!dev) { diff --git a/drivers/net/wireless/airo.c b/drivers/net/wireless/airo.c index 55cf71fbffe3..e1b3e3c134fd 100644 --- a/drivers/net/wireless/airo.c +++ b/drivers/net/wireless/airo.c @@ -2823,6 +2823,7 @@ static struct net_device *_init_airo_card( unsigned short irq, int port, dev->wireless_data = &ai->wireless_data; dev->irq = irq; dev->base_addr = port; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; SET_NETDEV_DEV(dev, dmdev); diff --git a/drivers/net/wireless/b43/Kconfig b/drivers/net/wireless/b43/Kconfig index d2293dcc117f..3cab843afb05 100644 --- a/drivers/net/wireless/b43/Kconfig +++ b/drivers/net/wireless/b43/Kconfig @@ -28,7 +28,7 @@ config B43 config B43_BCMA bool "Support for BCMA bus" - depends on B43 && BCMA && BROKEN + depends on B43 && BCMA default y config B43_SSB diff --git a/drivers/net/wireless/b43/bus.c b/drivers/net/wireless/b43/bus.c index 64c3f65ff8c0..05f6c7bff6ab 100644 --- a/drivers/net/wireless/b43/bus.c +++ b/drivers/net/wireless/b43/bus.c @@ -244,10 +244,12 @@ void b43_bus_set_wldev(struct b43_bus_dev *dev, void *wldev) #ifdef CONFIG_B43_BCMA case 
B43_BUS_BCMA: bcma_set_drvdata(dev->bdev, wldev); + break; #endif #ifdef CONFIG_B43_SSB case B43_BUS_SSB: ssb_set_drvdata(dev->sdev, wldev); + break; #endif } } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index 032d46674f6b..26f1ab840cc7 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -5350,6 +5350,7 @@ static void b43_ssb_remove(struct ssb_device *sdev) { struct b43_wl *wl = ssb_get_devtypedata(sdev); struct b43_wldev *wldev = ssb_get_drvdata(sdev); + struct b43_bus_dev *dev = wldev->dev; /* We must cancel any work here before unregistering from ieee80211, * as the ieee80211 unreg will destroy the workqueue. */ @@ -5365,14 +5366,14 @@ static void b43_ssb_remove(struct ssb_device *sdev) ieee80211_unregister_hw(wl->hw); } - b43_one_core_detach(wldev->dev); + b43_one_core_detach(dev); if (list_empty(&wl->devlist)) { b43_leds_unregister(wl); /* Last core on the chip unregistered. * We can destroy common struct b43_wl. */ - b43_wireless_exit(wldev->dev, wl); + b43_wireless_exit(dev, wl); } } diff --git a/drivers/net/wireless/hostap/hostap_main.c b/drivers/net/wireless/hostap/hostap_main.c index d5084829c9e5..89a116fba1de 100644 --- a/drivers/net/wireless/hostap/hostap_main.c +++ b/drivers/net/wireless/hostap/hostap_main.c @@ -855,6 +855,7 @@ void hostap_setup_dev(struct net_device *dev, local_info_t *local, iface = netdev_priv(dev); ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; /* kernel callbacks */ if (iface) { diff --git a/drivers/nfc/pn533.c b/drivers/nfc/pn533.c index 037231540719..c77e0543e502 100644 --- a/drivers/nfc/pn533.c +++ b/drivers/nfc/pn533.c @@ -1596,7 +1596,7 @@ static void pn533_disconnect(struct usb_interface *interface) usb_free_urb(dev->out_urb); kfree(dev); - nfc_dev_info(&dev->interface->dev, "NXP PN533 NFC device disconnected"); + nfc_dev_info(&interface->dev, "NXP PN533 NFC device disconnected"); } static struct usb_driver pn533_driver = { diff --git a/drivers/scsi/be2iscsi/be_main.h b/drivers/scsi/be2iscsi/be_main.h index 081c171a1ed6..5ce5170254ca 100644 --- a/drivers/scsi/be2iscsi/be_main.h +++ b/drivers/scsi/be2iscsi/be_main.h @@ -397,7 +397,7 @@ struct amap_pdu_data_out { }; struct be_cmd_bhs { - struct iscsi_cmd iscsi_hdr; + struct iscsi_scsi_req iscsi_hdr; unsigned char pad1[16]; struct pdu_data_out iscsi_data_pdu; unsigned char pad2[BE_SENSE_INFO_SIZE - @@ -428,7 +428,7 @@ struct be_nonio_bhs { }; struct be_status_bhs { - struct iscsi_cmd iscsi_hdr; + struct iscsi_scsi_req iscsi_hdr; unsigned char pad1[16]; /** * The plus 2 below is to hold the sense info length that gets diff --git a/drivers/scsi/bnx2i/bnx2i_hwi.c b/drivers/scsi/bnx2i/bnx2i_hwi.c index 030a96c646c3..9ae80cd5953b 100644 --- a/drivers/scsi/bnx2i/bnx2i_hwi.c +++ b/drivers/scsi/bnx2i/bnx2i_hwi.c @@ -332,11 +332,11 @@ int bnx2i_send_iscsi_login(struct bnx2i_conn *bnx2i_conn, { struct bnx2i_cmd *bnx2i_cmd; struct bnx2i_login_request *login_wqe; - struct iscsi_login *login_hdr; + struct iscsi_login_req *login_hdr; u32 dword; bnx2i_cmd = (struct bnx2i_cmd *)task->dd_data; - login_hdr = (struct iscsi_login *)task->hdr; + login_hdr = (struct iscsi_login_req *)task->hdr; login_wqe = (struct bnx2i_login_request *) bnx2i_conn->ep->qp.sq_prod_qe; @@ -1349,7 +1349,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session, struct bnx2i_cmd_response *resp_cqe; struct bnx2i_cmd *bnx2i_cmd; struct iscsi_task *task; - struct iscsi_cmd_rsp *hdr; + struct iscsi_scsi_rsp *hdr; u32 datalen = 0; resp_cqe = (struct 
bnx2i_cmd_response *)cqe; @@ -1376,7 +1376,7 @@ int bnx2i_process_scsi_cmd_resp(struct iscsi_session *session, } bnx2i_iscsi_unmap_sg_list(bnx2i_cmd); - hdr = (struct iscsi_cmd_rsp *)task->hdr; + hdr = (struct iscsi_scsi_rsp *)task->hdr; resp_cqe = (struct bnx2i_cmd_response *)cqe; hdr->opcode = resp_cqe->op_code; hdr->max_cmdsn = cpu_to_be32(resp_cqe->max_cmd_sn); diff --git a/drivers/scsi/bnx2i/bnx2i_iscsi.c b/drivers/scsi/bnx2i/bnx2i_iscsi.c index 5c55a75ae597..cffd4d75df56 100644 --- a/drivers/scsi/bnx2i/bnx2i_iscsi.c +++ b/drivers/scsi/bnx2i/bnx2i_iscsi.c @@ -1213,7 +1213,7 @@ static int bnx2i_task_xmit(struct iscsi_task *task) struct bnx2i_conn *bnx2i_conn = conn->dd_data; struct scsi_cmnd *sc = task->sc; struct bnx2i_cmd *cmd = task->dd_data; - struct iscsi_cmd *hdr = (struct iscsi_cmd *) task->hdr; + struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task->hdr; if (atomic_read(&bnx2i_conn->ep->num_active_cmds) + 1 > hba->max_sqes) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index d7a4120034a2..256a999d010b 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -84,22 +84,6 @@ MODULE_PARM_DESC(debug_libiscsi_eh, __func__, ##arg); \ } while (0); -/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ -#define SNA32_CHECK 2147483648UL - -static int iscsi_sna_lt(u32 n1, u32 n2) -{ - return n1 != n2 && ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || - (n1 > n2 && (n2 - n1 < SNA32_CHECK))); -} - -/* Serial Number Arithmetic, 32 bits, less than, RFC1982 */ -static int iscsi_sna_lte(u32 n1, u32 n2) -{ - return n1 == n2 || ((n1 < n2 && (n2 - n1 < SNA32_CHECK)) || - (n1 > n2 && (n2 - n1 < SNA32_CHECK))); -} - inline void iscsi_conn_queue_work(struct iscsi_conn *conn) { struct Scsi_Host *shost = conn->session->host; @@ -360,7 +344,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) struct iscsi_conn *conn = task->conn; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; - struct iscsi_cmd *hdr; + struct iscsi_scsi_req *hdr; unsigned hdrlength, cmd_len; itt_t itt; int rc; @@ -374,7 +358,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } - hdr = (struct iscsi_cmd *) task->hdr; + hdr = (struct iscsi_scsi_req *)task->hdr; itt = hdr->itt; memset(hdr, 0, sizeof(*hdr)); @@ -830,7 +814,7 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, struct iscsi_task *task, char *data, int datalen) { - struct iscsi_cmd_rsp *rhdr = (struct iscsi_cmd_rsp *)hdr; + struct iscsi_scsi_rsp *rhdr = (struct iscsi_scsi_rsp *)hdr; struct iscsi_session *session = conn->session; struct scsi_cmnd *sc = task->sc; diff --git a/drivers/staging/ath6kl/os/linux/ar6000_drv.c b/drivers/staging/ath6kl/os/linux/ar6000_drv.c index 499b7a90e941..32ee39ad00df 100644 --- a/drivers/staging/ath6kl/os/linux/ar6000_drv.c +++ b/drivers/staging/ath6kl/os/linux/ar6000_drv.c @@ -6205,6 +6205,7 @@ int ar6000_create_ap_interface(struct ar6_softc *ar, char *ap_ifname) ether_setup(dev); init_netdev(dev, ap_ifname); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; if (register_netdev(dev)) { AR_DEBUG_PRINTF(ATH_DEBUG_ERR,("ar6000_create_ap_interface: register_netdev failed\n")); diff --git a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h index 5711e7c16b50..40e3d375ea99 100644 --- a/drivers/staging/brcm80211/brcmsmac/mac80211_if.h +++ b/drivers/staging/brcm80211/brcmsmac/mac80211_if.h @@ -24,8 +24,6 @@ #define BRCMS_SET_SHORTSLOT_OVERRIDE 146 -#include 
<linux/interrupt.h> - /* BMAC Note: High-only driver is no longer working in softirq context as it needs to block and * sleep so perimeter lock has to be a semaphore instead of spinlock. This requires timers to be * submitted to workqueue instead of being on kernel timer diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig index 5cb0f0ef6af0..b28794b72125 100644 --- a/drivers/target/Kconfig +++ b/drivers/target/Kconfig @@ -31,5 +31,6 @@ config TCM_PSCSI source "drivers/target/loopback/Kconfig" source "drivers/target/tcm_fc/Kconfig" +source "drivers/target/iscsi/Kconfig" endif diff --git a/drivers/target/Makefile b/drivers/target/Makefile index 21df808a992c..1060c7b7f803 100644 --- a/drivers/target/Makefile +++ b/drivers/target/Makefile @@ -24,5 +24,5 @@ obj-$(CONFIG_TCM_PSCSI) += target_core_pscsi.o # Fabric modules obj-$(CONFIG_LOOPBACK_TARGET) += loopback/ - obj-$(CONFIG_TCM_FC) += tcm_fc/ +obj-$(CONFIG_ISCSI_TARGET) += iscsi/ diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig new file mode 100644 index 000000000000..564ff4e0dbc4 --- /dev/null +++ b/drivers/target/iscsi/Kconfig @@ -0,0 +1,8 @@ +config ISCSI_TARGET + tristate "Linux-iSCSI.org iSCSI Target Mode Stack" + select CRYPTO + select CRYPTO_CRC32C + select CRYPTO_CRC32C_INTEL if X86 + help + Say M here to enable the ConfigFS enabled Linux-iSCSI.org iSCSI + Target Mode Stack. diff --git a/drivers/target/iscsi/Makefile b/drivers/target/iscsi/Makefile new file mode 100644 index 000000000000..5b9a2cf7f0a9 --- /dev/null +++ b/drivers/target/iscsi/Makefile @@ -0,0 +1,20 @@ +iscsi_target_mod-y += iscsi_target_parameters.o \ + iscsi_target_seq_pdu_list.o \ + iscsi_target_tq.o \ + iscsi_target_auth.o \ + iscsi_target_datain_values.o \ + iscsi_target_device.o \ + iscsi_target_erl0.o \ + iscsi_target_erl1.o \ + iscsi_target_erl2.o \ + iscsi_target_login.o \ + iscsi_target_nego.o \ + iscsi_target_nodeattrib.o \ + iscsi_target_tmr.o \ + iscsi_target_tpg.o \ + iscsi_target_util.o \ + iscsi_target.o \ + iscsi_target_configfs.o \ + iscsi_target_stat.o + +obj-$(CONFIG_ISCSI_TARGET) += iscsi_target_mod.o diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c new file mode 100644 index 000000000000..14c81c4265bd --- /dev/null +++ b/drivers/target/iscsi/iscsi_target.c @@ -0,0 +1,4559 @@ +/******************************************************************************* + * This file contains main functions related to the iSCSI Target Core Driver. + * + * © Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ ******************************************************************************/ + +#include <linux/string.h> +#include <linux/kthread.h> +#include <linux/crypto.h> +#include <linux/completion.h> +#include <asm/unaligned.h> +#include <scsi/scsi_device.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_tmr.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_tq.h" +#include "iscsi_target_configfs.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_login.h" +#include "iscsi_target_tmr.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_device.h" +#include "iscsi_target_stat.h" + +static LIST_HEAD(g_tiqn_list); +static LIST_HEAD(g_np_list); +static DEFINE_SPINLOCK(tiqn_lock); +static DEFINE_SPINLOCK(np_lock); + +static struct idr tiqn_idr; +struct idr sess_idr; +struct mutex auth_id_lock; +spinlock_t sess_idr_lock; + +struct iscsit_global *iscsit_global; + +struct kmem_cache *lio_cmd_cache; +struct kmem_cache *lio_qr_cache; +struct kmem_cache *lio_dr_cache; +struct kmem_cache *lio_ooo_cache; +struct kmem_cache *lio_r2t_cache; + +static int iscsit_handle_immediate_data(struct iscsi_cmd *, + unsigned char *buf, u32); +static int iscsit_logout_post_handler(struct iscsi_cmd *, struct iscsi_conn *); + +struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *buf) +{ + struct iscsi_tiqn *tiqn = NULL; + + spin_lock(&tiqn_lock); + list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) { + if (!strcmp(tiqn->tiqn, buf)) { + + spin_lock(&tiqn->tiqn_state_lock); + if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) { + tiqn->tiqn_access_count++; + spin_unlock(&tiqn->tiqn_state_lock); + spin_unlock(&tiqn_lock); + return tiqn; + } + spin_unlock(&tiqn->tiqn_state_lock); + } + } + spin_unlock(&tiqn_lock); + + return NULL; +} + +static int iscsit_set_tiqn_shutdown(struct iscsi_tiqn *tiqn) +{ + spin_lock(&tiqn->tiqn_state_lock); + if (tiqn->tiqn_state == TIQN_STATE_ACTIVE) { + tiqn->tiqn_state = TIQN_STATE_SHUTDOWN; + spin_unlock(&tiqn->tiqn_state_lock); + return 0; + } + spin_unlock(&tiqn->tiqn_state_lock); + + return -1; +} + +void iscsit_put_tiqn_for_login(struct iscsi_tiqn *tiqn) +{ + spin_lock(&tiqn->tiqn_state_lock); + tiqn->tiqn_access_count--; + spin_unlock(&tiqn->tiqn_state_lock); +} + +/* + * Note that IQN formatting is expected to be done in userspace, and + * no explicit IQN format checks are done here.
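A well-formed IQN looks like iqn.2003-01.org.linux-iscsi.host:target0; per the comment above, only the ISCSI_IQN_LEN bound is enforced in-kernel by iscsit_add_tiqn() below. A minimal sketch of the kind of format check left to userspace (iqn_format_ok() is a hypothetical helper, not part of this patch):

    /* Hypothetical userspace-style sanity check; the kernel only bounds length. */
    static int iqn_format_ok(const char *iqn)
    {
    	unsigned int year, month;

    	if (strncmp(iqn, "iqn.", 4))			/* required prefix */
    		return 0;
    	if (sscanf(iqn + 4, "%4u-%2u", &year, &month) != 2 ||
    	    month < 1 || month > 12)			/* yyyy-mm date field */
    		return 0;
    	return strchr(iqn + 4, '.') != NULL;		/* reversed domain follows */
    }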
+ */ +struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *buf) +{ + struct iscsi_tiqn *tiqn = NULL; + int ret; + + if (strlen(buf) > ISCSI_IQN_LEN) { + pr_err("Target IQN exceeds %d bytes\n", + ISCSI_IQN_LEN); + return ERR_PTR(-EINVAL); + } + + tiqn = kzalloc(sizeof(struct iscsi_tiqn), GFP_KERNEL); + if (!tiqn) { + pr_err("Unable to allocate struct iscsi_tiqn\n"); + return ERR_PTR(-ENOMEM); + } + + sprintf(tiqn->tiqn, "%s", buf); + INIT_LIST_HEAD(&tiqn->tiqn_list); + INIT_LIST_HEAD(&tiqn->tiqn_tpg_list); + spin_lock_init(&tiqn->tiqn_state_lock); + spin_lock_init(&tiqn->tiqn_tpg_lock); + spin_lock_init(&tiqn->sess_err_stats.lock); + spin_lock_init(&tiqn->login_stats.lock); + spin_lock_init(&tiqn->logout_stats.lock); + + if (!idr_pre_get(&tiqn_idr, GFP_KERNEL)) { + pr_err("idr_pre_get() for tiqn_idr failed\n"); + kfree(tiqn); + return ERR_PTR(-ENOMEM); + } + tiqn->tiqn_state = TIQN_STATE_ACTIVE; + + spin_lock(&tiqn_lock); + ret = idr_get_new(&tiqn_idr, NULL, &tiqn->tiqn_index); + if (ret < 0) { + pr_err("idr_get_new() failed for tiqn->tiqn_index\n"); + spin_unlock(&tiqn_lock); + kfree(tiqn); + return ERR_PTR(ret); + } + list_add_tail(&tiqn->tiqn_list, &g_tiqn_list); + spin_unlock(&tiqn_lock); + + pr_debug("CORE[0] - Added iSCSI Target IQN: %s\n", tiqn->tiqn); + + return tiqn; + +} + +static void iscsit_wait_for_tiqn(struct iscsi_tiqn *tiqn) +{ + /* + * Wait for accesses to said struct iscsi_tiqn to end. + */ + spin_lock(&tiqn->tiqn_state_lock); + while (tiqn->tiqn_access_count != 0) { + spin_unlock(&tiqn->tiqn_state_lock); + msleep(10); + spin_lock(&tiqn->tiqn_state_lock); + } + spin_unlock(&tiqn->tiqn_state_lock); +} + +void iscsit_del_tiqn(struct iscsi_tiqn *tiqn) +{ + /* + * iscsit_set_tiqn_shutdown sets tiqn->tiqn_state = TIQN_STATE_SHUTDOWN + * while holding tiqn->tiqn_state_lock. This means that all subsequent + * attempts to access this struct iscsi_tiqn will fail from both transport + * fabric and control code paths. + */ + if (iscsit_set_tiqn_shutdown(tiqn) < 0) { + pr_err("iscsit_set_tiqn_shutdown() failed\n"); + return; + } + + iscsit_wait_for_tiqn(tiqn); + + spin_lock(&tiqn_lock); + list_del(&tiqn->tiqn_list); + idr_remove(&tiqn_idr, tiqn->tiqn_index); + spin_unlock(&tiqn_lock); + + pr_debug("CORE[0] - Deleted iSCSI Target IQN: %s\n", + tiqn->tiqn); + kfree(tiqn); +} + +int iscsit_access_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +{ + int ret; + /* + * Determine if the network portal is accepting storage traffic. + */ + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + return -1; + } + if (np->np_login_tpg) { + pr_err("np->np_login_tpg() is not NULL!\n"); + spin_unlock_bh(&np->np_thread_lock); + return -1; + } + spin_unlock_bh(&np->np_thread_lock); + /* + * Determine if the portal group is accepting storage traffic. + */ + spin_lock_bh(&tpg->tpg_state_lock); + if (tpg->tpg_state != TPG_STATE_ACTIVE) { + spin_unlock_bh(&tpg->tpg_state_lock); + return -1; + } + spin_unlock_bh(&tpg->tpg_state_lock); + + /* + * Here we serialize access across the TIQN+TPG Tuple. 
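The serialization just described is pairwise: iscsit_access_np() takes tpg->np_login_lock and pins the portal to the TPG, and iscsit_deaccess_np() further down releases both and drops the tiqn login reference. A condensed caller sketch (login_on_portal() is illustrative, not a function in this patch):

    /* Illustrative login-path pairing; error handling elided. */
    static int login_on_portal(struct iscsi_np *np, struct iscsi_portal_group *tpg)
    {
    	if (iscsit_access_np(np, tpg) < 0)
    		return -1;		/* portal or TPG not accepting logins */
    	/* ... at most one login in flight per TIQN+TPG tuple ... */
    	iscsit_deaccess_np(np, tpg);	/* unlocks np_login_lock, puts tiqn */
    	return 0;
    }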
+ */ + ret = mutex_lock_interruptible(&tpg->np_login_lock); + if ((ret != 0) || signal_pending(current)) + return -1; + + spin_lock_bh(&np->np_thread_lock); + np->np_login_tpg = tpg; + spin_unlock_bh(&np->np_thread_lock); + + return 0; +} + +int iscsit_deaccess_np(struct iscsi_np *np, struct iscsi_portal_group *tpg) +{ + struct iscsi_tiqn *tiqn = tpg->tpg_tiqn; + + spin_lock_bh(&np->np_thread_lock); + np->np_login_tpg = NULL; + spin_unlock_bh(&np->np_thread_lock); + + mutex_unlock(&tpg->np_login_lock); + + if (tiqn) + iscsit_put_tiqn_for_login(tiqn); + + return 0; +} + +static struct iscsi_np *iscsit_get_np( + struct __kernel_sockaddr_storage *sockaddr, + int network_transport) +{ + struct sockaddr_in *sock_in, *sock_in_e; + struct sockaddr_in6 *sock_in6, *sock_in6_e; + struct iscsi_np *np; + int ip_match = 0; + u16 port; + + spin_lock_bh(&np_lock); + list_for_each_entry(np, &g_np_list, np_list) { + spin_lock(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock(&np->np_thread_lock); + continue; + } + + if (sockaddr->ss_family == AF_INET6) { + sock_in6 = (struct sockaddr_in6 *)sockaddr; + sock_in6_e = (struct sockaddr_in6 *)&np->np_sockaddr; + + if (!memcmp((void *)&sock_in6->sin6_addr.in6_u, + (void *)&sock_in6_e->sin6_addr.in6_u, + sizeof(struct in6_addr))) + ip_match = 1; + + port = ntohs(sock_in6->sin6_port); + } else { + sock_in = (struct sockaddr_in *)sockaddr; + sock_in_e = (struct sockaddr_in *)&np->np_sockaddr; + + if (sock_in->sin_addr.s_addr == + sock_in_e->sin_addr.s_addr) + ip_match = 1; + + port = ntohs(sock_in->sin_port); + } + + if ((ip_match == 1) && (np->np_port == port) && + (np->np_network_transport == network_transport)) { + /* + * Increment the np_exports reference count now to + * prevent iscsit_del_np() below from being called + * while iscsi_tpg_add_network_portal() is called. + */ + np->np_exports++; + spin_unlock(&np->np_thread_lock); + spin_unlock_bh(&np_lock); + return np; + } + spin_unlock(&np->np_thread_lock); + } + spin_unlock_bh(&np_lock); + + return NULL; +} + +struct iscsi_np *iscsit_add_np( + struct __kernel_sockaddr_storage *sockaddr, + char *ip_str, + int network_transport) +{ + struct sockaddr_in *sock_in; + struct sockaddr_in6 *sock_in6; + struct iscsi_np *np; + int ret; + /* + * Locate the existing struct iscsi_np if already active.. 
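iscsit_get_np() above matches an existing portal on address family, address bytes, and port, then on transport. The IPv6 arm of that comparison can be restated as a single predicate (a reading aid, not a helper in the patch):

    /* Equality test behind the AF_INET6 branch of iscsit_get_np(). */
    static bool portal_v6_match(const struct sockaddr_in6 *a,
    			    const struct sockaddr_in6 *b)
    {
    	return !memcmp(&a->sin6_addr, &b->sin6_addr, sizeof(struct in6_addr)) &&
    	       a->sin6_port == b->sin6_port;	/* equal in either byte order */
    }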
+ */ + np = iscsit_get_np(sockaddr, network_transport); + if (np) + return np; + + np = kzalloc(sizeof(struct iscsi_np), GFP_KERNEL); + if (!np) { + pr_err("Unable to allocate memory for struct iscsi_np\n"); + return ERR_PTR(-ENOMEM); + } + + np->np_flags |= NPF_IP_NETWORK; + if (sockaddr->ss_family == AF_INET6) { + sock_in6 = (struct sockaddr_in6 *)sockaddr; + snprintf(np->np_ip, IPV6_ADDRESS_SPACE, "%s", ip_str); + np->np_port = ntohs(sock_in6->sin6_port); + } else { + sock_in = (struct sockaddr_in *)sockaddr; + sprintf(np->np_ip, "%s", ip_str); + np->np_port = ntohs(sock_in->sin_port); + } + + np->np_network_transport = network_transport; + spin_lock_init(&np->np_thread_lock); + init_completion(&np->np_restart_comp); + INIT_LIST_HEAD(&np->np_list); + + ret = iscsi_target_setup_login_socket(np, sockaddr); + if (ret != 0) { + kfree(np); + return ERR_PTR(ret); + } + + np->np_thread = kthread_run(iscsi_target_login_thread, np, "iscsi_np"); + if (IS_ERR(np->np_thread)) { + pr_err("Unable to create kthread: iscsi_np\n"); + ret = PTR_ERR(np->np_thread); + kfree(np); + return ERR_PTR(ret); + } + /* + * Increment the np_exports reference count now to prevent + * iscsit_del_np() below from being run while a new call to + * iscsi_tpg_add_network_portal() for a matching iscsi_np is + * active. We don't need to hold np->np_thread_lock at this + * point because iscsi_np has not been added to g_np_list yet. + */ + np->np_exports = 1; + + spin_lock_bh(&np_lock); + list_add_tail(&np->np_list, &g_np_list); + spin_unlock_bh(&np_lock); + + pr_debug("CORE[0] - Added Network Portal: %s:%hu on %s\n", + np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ? + "TCP" : "SCTP"); + + return np; +} + +int iscsit_reset_np_thread( + struct iscsi_np *np, + struct iscsi_tpg_np *tpg_np, + struct iscsi_portal_group *tpg) +{ + spin_lock_bh(&np->np_thread_lock); + if (tpg && tpg_np) { + /* + * The reset operation need only be performed when the + * passed struct iscsi_portal_group has a login in progress + * to one of the network portals. + */ + if (tpg_np->tpg_np->np_login_tpg != tpg) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + } + if (np->np_thread_state == ISCSI_NP_THREAD_INACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + np->np_thread_state = ISCSI_NP_THREAD_RESET; + + if (np->np_thread) { + spin_unlock_bh(&np->np_thread_lock); + send_sig(SIGINT, np->np_thread, 1); + wait_for_completion(&np->np_restart_comp); + spin_lock_bh(&np->np_thread_lock); + } + spin_unlock_bh(&np->np_thread_lock); + + return 0; +} + +int iscsit_del_np_comm(struct iscsi_np *np) +{ + if (!np->np_socket) + return 0; + + /* + * Some network transports allocate their own struct sock->file, + * see if we need to free any additional allocated resources. + */ + if (np->np_flags & NPF_SCTP_STRUCT_FILE) { + kfree(np->np_socket->file); + np->np_socket->file = NULL; + } + + sock_release(np->np_socket); + return 0; +} + +int iscsit_del_np(struct iscsi_np *np) +{ + spin_lock_bh(&np->np_thread_lock); + np->np_exports--; + if (np->np_exports) { + spin_unlock_bh(&np->np_thread_lock); + return 0; + } + np->np_thread_state = ISCSI_NP_THREAD_SHUTDOWN; + spin_unlock_bh(&np->np_thread_lock); + + if (np->np_thread) { + /* + * We need to send the signal to wakeup Linux/Net + * which may be sleeping in sock_accept().. 
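The code below follows the standard recipe for stopping a kthread that can block in an interruptible socket call: deliver SIGINT to knock it out of sock_accept(), then reap it. As a generic sketch (stop_listener() is illustrative):

    /* Generic stop pattern for a kthread parked in an interruptible accept. */
    static void stop_listener(struct task_struct *thr)
    {
    	send_sig(SIGINT, thr, 1);	/* wake the thread out of sock_accept() */
    	kthread_stop(thr);		/* then wait for it to see should_stop() */
    }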
+ */ + send_sig(SIGINT, np->np_thread, 1); + kthread_stop(np->np_thread); + } + iscsit_del_np_comm(np); + + spin_lock_bh(&np_lock); + list_del(&np->np_list); + spin_unlock_bh(&np_lock); + + pr_debug("CORE[0] - Removed Network Portal: %s:%hu on %s\n", + np->np_ip, np->np_port, (np->np_network_transport == ISCSI_TCP) ? + "TCP" : "SCTP"); + + kfree(np); + return 0; +} + +static int __init iscsi_target_init_module(void) +{ + int ret = 0; + + pr_debug("iSCSI-Target "ISCSIT_VERSION"\n"); + + iscsit_global = kzalloc(sizeof(struct iscsit_global), GFP_KERNEL); + if (!iscsit_global) { + pr_err("Unable to allocate memory for iscsit_global\n"); + return -1; + } + mutex_init(&auth_id_lock); + spin_lock_init(&sess_idr_lock); + idr_init(&tiqn_idr); + idr_init(&sess_idr); + + ret = iscsi_target_register_configfs(); + if (ret < 0) + goto out; + + ret = iscsi_thread_set_init(); + if (ret < 0) + goto configfs_out; + + if (iscsi_allocate_thread_sets(TARGET_THREAD_SET_COUNT) != + TARGET_THREAD_SET_COUNT) { + pr_err("iscsi_allocate_thread_sets() returned" + " unexpected value!\n"); + goto ts_out1; + } + + lio_cmd_cache = kmem_cache_create("lio_cmd_cache", + sizeof(struct iscsi_cmd), __alignof__(struct iscsi_cmd), + 0, NULL); + if (!lio_cmd_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_cmd_cache\n"); + goto ts_out2; + } + + lio_qr_cache = kmem_cache_create("lio_qr_cache", + sizeof(struct iscsi_queue_req), + __alignof__(struct iscsi_queue_req), 0, NULL); + if (!lio_qr_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_qr_cache\n"); + goto cmd_out; + } + + lio_dr_cache = kmem_cache_create("lio_dr_cache", + sizeof(struct iscsi_datain_req), + __alignof__(struct iscsi_datain_req), 0, NULL); + if (!lio_dr_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_dr_cache\n"); + goto qr_out; + } + + lio_ooo_cache = kmem_cache_create("lio_ooo_cache", + sizeof(struct iscsi_ooo_cmdsn), + __alignof__(struct iscsi_ooo_cmdsn), 0, NULL); + if (!lio_ooo_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_ooo_cache\n"); + goto dr_out; + } + + lio_r2t_cache = kmem_cache_create("lio_r2t_cache", + sizeof(struct iscsi_r2t), __alignof__(struct iscsi_r2t), + 0, NULL); + if (!lio_r2t_cache) { + pr_err("Unable to kmem_cache_create() for" + " lio_r2t_cache\n"); + goto ooo_out; + } + + if (iscsit_load_discovery_tpg() < 0) + goto r2t_out; + + return ret; +r2t_out: + kmem_cache_destroy(lio_r2t_cache); +ooo_out: + kmem_cache_destroy(lio_ooo_cache); +dr_out: + kmem_cache_destroy(lio_dr_cache); +qr_out: + kmem_cache_destroy(lio_qr_cache); +cmd_out: + kmem_cache_destroy(lio_cmd_cache); +ts_out2: + iscsi_deallocate_thread_sets(); +ts_out1: + iscsi_thread_set_free(); +configfs_out: + iscsi_target_deregister_configfs(); +out: + kfree(iscsit_global); + return -ENOMEM; +} + +static void __exit iscsi_target_cleanup_module(void) +{ + iscsi_deallocate_thread_sets(); + iscsi_thread_set_free(); + iscsit_release_discovery_tpg(); + kmem_cache_destroy(lio_cmd_cache); + kmem_cache_destroy(lio_qr_cache); + kmem_cache_destroy(lio_dr_cache); + kmem_cache_destroy(lio_ooo_cache); + kmem_cache_destroy(lio_r2t_cache); + + iscsi_target_deregister_configfs(); + + kfree(iscsit_global); +} + +int iscsit_add_reject( + u8 reason, + int fail_conn, + unsigned char *buf, + struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd; + struct iscsi_reject *hdr; + int ret; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return -1; + + cmd->iscsi_opcode = ISCSI_OP_REJECT; + if (fail_conn) + cmd->cmd_flags |= ICF_REJECT_FAIL_CONN;
+ + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->reason = reason; + + cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!cmd->buf_ptr) { + pr_err("Unable to allocate memory for cmd->buf_ptr\n"); + iscsit_release_cmd(cmd); + return -1; + } + memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN); + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + cmd->i_state = ISTATE_SEND_REJECT; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + ret = wait_for_completion_interruptible(&cmd->reject_comp); + if (ret != 0) + return -1; + + return (!fail_conn) ? 0 : -1; +} + +int iscsit_add_reject_from_cmd( + u8 reason, + int fail_conn, + int add_to_conn, + unsigned char *buf, + struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn; + struct iscsi_reject *hdr; + int ret; + + if (!cmd->conn) { + pr_err("cmd->conn is NULL for ITT: 0x%08x\n", + cmd->init_task_tag); + return -1; + } + conn = cmd->conn; + + cmd->iscsi_opcode = ISCSI_OP_REJECT; + if (fail_conn) + cmd->cmd_flags |= ICF_REJECT_FAIL_CONN; + + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->reason = reason; + + cmd->buf_ptr = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!cmd->buf_ptr) { + pr_err("Unable to allocate memory for cmd->buf_ptr\n"); + iscsit_release_cmd(cmd); + return -1; + } + memcpy(cmd->buf_ptr, buf, ISCSI_HDR_LEN); + + if (add_to_conn) { + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + } + + cmd->i_state = ISTATE_SEND_REJECT; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + ret = wait_for_completion_interruptible(&cmd->reject_comp); + if (ret != 0) + return -1; + + return (!fail_conn) ? 0 : -1; +} + +/* + * Map some portion of the allocated scatterlist to an iovec, suitable for + * kernel sockets to copy data in/out. This handles both pages and slab-allocated + * buffers, since we have been tricky and mapped t_mem_sg to the buffer in + * either case (see iscsit_alloc_buffs) + */ +static int iscsit_map_iovec( + struct iscsi_cmd *cmd, + struct kvec *iov, + u32 data_offset, + u32 data_length) +{ + u32 i = 0; + struct scatterlist *sg; + unsigned int page_off; + + /* + * We have a private mapping of the allocated pages in t_mem_sg. + * At this point, we also know each contains a page. 
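Because iscsit_alloc_buffs() fills t_mem_sg with whole pages, a byte offset maps onto the scatterlist by plain division, which is exactly what the first two statements below compute. Worked example with 4 KiB pages:

    sg_index = data_offset / PAGE_SIZE;	/* 10000 / 4096 = 2    */
    page_off = data_offset % PAGE_SIZE;	/* 10000 % 4096 = 1808 */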
+ */ + sg = &cmd->t_mem_sg[data_offset / PAGE_SIZE]; + page_off = (data_offset % PAGE_SIZE); + + cmd->first_data_sg = sg; + cmd->first_data_sg_off = page_off; + + while (data_length) { + u32 cur_len = min_t(u32, data_length, sg->length - page_off); + + iov[i].iov_base = kmap(sg_page(sg)) + sg->offset + page_off; + iov[i].iov_len = cur_len; + + data_length -= cur_len; + page_off = 0; + sg = sg_next(sg); + i++; + } + + cmd->kmapped_nents = i; + + return i; +} + +static void iscsit_unmap_iovec(struct iscsi_cmd *cmd) +{ + u32 i; + struct scatterlist *sg; + + sg = cmd->first_data_sg; + + for (i = 0; i < cmd->kmapped_nents; i++) + kunmap(sg_page(&sg[i])); +} + +static void iscsit_ack_from_expstatsn(struct iscsi_conn *conn, u32 exp_statsn) +{ + struct iscsi_cmd *cmd; + + conn->exp_statsn = exp_statsn; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + spin_lock(&cmd->istate_lock); + if ((cmd->i_state == ISTATE_SENT_STATUS) && + (cmd->stat_sn < exp_statsn)) { + cmd->i_state = ISTATE_REMOVE; + spin_unlock(&cmd->istate_lock); + iscsit_add_cmd_to_immediate_queue(cmd, conn, + cmd->i_state); + continue; + } + spin_unlock(&cmd->istate_lock); + } + spin_unlock_bh(&conn->cmd_lock); +} + +static int iscsit_allocate_iovecs(struct iscsi_cmd *cmd) +{ + u32 iov_count = (cmd->se_cmd.t_data_nents == 0) ? 1 : + cmd->se_cmd.t_data_nents; + + iov_count += TRANSPORT_IOV_DATA_BUFFER; + + cmd->iov_data = kzalloc(iov_count * sizeof(struct kvec), GFP_KERNEL); + if (!cmd->iov_data) { + pr_err("Unable to allocate cmd->iov_data\n"); + return -ENOMEM; + } + + cmd->orig_iov_data_count = iov_count; + return 0; +} + +static int iscsit_alloc_buffs(struct iscsi_cmd *cmd) +{ + struct scatterlist *sgl; + u32 length = cmd->se_cmd.data_length; + int nents = DIV_ROUND_UP(length, PAGE_SIZE); + int i = 0, ret; + /* + * If no SCSI payload is present, allocate the default iovecs used for + * iSCSI PDU Header + */ + if (!length) + return iscsit_allocate_iovecs(cmd); + + sgl = kzalloc(sizeof(*sgl) * nents, GFP_KERNEL); + if (!sgl) + return -ENOMEM; + + sg_init_table(sgl, nents); + + while (length) { + int buf_size = min_t(int, length, PAGE_SIZE); + struct page *page; + + page = alloc_page(GFP_KERNEL | __GFP_ZERO); + if (!page) + goto page_alloc_failed; + + sg_set_page(&sgl[i], page, buf_size, 0); + + length -= buf_size; + i++; + } + + cmd->t_mem_sg = sgl; + cmd->t_mem_sg_nents = nents; + + /* BIDI ops not supported */ + + /* Tell the core about our preallocated memory */ + transport_generic_map_mem_to_cmd(&cmd->se_cmd, sgl, nents, NULL, 0); + /* + * Allocate iovecs for SCSI payload after transport_generic_map_mem_to_cmd + * so that cmd->se_cmd.t_tasks_se_num has been set. 
+ */ + ret = iscsit_allocate_iovecs(cmd); + if (ret < 0) + goto page_alloc_failed; + + return 0; + +page_alloc_failed: + while (i >= 0) { + __free_page(sg_page(&sgl[i])); + i--; + } + kfree(cmd->t_mem_sg); + cmd->t_mem_sg = NULL; + return -ENOMEM; +} + +static int iscsit_handle_scsi_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + int data_direction, cmdsn_ret = 0, immed_ret, ret, transport_ret; + int dump_immediate_data = 0, send_check_condition = 0, payload_length; + struct iscsi_cmd *cmd = NULL; + struct iscsi_scsi_req *hdr; + + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->cmd_pdus++; + if (conn->sess->se_sess->se_node_acl) { + spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); + conn->sess->se_sess->se_node_acl->num_cmds++; + spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); + } + spin_unlock_bh(&conn->sess->session_stats_lock); + + hdr = (struct iscsi_scsi_req *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->data_length = be32_to_cpu(hdr->data_length); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + /* FIXME: Add checks for AdditionalHeaderSegment */ + + if (!(hdr->flags & ISCSI_FLAG_CMD_WRITE) && + !(hdr->flags & ISCSI_FLAG_CMD_FINAL)) { + pr_err("ISCSI_FLAG_CMD_WRITE & ISCSI_FLAG_CMD_FINAL" + " not set. Bad iSCSI Initiator.\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (((hdr->flags & ISCSI_FLAG_CMD_READ) || + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) && !hdr->data_length) { + /* + * VMware ESX v3.0 uses a modified Cisco Initiator (v3.4.2) + * that adds support for RESERVE/RELEASE. There is a bug + * in this new functionality that sets the R/W bits when + * neither CDB carries any READ or WRITE data payloads. + */ + if ((hdr->cdb[0] == 0x16) || (hdr->cdb[0] == 0x17)) { + hdr->flags &= ~ISCSI_FLAG_CMD_READ; + hdr->flags &= ~ISCSI_FLAG_CMD_WRITE; + goto done; + } + + pr_err("ISCSI_FLAG_CMD_READ or ISCSI_FLAG_CMD_WRITE" + " set when Expected Data Transfer Length is 0 for" + " CDB: 0x%02x. Bad iSCSI Initiator.\n", hdr->cdb[0]); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } +done: + + if (!(hdr->flags & ISCSI_FLAG_CMD_READ) && + !(hdr->flags & ISCSI_FLAG_CMD_WRITE) && (hdr->data_length != 0)) { + pr_err("ISCSI_FLAG_CMD_READ and/or ISCSI_FLAG_CMD_WRITE" + " MUST be set if Expected Data Transfer Length is not 0."
+ " Bad iSCSI Initiator\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if ((hdr->flags & ISCSI_FLAG_CMD_READ) && + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) { + pr_err("Bidirectional operations not supported!\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + pr_err("Illegally set Immediate Bit in iSCSI Initiator" + " SCSI Command PDU.\n"); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + if (payload_length && !conn->sess->sess_ops->ImmediateData) { + pr_err("ImmediateData=No but DataSegmentLength=%u," + " protocol error.\n", payload_length); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if ((hdr->data_length == payload_length) && + (!(hdr->flags & ISCSI_FLAG_CMD_FINAL))) { + pr_err("Expected Data Transfer Length and Length of" + " Immediate Data are the same, but ISCSI_FLAG_CMD_FINAL" + " bit is not set, protocol error\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > hdr->data_length) { + pr_err("DataSegmentLength: %u is greater than" + " EDTL: %u, protocol error.\n", payload_length, + hdr->data_length); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("DataSegmentLength: %u is greater than" + " MaxRecvDataSegmentLength: %u, protocol error.\n", + payload_length, conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > conn->sess->sess_ops->FirstBurstLength) { + pr_err("DataSegmentLength: %u is greater than" + " FirstBurstLength: %u, protocol error.\n", + payload_length, conn->sess->sess_ops->FirstBurstLength); + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_INVALID, 1, + buf, conn); + } + + data_direction = (hdr->flags & ISCSI_FLAG_CMD_WRITE) ? DMA_TO_DEVICE : + (hdr->flags & ISCSI_FLAG_CMD_READ) ? DMA_FROM_DEVICE : + DMA_NONE; + + cmd = iscsit_allocate_se_cmd(conn, hdr->data_length, data_direction, + (hdr->flags & ISCSI_FLAG_CMD_ATTR_MASK)); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, + buf, conn); + + pr_debug("Got SCSI Command, ITT: 0x%08x, CmdSN: 0x%08x," + " ExpXferLen: %u, Length: %u, CID: %hu\n", hdr->itt, + hdr->cmdsn, hdr->data_length, payload_length, conn->cid); + + cmd->iscsi_opcode = ISCSI_OP_SCSI_CMD; + cmd->i_state = ISTATE_NEW_CMD; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + cmd->immediate_data = (payload_length) ? 1 : 0; + cmd->unsolicited_data = ((!(hdr->flags & ISCSI_FLAG_CMD_FINAL) && + (hdr->flags & ISCSI_FLAG_CMD_WRITE)) ?
1 : 0); + if (cmd->unsolicited_data) + cmd->cmd_flags |= ICF_NON_IMMEDIATE_UNSOLICITED_DATA; + + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + if (hdr->flags & ISCSI_FLAG_CMD_READ) { + spin_lock_bh(&conn->sess->ttt_lock); + cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; + if (cmd->targ_xfer_tag == 0xFFFFFFFF) + cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++; + spin_unlock_bh(&conn->sess->ttt_lock); + } else if (hdr->flags & ISCSI_FLAG_CMD_WRITE) + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->first_burst_len = payload_length; + + if (cmd->data_direction == DMA_FROM_DEVICE) { + struct iscsi_datain_req *dr; + + dr = iscsit_allocate_datain_req(); + if (!dr) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + + iscsit_attach_datain_req(cmd, dr); + } + + /* + * The CDB is going to an se_device_t. + */ + ret = iscsit_get_lun_for_cmd(cmd, hdr->cdb, + get_unaligned_le64(&hdr->lun)); + if (ret < 0) { + if (cmd->se_cmd.scsi_sense_reason == TCM_NON_EXISTENT_LUN) { + pr_debug("Responding to non-acl'ed," + " non-existent or non-exported iSCSI LUN:" + " 0x%016Lx\n", get_unaligned_le64(&hdr->lun)); + } + if (ret == PYX_TRANSPORT_OUT_OF_MEMORY_RESOURCES) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + + send_check_condition = 1; + goto attach_cmd; + } + /* + * The Initiator Node has access to the LUN (the addressing method + * is handled inside of iscsit_get_lun_for_cmd()). Now it's time to + * allocate 1->N transport tasks (depending on sector count and + * maximum request size the physical HBA(s) can handle). + */ + transport_ret = transport_generic_allocate_tasks(&cmd->se_cmd, hdr->cdb); + if (transport_ret == -ENOMEM) { + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + } else if (transport_ret == -EINVAL) { + /* + * Unsupported SAM Opcode. CHECK_CONDITION will be sent + * in iscsit_execute_cmd() during the CmdSN OOO Execution + * Mechanism. + */ + send_check_condition = 1; + } else { + if (iscsit_decide_list_to_build(cmd, payload_length) < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + } + +attach_cmd: + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + /* + * Check if we need to delay processing because of ALUA + * Active/NonOptimized primary access state.. + */ + core_alua_check_nonop_delay(&cmd->se_cmd); + /* + * Allocate and setup SGL used with transport_generic_map_mem_to_cmd(). + * Also call iscsit_allocate_iovecs() + */ + ret = iscsit_alloc_buffs(cmd); + if (ret < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, 1, buf, cmd); + /* + * Check the CmdSN against ExpCmdSN/MaxCmdSN here if + * the Immediate Bit is not set, and no Immediate + * Data is attached. + * + * A PDU/CmdSN carrying Immediate Data can only + * be processed after the DataCRC has passed. + * If the DataCRC fails, the CmdSN MUST NOT + * be acknowledged. (See below) + */ + if (!cmd->immediate_data) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + /* + * If no Immediate Data is attached, it's OK to return now.
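iscsit_sequence_cmd() above is where the CmdSN is judged against the ExpCmdSN/MaxCmdSN window, and that comparison must use RFC 1982 serial-number arithmetic so the window survives 32-bit wrap. A sketch of the wrap-safe test, equivalent in spirit to the iscsi_sna_lt()/iscsi_sna_lte() helpers this series removes from libiscsi.c earlier in the diff (presumably in favor of a shared definition):

    /* RFC 1982 "n1 <= n2" for 32-bit serial numbers; window must be < 2^31. */
    static bool sna32_lte(u32 n1, u32 n2)
    {
    	return (u32)(n2 - n1) < 2147483648U;
    }

    /* Wrap-safe window test: ExpCmdSN <= CmdSN <= MaxCmdSN. */
    #define CMDSN_IN_WINDOW(sn, exp, max) \
    	(sna32_lte((exp), (sn)) && sna32_lte((sn), (max)))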
+ */ + if (!cmd->immediate_data) { + if (send_check_condition) + return 0; + + if (cmd->unsolicited_data) { + iscsit_set_dataout_sequence_values(cmd); + + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + + return 0; + } + + /* + * Early CHECK_CONDITIONs never make it to the transport processing + * thread. They are processed in CmdSN order by + * iscsit_check_received_cmdsn() below. + */ + if (send_check_condition) { + immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; + dump_immediate_data = 1; + goto after_immediate_data; + } + /* + * Call directly into transport_generic_new_cmd() to perform + * the backend memory allocation. + */ + ret = transport_generic_new_cmd(&cmd->se_cmd); + if ((ret < 0) || (cmd->se_cmd.se_cmd_flags & SCF_SE_CMD_FAILED)) { + immed_ret = IMMEDIATE_DATA_NORMAL_OPERATION; + dump_immediate_data = 1; + goto after_immediate_data; + } + + immed_ret = iscsit_handle_immediate_data(cmd, buf, payload_length); +after_immediate_data: + if (immed_ret == IMMEDIATE_DATA_NORMAL_OPERATION) { + /* + * A PDU/CmdSN carrying Immediate Data passed + * DataCRC, check against ExpCmdSN/MaxCmdSN if + * Immediate Bit is not set. + */ + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + /* + * Special case for Unsupported SAM WRITE Opcodes + * and ImmediateData=Yes. + */ + if (dump_immediate_data) { + if (iscsit_dump_data_payload(conn, payload_length, 1) < 0) + return -1; + } else if (cmd->unsolicited_data) { + iscsit_set_dataout_sequence_values(cmd); + + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + } else if (immed_ret == IMMEDIATE_DATA_ERL1_CRC_FAILURE) { + /* + * Immediate Data failed DataCRC and ERL>=1, + * silently drop this PDU and let the initiator + * plug the CmdSN gap. + * + * FIXME: Send Unsolicited NOPIN with reserved + * TTT here to help the initiator figure out + * the missing CmdSN, although they should be + * intelligent enough to determine the missing + * CmdSN and issue a retry to plug the sequence. 
+ */ + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + } else /* immed_ret == IMMEDIATE_DATA_CANNOT_RECOVER */ + return -1; + + return 0; +} + +static u32 iscsit_do_crypto_hash_sg( + struct hash_desc *hash, + struct iscsi_cmd *cmd, + u32 data_offset, + u32 data_length, + u32 padding, + u8 *pad_bytes) +{ + u32 data_crc; + u32 i; + struct scatterlist *sg; + unsigned int page_off; + + crypto_hash_init(hash); + + sg = cmd->first_data_sg; + page_off = cmd->first_data_sg_off; + + i = 0; + while (data_length) { + u32 cur_len = min_t(u32, data_length, (sg[i].length - page_off)); + + crypto_hash_update(hash, &sg[i], cur_len); + + data_length -= cur_len; + page_off = 0; + i++; + } + + if (padding) { + struct scatterlist pad_sg; + + sg_init_one(&pad_sg, pad_bytes, padding); + crypto_hash_update(hash, &pad_sg, padding); + } + crypto_hash_final(hash, (u8 *) &data_crc); + + return data_crc; +} + +static void iscsit_do_crypto_hash_buf( + struct hash_desc *hash, + unsigned char *buf, + u32 payload_length, + u32 padding, + u8 *pad_bytes, + u8 *data_crc) +{ + struct scatterlist sg; + + crypto_hash_init(hash); + + sg_init_one(&sg, (u8 *)buf, payload_length); + crypto_hash_update(hash, &sg, payload_length); + + if (padding) { + sg_init_one(&sg, pad_bytes, padding); + crypto_hash_update(hash, &sg, padding); + } + crypto_hash_final(hash, data_crc); +} + +static int iscsit_handle_data_out(struct iscsi_conn *conn, unsigned char *buf) +{ + int iov_ret, ooo_cmdsn = 0, ret; + u8 data_crc_failed = 0; + u32 checksum, iov_count = 0, padding = 0, rx_got = 0; + u32 rx_size = 0, payload_length; + struct iscsi_cmd *cmd = NULL; + struct se_cmd *se_cmd; + struct iscsi_data *hdr; + struct kvec *iov; + unsigned long flags; + + hdr = (struct iscsi_data *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + hdr->datasn = be32_to_cpu(hdr->datasn); + hdr->offset = be32_to_cpu(hdr->offset); + + if (!payload_length) { + pr_err("DataOUT payload is ZERO, protocol error.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + /* iSCSI write */ + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->rx_data_octets += payload_length; + if (conn->sess->se_sess->se_node_acl) { + spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock); + conn->sess->se_sess->se_node_acl->write_bytes += payload_length; + spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock); + } + spin_unlock_bh(&conn->sess->session_stats_lock); + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("DataSegmentLength: %u is greater than" + " MaxRecvDataSegmentLength: %u\n", payload_length, + conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + cmd = iscsit_find_cmd_from_itt_or_dump(conn, hdr->itt, + payload_length); + if (!cmd) + return 0; + + pr_debug("Got DataOut ITT: 0x%08x, TTT: 0x%08x," + " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", + hdr->itt, hdr->ttt, hdr->datasn, hdr->offset, + payload_length, conn->cid); + + if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { + pr_err("Command ITT: 0x%08x received DataOUT after" + " last DataOUT received, dumping payload\n", + cmd->init_task_tag); + return iscsit_dump_data_payload(conn, payload_length, 1); + } + + if (cmd->data_direction != DMA_TO_DEVICE) { + pr_err("Command ITT: 0x%08x received DataOUT for a" + " NON-WRITE 
command.\n", cmd->init_task_tag); + return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + se_cmd = &cmd->se_cmd; + iscsit_mod_dataout_timer(cmd); + + if ((hdr->offset + payload_length) > cmd->data_length) { + pr_err("DataOut Offset: %u, Length %u greater than" + " iSCSI Command EDTL %u, protocol error.\n", + hdr->offset, payload_length, cmd->data_length); + return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID, + 1, 0, buf, cmd); + } + + if (cmd->unsolicited_data) { + int dump_unsolicited_data = 0; + + if (conn->sess->sess_ops->InitialR2T) { + pr_err("Received unexpected unsolicited data" + " while InitialR2T=Yes, protocol error.\n"); + transport_send_check_condition_and_sense(&cmd->se_cmd, + TCM_UNEXPECTED_UNSOLICITED_DATA, 0); + return -1; + } + /* + * Special case for dealing with Unsolicited DataOUT + * and Unsupported SAM WRITE Opcodes and SE resource allocation + * failures. + */ + + /* Something's amiss if we're not in WRITE_PENDING state... */ + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + WARN_ON(se_cmd->t_state != TRANSPORT_WRITE_PENDING); + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + spin_lock_irqsave(&se_cmd->t_state_lock, flags); + if (!(se_cmd->se_cmd_flags & SCF_SUPPORTED_SAM_OPCODE) || + (se_cmd->se_cmd_flags & SCF_SE_CMD_FAILED)) + dump_unsolicited_data = 1; + spin_unlock_irqrestore(&se_cmd->t_state_lock, flags); + + if (dump_unsolicited_data) { + /* + * Check if a delayed TASK_ABORTED status needs to + * be sent now if the ISCSI_FLAG_CMD_FINAL has been + * received with the unsolicited data out. + */ + if (hdr->flags & ISCSI_FLAG_CMD_FINAL) + iscsit_stop_dataout_timer(cmd); + + transport_check_aborted_status(se_cmd, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + return iscsit_dump_data_payload(conn, payload_length, 1); + } + } else { + /* + * For the normal solicited data path: + * + * Check for a delayed TASK_ABORTED status and dump any + * incoming data out payload if one exists. Also, when the + * ISCSI_FLAG_CMD_FINAL is set to denote the end of the current + * data out sequence, we decrement outstanding_r2ts. Once + * outstanding_r2ts reaches zero, go ahead and send the delayed + * TASK_ABORTED status. + */ + if (atomic_read(&se_cmd->t_transport_aborted) != 0) { + if (hdr->flags & ISCSI_FLAG_CMD_FINAL) + if (--cmd->outstanding_r2ts < 1) { + iscsit_stop_dataout_timer(cmd); + transport_check_aborted_status( + se_cmd, 1); + } + + return iscsit_dump_data_payload(conn, payload_length, 1); + } + } + /* + * Perform DataSN, DataSequenceInOrder, DataPDUInOrder, and + * within-command recovery checks before receiving the payload.
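The receive paths in this file pad the data segment to a 4-byte boundary with padding = ((-payload_length) & 3): negating in two's complement and masking the low bits yields exactly the count needed to reach the next multiple of four. For example:

    (-1024) & 3 == 0	/* already aligned */
    (-1025) & 3 == 3
    (-1027) & 3 == 1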
+ */ + ret = iscsit_check_pre_dataout(cmd, buf); + if (ret == DATAOUT_WITHIN_COMMAND_RECOVERY) + return 0; + else if (ret == DATAOUT_CANNOT_RECOVER) + return -1; + + rx_size += payload_length; + iov = &cmd->iov_data[0]; + + iov_ret = iscsit_map_iovec(cmd, iov, hdr->offset, payload_length); + if (iov_ret < 0) + return -1; + + iov_count += iov_ret; + + padding = ((-payload_length) & 3); + if (padding != 0) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = padding; + rx_size += padding; + pr_debug("Receiving %u padding bytes.\n", padding); + } + + if (conn->conn_ops->DataDigest) { + iov[iov_count].iov_base = &checksum; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size); + + iscsit_unmap_iovec(cmd); + + if (rx_got != rx_size) + return -1; + + if (conn->conn_ops->DataDigest) { + u32 data_crc; + + data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd, + hdr->offset, payload_length, padding, + cmd->pad_bytes); + + if (checksum != data_crc) { + pr_err("ITT: 0x%08x, Offset: %u, Length: %u," + " DataSN: 0x%08x, CRC32C DataDigest 0x%08x" + " does not match computed 0x%08x\n", + hdr->itt, hdr->offset, payload_length, + hdr->datasn, checksum, data_crc); + data_crc_failed = 1; + } else { + pr_debug("Got CRC32C DataDigest 0x%08x for" + " %u bytes of Data Out\n", checksum, + payload_length); + } + } + /* + * Increment post receive data and CRC values or perform + * within-command recovery. + */ + ret = iscsit_check_post_dataout(cmd, buf, data_crc_failed); + if ((ret == DATAOUT_NORMAL) || (ret == DATAOUT_WITHIN_COMMAND_RECOVERY)) + return 0; + else if (ret == DATAOUT_SEND_R2T) { + iscsit_set_dataout_sequence_values(cmd); + iscsit_build_r2ts_for_cmd(cmd, conn, 0); + } else if (ret == DATAOUT_SEND_TO_TRANSPORT) { + /* + * Handle extra special case for out of order + * Unsolicited Data Out. + */ + spin_lock_bh(&cmd->istate_lock); + ooo_cmdsn = (cmd->cmd_flags & ICF_OOO_CMDSN); + cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT; + cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT; + spin_unlock_bh(&cmd->istate_lock); + + iscsit_stop_dataout_timer(cmd); + return (!ooo_cmdsn) ? 
transport_generic_handle_data( + &cmd->se_cmd) : 0; + } else /* DATAOUT_CANNOT_RECOVER */ + return -1; + + return 0; +} + +static int iscsit_handle_nop_out( + struct iscsi_conn *conn, + unsigned char *buf) +{ + unsigned char *ping_data = NULL; + int cmdsn_ret, niov = 0, ret = 0, rx_got, rx_size; + u32 checksum, data_crc, padding = 0, payload_length; + u64 lun; + struct iscsi_cmd *cmd = NULL; + struct kvec *iov = NULL; + struct iscsi_nopout *hdr; + + hdr = (struct iscsi_nopout *) buf; + payload_length = ntoh24(hdr->dlength); + lun = get_unaligned_le64(&hdr->lun); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->ttt = be32_to_cpu(hdr->ttt); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + if ((hdr->itt == 0xFFFFFFFF) && !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + pr_err("NOPOUT ITT is reserved, but Immediate Bit is" + " not set, protocol error.\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) { + pr_err("NOPOUT Ping Data DataSegmentLength: %u is" + " greater than MaxRecvDataSegmentLength: %u, protocol" + " error.\n", payload_length, + conn->conn_ops->MaxRecvDataSegmentLength); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + + pr_debug("Got NOPOUT Ping %s ITT: 0x%08x, TTT: 0x%08x," + " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n", + (hdr->itt == 0xFFFFFFFF) ? "Response" : "Request", + hdr->itt, hdr->ttt, hdr->cmdsn, hdr->exp_statsn, + payload_length); + /* + * This is not a response to an Unsolicited NopIN, which means + * it can either be a NOPOUT ping request (with a valid ITT), + * or a NOPOUT not requesting a NOPIN (with a reserved ITT). + * Either way, make sure we allocate a struct iscsi_cmd, as both + * can contain ping data. + */ + if (hdr->ttt == 0xFFFFFFFF) { + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject( + ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_NOOP_OUT; + cmd->i_state = ISTATE_SEND_NOPIN; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
+ 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->data_direction = DMA_NONE; + } + + if (payload_length && (hdr->ttt == 0xFFFFFFFF)) { + rx_size = payload_length; + ping_data = kzalloc(payload_length + 1, GFP_KERNEL); + if (!ping_data) { + pr_err("Unable to allocate memory for" + " NOPOUT ping data.\n"); + ret = -1; + goto out; + } + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = ping_data; + iov[niov++].iov_len = payload_length; + + padding = ((-payload_length) & 3); + if (padding != 0) { + pr_debug("Receiving %u additional bytes" + " for padding.\n", padding); + iov[niov].iov_base = &cmd->pad_bytes; + iov[niov++].iov_len = padding; + rx_size += padding; + } + if (conn->conn_ops->DataDigest) { + iov[niov].iov_base = &checksum; + iov[niov++].iov_len = ISCSI_CRC_LEN; + rx_size += ISCSI_CRC_LEN; + } + + rx_got = rx_data(conn, &cmd->iov_misc[0], niov, rx_size); + if (rx_got != rx_size) { + ret = -1; + goto out; + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_rx_hash, + ping_data, payload_length, + padding, cmd->pad_bytes, + (u8 *)&data_crc); + + if (checksum != data_crc) { + pr_err("Ping data CRC32C DataDigest" + " 0x%08x does not match computed 0x%08x\n", + checksum, data_crc); + if (!conn->sess->sess_ops->ErrorRecoveryLevel) { + pr_err("Unable to recover from" + " NOPOUT Ping DataCRC failure while in" + " ERL=0.\n"); + ret = -1; + goto out; + } else { + /* + * Silently drop this PDU and let the + * initiator plug the CmdSN gap. + */ + pr_debug("Dropping NOPOUT" + " Command CmdSN: 0x%08x due to" + " DataCRC error.\n", hdr->cmdsn); + ret = 0; + goto out; + } + } else { + pr_debug("Got CRC32C DataDigest" + " 0x%08x for %u bytes of ping data.\n", + checksum, payload_length); + } + } + + ping_data[payload_length] = '\0'; + /* + * Attach ping data to struct iscsi_cmd->buf_ptr. + */ + cmd->buf_ptr = (void *)ping_data; + cmd->buf_ptr_size = payload_length; + + pr_debug("Got %u bytes of NOPOUT ping" + " data.\n", payload_length); + pr_debug("Ping Data: \"%s\"\n", ping_data); + } + + if (hdr->itt != 0xFFFFFFFF) { + if (!cmd) { + pr_err("Checking CmdSN for NOPOUT," + " but cmd is NULL!\n"); + return -1; + } + /* + * Initiator is expecting a NopIN ping reply. + */ + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (hdr->opcode & ISCSI_OP_IMMEDIATE) { + iscsit_add_cmd_to_response_queue(cmd, conn, + cmd->i_state); + return 0; + } + + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + ret = 0; + goto ping_out; + } + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + return 0; + } + + if (hdr->ttt != 0xFFFFFFFF) { + /* + * This was a response to an unsolicited NOPIN ping. + */ + cmd = iscsit_find_cmd_from_ttt(conn, hdr->ttt); + if (!cmd) + return -1; + + iscsit_stop_nopin_response_timer(conn); + + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + iscsit_start_nopin_timer(conn); + } else { + /* + * Initiator is not expecting a NOPIN in response. + * Just ignore for now. + * + * iSCSI v19-91 10.18 + * "A NOP-OUT may also be used to confirm a changed + * ExpStatSN if another PDU will not be available + * for a long time."
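The rule quoted above is the last of three NOPOUT shapes this handler separates purely by ITT/TTT (reserved meaning 0xFFFFFFFF), summarized:

    ITT valid,    TTT reserved  ->  ping request: allocate a cmd, queue a NOPIN response
    ITT reserved, TTT valid     ->  reply to our unsolicited NOPIN: look the cmd up by TTT and retire it
    ITT reserved, TTT reserved  ->  ExpStatSN confirmation only: the PDU is ignored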
+ */ + ret = 0; + goto out; + } + + return 0; +out: + if (cmd) + iscsit_release_cmd(cmd); +ping_out: + kfree(ping_data); + return ret; +} + +static int iscsit_handle_task_mgt_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_cmd *cmd; + struct se_tmr_req *se_tmr; + struct iscsi_tmr_req *tmr_req; + struct iscsi_tm *hdr; + u32 payload_length; + int out_of_order_cmdsn = 0; + int ret; + u8 function; + + hdr = (struct iscsi_tm *) buf; + payload_length = ntoh24(hdr->dlength); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->rtt = be32_to_cpu(hdr->rtt); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + hdr->refcmdsn = be32_to_cpu(hdr->refcmdsn); + hdr->exp_datasn = be32_to_cpu(hdr->exp_datasn); + hdr->flags &= ~ISCSI_FLAG_CMD_FINAL; + function = hdr->flags; + + pr_debug("Got Task Management Request ITT: 0x%08x, CmdSN:" + " 0x%08x, Function: 0x%02x, RefTaskTag: 0x%08x, RefCmdSN:" + " 0x%08x, CID: %hu\n", hdr->itt, hdr->cmdsn, function, + hdr->rtt, hdr->refcmdsn, conn->cid); + + if ((function != ISCSI_TM_FUNC_ABORT_TASK) && + ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && + (hdr->rtt != ISCSI_RESERVED_TAG))) { + pr_err("RefTaskTag should be set to 0xFFFFFFFF.\n"); + hdr->rtt = ISCSI_RESERVED_TAG; + } + + if ((function == ISCSI_TM_FUNC_TASK_REASSIGN) && + !(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + pr_err("Task Management Request TASK_REASSIGN not" + " issued as immediate command, bad iSCSI Initiator" + " implementation\n"); + return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1, + buf, conn); + } + if ((function != ISCSI_TM_FUNC_ABORT_TASK) && + (hdr->refcmdsn != ISCSI_RESERVED_TAG)) + hdr->refcmdsn = ISCSI_RESERVED_TAG; + + cmd = iscsit_allocate_se_cmd_for_tmr(conn, function); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_SCSI_TMFUNC; + cmd->i_state = ISTATE_SEND_TASKMGTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ?
1 : 0); + cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + se_tmr = cmd->se_cmd.se_tmr_req; + tmr_req = cmd->tmr_req; + /* + * Locate the struct se_lun for all TMRs not related to ERL=2 TASK_REASSIGN + */ + if (function != ISCSI_TM_FUNC_TASK_REASSIGN) { + ret = iscsit_get_lun_for_tmr(cmd, + get_unaligned_le64(&hdr->lun)); + if (ret < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_NO_LUN; + goto attach; + } + } + + switch (function) { + case ISCSI_TM_FUNC_ABORT_TASK: + se_tmr->response = iscsit_tmr_abort_task(cmd, buf); + if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + goto attach; + } + break; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + case ISCSI_TM_FUNC_CLEAR_ACA: + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + break; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + if (iscsit_tmr_task_warm_reset(conn, tmr_req, buf) < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; + goto attach; + } + break; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + if (iscsit_tmr_task_cold_reset(conn, tmr_req, buf) < 0) { + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_AUTH_FAILED; + goto attach; + } + break; + case ISCSI_TM_FUNC_TASK_REASSIGN: + se_tmr->response = iscsit_tmr_task_reassign(cmd, buf); + /* + * Perform sanity checks on the ExpDataSN only if the + * TASK_REASSIGN was successful. + */ + if (se_tmr->response != ISCSI_TMF_RSP_COMPLETE) + break; + + if (iscsit_check_task_reassign_expdatasn(tmr_req, conn) < 0) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_INVALID, 1, 1, + buf, cmd); + break; + default: + pr_err("Unknown TMR function: 0x%02x, protocol" + " error.\n", function); + cmd->se_cmd.se_cmd_flags |= SCF_SCSI_CDB_EXCEPTION; + se_tmr->response = ISCSI_TMF_RSP_NOT_SUPPORTED; + goto attach; + } + + if ((function != ISCSI_TM_FUNC_TASK_REASSIGN) && + (se_tmr->response == ISCSI_TMF_RSP_COMPLETE)) + se_tmr->call_transport = 1; +attach: + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + int cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_HIGHER_THAN_EXP) + out_of_order_cmdsn = 1; + else if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) { + return 0; + } else { /* (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) */ + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + } + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (out_of_order_cmdsn) + return 0; + /* + * Found the referenced task, send to transport for processing. + */ + if (se_tmr->call_transport) + return transport_generic_handle_tmr(&cmd->se_cmd); + + /* + * Could not find the referenced LUN, task, or Task Management + * command not authorized or supported. Change state and + * let the tx_thread send the response. + * + * For connection recovery, this is also the default action for + * TMR TASK_REASSIGN. 
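Every failure in the switch above is funneled into a response code before the queueing below; condensed, and assuming se_tmr->response starts out as ISCSI_TMF_RSP_COMPLETE:

    LUN lookup failure (non-TASK_REASSIGN)  ->  ISCSI_TMF_RSP_NO_LUN
    failed ABORT_TASK                       ->  code from iscsit_tmr_abort_task()
    refused warm/cold reset                 ->  ISCSI_TMF_RSP_AUTH_FAILED
    unrecognized function                   ->  ISCSI_TMF_RSP_NOT_SUPPORTED
    otherwise                               ->  call_transport = 1, the TMR goes to the transport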
+	 */
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+	return 0;
+}
+
+/* #warning FIXME: Support Text Command parameters besides SendTargets */
+static int iscsit_handle_text_cmd(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	char *text_ptr, *text_in;
+	int cmdsn_ret, niov = 0, rx_got, rx_size;
+	u32 checksum = 0, data_crc = 0, payload_length;
+	u32 padding = 0, text_length = 0;
+	u8 pad_bytes[4]; /* cmd is not allocated yet here, use a local pad buffer */
+	struct iscsi_cmd *cmd;
+	struct kvec iov[3];
+	struct iscsi_text *hdr;
+
+	hdr = (struct iscsi_text *) buf;
+	payload_length = ntoh24(hdr->dlength);
+	hdr->itt = be32_to_cpu(hdr->itt);
+	hdr->ttt = be32_to_cpu(hdr->ttt);
+	hdr->cmdsn = be32_to_cpu(hdr->cmdsn);
+	hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+
+	if (payload_length > conn->conn_ops->MaxRecvDataSegmentLength) {
+		pr_err("Unable to accept text parameter length: %u"
+			" greater than MaxRecvDataSegmentLength %u.\n",
+			payload_length, conn->conn_ops->MaxRecvDataSegmentLength);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	pr_debug("Got Text Request: ITT: 0x%08x, CmdSN: 0x%08x,"
+		" ExpStatSN: 0x%08x, Length: %u\n", hdr->itt, hdr->cmdsn,
+		hdr->exp_statsn, payload_length);
+
+	rx_size = text_length = payload_length;
+	if (text_length) {
+		text_in = kzalloc(text_length, GFP_KERNEL);
+		if (!text_in) {
+			pr_err("Unable to allocate memory for"
+				" incoming text parameters\n");
+			return -1;
+		}
+
+		memset(iov, 0, 3 * sizeof(struct kvec));
+		iov[niov].iov_base = text_in;
+		iov[niov++].iov_len = text_length;
+
+		padding = ((-payload_length) & 3);
+		if (padding != 0) {
+			iov[niov].iov_base = pad_bytes;
+			iov[niov++].iov_len = padding;
+			rx_size += padding;
+			pr_debug("Receiving %u additional bytes"
+					" for padding.\n", padding);
+		}
+		if (conn->conn_ops->DataDigest) {
+			iov[niov].iov_base = &checksum;
+			iov[niov++].iov_len = ISCSI_CRC_LEN;
+			rx_size += ISCSI_CRC_LEN;
+		}
+
+		rx_got = rx_data(conn, &iov[0], niov, rx_size);
+		if (rx_got != rx_size) {
+			kfree(text_in);
+			return -1;
+		}
+
+		if (conn->conn_ops->DataDigest) {
+			iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+					text_in, text_length,
+					padding, pad_bytes,
+					(u8 *)&data_crc);
+
+			if (checksum != data_crc) {
+				pr_err("Text data CRC32C DataDigest"
+					" 0x%08x does not match computed"
+					" 0x%08x\n", checksum, data_crc);
+				if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+					pr_err("Unable to recover from"
+					" Text Data digest failure while in"
+					" ERL=0.\n");
+					kfree(text_in);
+					return -1;
+				} else {
+					/*
+					 * Silently drop this PDU and let the
+					 * initiator plug the CmdSN gap.
+					 */
+					pr_debug("Dropping Text"
+					" Command CmdSN: 0x%08x due to"
+					" DataCRC error.\n", hdr->cmdsn);
+					kfree(text_in);
+					return 0;
+				}
+			} else {
+				pr_debug("Got CRC32C DataDigest"
+					" 0x%08x for %u bytes of text data.\n",
+					checksum, text_length);
+			}
+		}
+		text_in[text_length - 1] = '\0';
+		pr_debug("Successfully read %d bytes of text"
+				" data.\n", text_length);
+
+		if (strncmp("SendTargets", text_in, 11) != 0) {
+			pr_err("Received Text Data that is not"
+				" SendTargets, cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+		text_ptr = strchr(text_in, '=');
+		if (!text_ptr) {
+			pr_err("No \"=\" separator found in Text Data,"
+				" cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+		if (strncmp("=All", text_ptr, 4) != 0) {
+			pr_err("Unable to locate All value for"
+				" SendTargets key, cannot continue.\n");
+			kfree(text_in);
+			return -1;
+		}
+/*#warning Support SendTargets=(iSCSI Target Name/Nothing) values.
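+  Per RFC 3720 the SendTargets value may also be a specific iSCSI target
+  name or empty; only the "=All" form is handled above.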
*/ + kfree(text_in); + } + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, + 1, buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_TEXT; + cmd->i_state = ISTATE_SEND_TEXTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->data_direction = DMA_NONE; + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + if (!(hdr->opcode & ISCSI_OP_IMMEDIATE)) { + cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn); + if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + + return 0; + } + + return iscsit_execute_cmd(cmd, 0); +} + +int iscsit_logout_closesession(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_conn *conn_p; + struct iscsi_session *sess = conn->sess; + + pr_debug("Received logout request CLOSESESSION on CID: %hu" + " for SID: %u.\n", conn->cid, conn->sess->sid); + + atomic_set(&sess->session_logout, 1); + atomic_set(&conn->conn_logout_remove, 1); + conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_SESSION; + + iscsit_inc_conn_usage_count(conn); + iscsit_inc_session_usage_count(sess); + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn_p, &sess->sess_conn_list, conn_list) { + if (conn_p->conn_state != TARG_CONN_STATE_LOGGED_IN) + continue; + + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + conn_p->conn_state = TARG_CONN_STATE_IN_LOGOUT; + } + spin_unlock_bh(&sess->conn_lock); + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +int iscsit_logout_closeconnection(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_conn *l_conn; + struct iscsi_session *sess = conn->sess; + + pr_debug("Received logout request CLOSECONNECTION for CID:" + " %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); + + /* + * A Logout Request with a CLOSECONNECTION reason code for a CID + * can arrive on a connection with a differing CID. + */ + if (conn->cid == cmd->logout_cid) { + spin_lock_bh(&conn->state_lock); + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; + + atomic_set(&conn->conn_logout_remove, 1); + conn->conn_logout_reason = ISCSI_LOGOUT_REASON_CLOSE_CONNECTION; + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&conn->state_lock); + } else { + /* + * Handle all different cid CLOSECONNECTION requests in + * iscsit_logout_post_handler_diffcid() as to give enough + * time for any non immediate command's CmdSN to be + * acknowledged on the connection in question. + * + * Here we simply make sure the CID is still around. 
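+		 * Note that iscsit_get_conn_from_cid() takes a usage count
+		 * reference on the connection it returns, which is why the
+		 * success path below drops it again with
+		 * iscsit_dec_conn_usage_count().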
+ */ + l_conn = iscsit_get_conn_from_cid(sess, + cmd->logout_cid); + if (!l_conn) { + cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; + iscsit_add_cmd_to_response_queue(cmd, conn, + cmd->i_state); + return 0; + } + + iscsit_dec_conn_usage_count(l_conn); + } + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *cmd, struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + pr_debug("Received explicit REMOVECONNFORRECOVERY logout for" + " CID: %hu on CID: %hu.\n", cmd->logout_cid, conn->cid); + + if (sess->sess_ops->ErrorRecoveryLevel != 2) { + pr_err("Received Logout Request REMOVECONNFORRECOVERY" + " while ERL!=2.\n"); + cmd->logout_response = ISCSI_LOGOUT_RECOVERY_UNSUPPORTED; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + if (conn->cid == cmd->logout_cid) { + pr_err("Received Logout Request REMOVECONNFORRECOVERY" + " with CID: %hu on CID: %hu, implementation error.\n", + cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_CLEANUP_FAILED; + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + return 0; + } + + iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state); + + return 0; +} + +static int iscsit_handle_logout_cmd( + struct iscsi_conn *conn, + unsigned char *buf) +{ + int cmdsn_ret, logout_remove = 0; + u8 reason_code = 0; + struct iscsi_cmd *cmd; + struct iscsi_logout *hdr; + struct iscsi_tiqn *tiqn = iscsit_snmp_get_tiqn(conn); + + hdr = (struct iscsi_logout *) buf; + reason_code = (hdr->flags & 0x7f); + hdr->itt = be32_to_cpu(hdr->itt); + hdr->cid = be16_to_cpu(hdr->cid); + hdr->cmdsn = be32_to_cpu(hdr->cmdsn); + hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn); + + if (tiqn) { + spin_lock(&tiqn->logout_stats.lock); + if (reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) + tiqn->logout_stats.normal_logouts++; + else + tiqn->logout_stats.abnormal_logouts++; + spin_unlock(&tiqn->logout_stats.lock); + } + + pr_debug("Got Logout Request ITT: 0x%08x CmdSN: 0x%08x" + " ExpStatSN: 0x%08x Reason: 0x%02x CID: %hu on CID: %hu\n", + hdr->itt, hdr->cmdsn, hdr->exp_statsn, reason_code, + hdr->cid, conn->cid); + + if (conn->conn_state != TARG_CONN_STATE_LOGGED_IN) { + pr_err("Received logout request on connection that" + " is not in logged in state, ignoring request.\n"); + return 0; + } + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return iscsit_add_reject(ISCSI_REASON_BOOKMARK_NO_RESOURCES, 1, + buf, conn); + + cmd->iscsi_opcode = ISCSI_OP_LOGOUT; + cmd->i_state = ISTATE_SEND_LOGOUTRSP; + cmd->immediate_cmd = ((hdr->opcode & ISCSI_OP_IMMEDIATE) ? 1 : 0); + conn->sess->init_task_tag = cmd->init_task_tag = hdr->itt; + cmd->targ_xfer_tag = 0xFFFFFFFF; + cmd->cmd_sn = hdr->cmdsn; + cmd->exp_stat_sn = hdr->exp_statsn; + cmd->logout_cid = hdr->cid; + cmd->logout_reason = reason_code; + cmd->data_direction = DMA_NONE; + + /* + * We need to sleep in these cases (by returning 1) until the Logout + * Response gets sent in the tx thread. + */ + if ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_SESSION) || + ((reason_code == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) && + (hdr->cid == conn->cid))) + logout_remove = 1; + + spin_lock_bh(&conn->cmd_lock); + list_add_tail(&cmd->i_list, &conn->conn_cmd_list); + spin_unlock_bh(&conn->cmd_lock); + + if (reason_code != ISCSI_LOGOUT_REASON_RECOVERY) + iscsit_ack_from_expstatsn(conn, hdr->exp_statsn); + + /* + * Immediate commands are executed, well, immediately. 
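+	 * (that is, without waiting for any CmdSN ordering, per RFC 3720).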
+	 * Non-Immediate Logout Commands are executed in CmdSN order.
+	 */
+	if (hdr->opcode & ISCSI_OP_IMMEDIATE) {
+		int ret = iscsit_execute_cmd(cmd, 0);
+
+		if (ret < 0)
+			return ret;
+	} else {
+		cmdsn_ret = iscsit_sequence_cmd(conn, cmd, hdr->cmdsn);
+		if (cmdsn_ret == CMDSN_LOWER_THAN_EXP) {
+			logout_remove = 0;
+		} else if (cmdsn_ret == CMDSN_ERROR_CANNOT_RECOVER) {
+			return iscsit_add_reject_from_cmd(
+				ISCSI_REASON_PROTOCOL_ERROR,
+				1, 0, buf, cmd);
+		}
+	}
+
+	return logout_remove;
+}
+
+static int iscsit_handle_snack(
+	struct iscsi_conn *conn,
+	unsigned char *buf)
+{
+	u32 unpacked_lun;
+	u64 lun;
+	struct iscsi_snack *hdr;
+
+	hdr = (struct iscsi_snack *) buf;
+	hdr->flags &= ~ISCSI_FLAG_CMD_FINAL;
+	lun = get_unaligned_le64(&hdr->lun);
+	unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun);
+	hdr->itt = be32_to_cpu(hdr->itt);
+	hdr->ttt = be32_to_cpu(hdr->ttt);
+	hdr->exp_statsn = be32_to_cpu(hdr->exp_statsn);
+	hdr->begrun = be32_to_cpu(hdr->begrun);
+	hdr->runlength = be32_to_cpu(hdr->runlength);
+
+	pr_debug("Got ISCSI_INIT_SNACK, ITT: 0x%08x, ExpStatSN:"
+		" 0x%08x, Type: 0x%02x, BegRun: 0x%08x, RunLength: 0x%08x,"
+		" CID: %hu\n", hdr->itt, hdr->exp_statsn, hdr->flags,
+		hdr->begrun, hdr->runlength, conn->cid);
+
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Initiator sent SNACK request while in"
+			" ErrorRecoveryLevel=0.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+	/*
+	 * SNACK_DATA and SNACK_R2T are both 0, so check which function to
+	 * call from inside iscsi_send_recovery_datain_or_r2t().
+	 */
+	switch (hdr->flags & ISCSI_FLAG_SNACK_TYPE_MASK) {
+	case 0:
+		return iscsit_handle_recovery_datain_or_r2t(conn, buf,
+			hdr->itt, hdr->ttt, hdr->begrun, hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_STATUS:
+		return iscsit_handle_status_snack(conn, hdr->itt, hdr->ttt,
+			hdr->begrun, hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_DATA_ACK:
+		return iscsit_handle_data_ack(conn, hdr->ttt, hdr->begrun,
+			hdr->runlength);
+	case ISCSI_FLAG_SNACK_TYPE_RDATA:
+		/* FIXME: Support R-Data SNACK */
+		pr_err("R-Data SNACK Not Supported.\n");
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	default:
+		pr_err("Unknown SNACK type 0x%02x, protocol"
+			" error.\n", hdr->flags & 0x0f);
+		return iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buf, conn);
+	}
+
+	return 0;
+}
+
+static void iscsit_rx_thread_wait_for_tcp(struct iscsi_conn *conn)
+{
+	if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) ||
+	    (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) {
+		wait_for_completion_interruptible_timeout(
+					&conn->rx_half_close_comp,
+					ISCSI_RX_THREAD_TCP_TIMEOUT * HZ);
+	}
+}
+
+static int iscsit_handle_immediate_data(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u32 length)
+{
+	int iov_ret, rx_got = 0, rx_size = 0;
+	u32 checksum, iov_count = 0, padding = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct kvec *iov;
+
+	iov_ret = iscsit_map_iovec(cmd, cmd->iov_data, cmd->write_data_done, length);
+	if (iov_ret < 0)
+		return IMMEDIATE_DATA_CANNOT_RECOVER;
+
+	rx_size = length;
+	iov_count = iov_ret;
+	iov = &cmd->iov_data[0];
+
+	padding = ((-length) & 3);
+	if (padding != 0) {
+		iov[iov_count].iov_base = cmd->pad_bytes;
+		iov[iov_count++].iov_len = padding;
+		rx_size += padding;
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		iov[iov_count].iov_base = &checksum;
+		iov[iov_count++].iov_len = ISCSI_CRC_LEN;
+		rx_size += ISCSI_CRC_LEN;
+	}
+
+	rx_got = rx_data(conn, &cmd->iov_data[0], iov_count, rx_size);
+
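+	/*
+	 * iscsit_map_iovec() above mapped the command's scatterlist pages
+	 * for the socket receive, so always unmap them before acting on
+	 * the result of rx_data().
+	 */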
+	iscsit_unmap_iovec(cmd);
+
+	if (rx_got != rx_size) {
+		iscsit_rx_thread_wait_for_tcp(conn);
+		return IMMEDIATE_DATA_CANNOT_RECOVER;
+	}
+
+	if (conn->conn_ops->DataDigest) {
+		u32 data_crc;
+
+		data_crc = iscsit_do_crypto_hash_sg(&conn->conn_rx_hash, cmd,
+						cmd->write_data_done, length, padding,
+						cmd->pad_bytes);
+
+		if (checksum != data_crc) {
+			pr_err("ImmediateData CRC32C DataDigest 0x%08x"
+				" does not match computed 0x%08x\n", checksum,
+				data_crc);
+
+			if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+				pr_err("Unable to recover from"
+					" Immediate Data digest failure while"
+					" in ERL=0.\n");
+				iscsit_add_reject_from_cmd(
+						ISCSI_REASON_DATA_DIGEST_ERROR,
+						1, 0, buf, cmd);
+				return IMMEDIATE_DATA_CANNOT_RECOVER;
+			} else {
+				iscsit_add_reject_from_cmd(
+						ISCSI_REASON_DATA_DIGEST_ERROR,
+						0, 0, buf, cmd);
+				return IMMEDIATE_DATA_ERL1_CRC_FAILURE;
+			}
+		} else {
+			pr_debug("Got CRC32C DataDigest 0x%08x for"
+				" %u bytes of Immediate Data\n", checksum,
+				length);
+		}
+	}
+
+	cmd->write_data_done += length;
+
+	if (cmd->write_data_done == cmd->data_length) {
+		spin_lock_bh(&cmd->istate_lock);
+		cmd->cmd_flags |= ICF_GOT_LAST_DATAOUT;
+		cmd->i_state = ISTATE_RECEIVED_LAST_DATAOUT;
+		spin_unlock_bh(&cmd->istate_lock);
+	}
+
+	return IMMEDIATE_DATA_NORMAL_OPERATION;
+}
+
+/*
+ * Called with sess->conn_lock held.
+ */
+/* #warning iscsi_build_conn_drop_async_message() only sends out on connections
+	with active network interface */
+static void iscsit_build_conn_drop_async_message(struct iscsi_conn *conn)
+{
+	struct iscsi_cmd *cmd;
+	struct iscsi_conn *conn_p;
+	bool found = false;
+
+	/*
+	 * Only send an Asynchronous Message on connections whose network
+	 * interface is still functional.
+	 */
+	list_for_each_entry(conn_p, &conn->sess->sess_conn_list, conn_list) {
+		if (conn_p->conn_state == TARG_CONN_STATE_LOGGED_IN) {
+			iscsit_inc_conn_usage_count(conn_p);
+			found = true;
+			break;
+		}
+	}
+
+	if (!found)
+		return;
+
+	cmd = iscsit_allocate_cmd(conn_p, GFP_KERNEL);
+	if (!cmd) {
+		iscsit_dec_conn_usage_count(conn_p);
+		return;
+	}
+
+	cmd->logout_cid = conn->cid;
+	cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+	cmd->i_state = ISTATE_SEND_ASYNCMSG;
+
+	spin_lock_bh(&conn_p->cmd_lock);
+	list_add_tail(&cmd->i_list, &conn_p->conn_cmd_list);
+	spin_unlock_bh(&conn_p->cmd_lock);
+
+	iscsit_add_cmd_to_response_queue(cmd, conn_p, cmd->i_state);
+	iscsit_dec_conn_usage_count(conn_p);
+}
+
+static int iscsit_send_conn_drop_async_message(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn)
+{
+	struct iscsi_async *hdr;
+
+	cmd->tx_size = ISCSI_HDR_LEN;
+	cmd->iscsi_opcode = ISCSI_OP_ASYNC_EVENT;
+
+	hdr = (struct iscsi_async *) cmd->pdu;
+	hdr->opcode = ISCSI_OP_ASYNC_EVENT;
+	hdr->flags = ISCSI_FLAG_CMD_FINAL;
+	cmd->init_task_tag = 0xFFFFFFFF;
+	cmd->targ_xfer_tag = 0xFFFFFFFF;
+	put_unaligned_be64(0xFFFFFFFFFFFFFFFFULL, &hdr->rsvd4[0]);
+	cmd->stat_sn = conn->stat_sn++;
+	hdr->statsn = cpu_to_be32(cmd->stat_sn);
+	hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn);
+	hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn);
+	hdr->async_event = ISCSI_ASYNC_MSG_DROPPING_CONNECTION;
+	hdr->param1 = cpu_to_be16(cmd->logout_cid);
+	hdr->param2 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Wait);
+	hdr->param3 = cpu_to_be16(conn->sess->sess_ops->DefaultTime2Retain);
+
+	if (conn->conn_ops->HeaderDigest) {
+		u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN];
+
+		iscsit_do_crypto_hash_buf(&conn->conn_tx_hash,
+				(unsigned char *)hdr, ISCSI_HDR_LEN,
+				0, NULL, (u8 *)header_digest);
+
+		cmd->tx_size += ISCSI_CRC_LEN;
+		pr_debug("Attaching CRC32C HeaderDigest to"
+			" Async Message 0x%08x\n", *header_digest);
+	}
+
+	cmd->iov_misc[0].iov_base = cmd->pdu;
+	cmd->iov_misc[0].iov_len = cmd->tx_size;
+	cmd->iov_misc_count = 1;
+
+	pr_debug("Sending Connection Dropped Async Message StatSN:"
+		" 0x%08x, for CID: %hu on CID: %hu\n", cmd->stat_sn,
+		cmd->logout_cid, conn->cid);
+	return 0;
+}
+
+static int iscsit_send_data_in(
+	struct iscsi_cmd *cmd,
+	struct iscsi_conn *conn,
+	int *eodr)
+{
+	int iov_ret = 0, set_statsn = 0;
+	u32 iov_count = 0, tx_size = 0;
+	struct iscsi_datain datain;
+	struct iscsi_datain_req *dr;
+	struct iscsi_data_rsp *hdr;
+	struct kvec *iov;
+
+	memset(&datain, 0, sizeof(struct iscsi_datain));
+	dr = iscsit_get_datain_values(cmd, &datain);
+	if (!dr) {
+		pr_err("iscsit_get_datain_values failed for ITT: 0x%08x\n",
+				cmd->init_task_tag);
+		return -1;
+	}
+
+	/*
+	 * Be paranoid and double check the logic for now.
+	 */
+	if ((datain.offset + datain.length) > cmd->data_length) {
+		pr_err("Command ITT: 0x%08x, datain.offset: %u and"
+			" datain.length: %u exceeds cmd->data_length: %u\n",
+			cmd->init_task_tag, datain.offset, datain.length,
+			cmd->data_length);
+		return -1;
+	}
+
+	spin_lock_bh(&conn->sess->session_stats_lock);
+	conn->sess->tx_data_octets += datain.length;
+	if (conn->sess->se_sess->se_node_acl) {
+		spin_lock(&conn->sess->se_sess->se_node_acl->stats_lock);
+		conn->sess->se_sess->se_node_acl->read_bytes += datain.length;
+		spin_unlock(&conn->sess->se_sess->se_node_acl->stats_lock);
+	}
+	spin_unlock_bh(&conn->sess->session_stats_lock);
+	/*
+	 * Special case for successful execution w/ both DATAIN
+	 * and Sense Data.
+	 */
+	if ((datain.flags & ISCSI_FLAG_DATA_STATUS) &&
+	    (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE))
+		datain.flags &= ~ISCSI_FLAG_DATA_STATUS;
+	else {
+		if ((dr->dr_complete == DATAIN_COMPLETE_NORMAL) ||
+		    (dr->dr_complete == DATAIN_COMPLETE_CONNECTION_RECOVERY)) {
+			iscsit_increment_maxcmdsn(cmd, conn->sess);
+			cmd->stat_sn = conn->stat_sn++;
+			set_statsn = 1;
+		} else if (dr->dr_complete ==
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY)
+			set_statsn = 1;
+	}
+
+	hdr = (struct iscsi_data_rsp *) cmd->pdu;
+	memset(hdr, 0, ISCSI_HDR_LEN);
+	hdr->opcode = ISCSI_OP_SCSI_DATA_IN;
+	hdr->flags = datain.flags;
+	if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
+		if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) {
+			hdr->flags |= ISCSI_FLAG_DATA_OVERFLOW;
+			hdr->residual_count = cpu_to_be32(cmd->residual_count);
+		} else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) {
+			hdr->flags |= ISCSI_FLAG_DATA_UNDERFLOW;
+			hdr->residual_count = cpu_to_be32(cmd->residual_count);
+		}
+	}
+	hton24(hdr->dlength, datain.length);
+	if (hdr->flags & ISCSI_FLAG_DATA_ACK)
+		int_to_scsilun(cmd->se_cmd.orig_fe_lun,
+				(struct scsi_lun *)&hdr->lun);
+	else
+		put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun);
+
+	hdr->itt = cpu_to_be32(cmd->init_task_tag);
+	hdr->ttt = (hdr->flags & ISCSI_FLAG_DATA_ACK) ?
+		   cpu_to_be32(cmd->targ_xfer_tag) :
+		   0xFFFFFFFF;
+	hdr->statsn = (set_statsn) ?
cpu_to_be32(cmd->stat_sn) : + 0xFFFFFFFF; + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->datasn = cpu_to_be32(datain.data_sn); + hdr->offset = cpu_to_be32(datain.offset); + + iov = &cmd->iov_data[0]; + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching CRC32 HeaderDigest" + " for DataIN PDU 0x%08x\n", *header_digest); + } + + iov_ret = iscsit_map_iovec(cmd, &cmd->iov_data[1], datain.offset, datain.length); + if (iov_ret < 0) + return -1; + + iov_count += iov_ret; + tx_size += datain.length; + + cmd->padding = ((-datain.length) & 3); + if (cmd->padding) { + iov[iov_count].iov_base = cmd->pad_bytes; + iov[iov_count++].iov_len = cmd->padding; + tx_size += cmd->padding; + + pr_debug("Attaching %u padding bytes\n", + cmd->padding); + } + if (conn->conn_ops->DataDigest) { + cmd->data_crc = iscsit_do_crypto_hash_sg(&conn->conn_tx_hash, cmd, + datain.offset, datain.length, cmd->padding, cmd->pad_bytes); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attached CRC32C DataDigest %d bytes, crc" + " 0x%08x\n", datain.length+cmd->padding, cmd->data_crc); + } + + cmd->iov_data_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built DataIN ITT: 0x%08x, StatSN: 0x%08x," + " DataSN: 0x%08x, Offset: %u, Length: %u, CID: %hu\n", + cmd->init_task_tag, ntohl(hdr->statsn), ntohl(hdr->datasn), + ntohl(hdr->offset), datain.length, conn->cid); + + if (dr->dr_complete) { + *eodr = (cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) ? + 2 : 1; + iscsit_free_datain_req(cmd, dr); + } + + return 0; +} + +static int iscsit_send_logout_response( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int niov = 0, tx_size; + struct iscsi_conn *logout_conn = NULL; + struct iscsi_conn_recovery *cr = NULL; + struct iscsi_session *sess = conn->sess; + struct kvec *iov; + struct iscsi_logout_rsp *hdr; + /* + * The actual shutting down of Sessions and/or Connections + * for CLOSESESSION and CLOSECONNECTION Logout Requests + * is done in scsi_logout_post_handler(). + */ + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + pr_debug("iSCSI session logout successful, setting" + " logout response to ISCSI_LOGOUT_SUCCESS.\n"); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + if (cmd->logout_response == ISCSI_LOGOUT_CID_NOT_FOUND) + break; + /* + * For CLOSECONNECTION logout requests carrying + * a matching logout CID -> local CID, the reference + * for the local CID will have been incremented in + * iscsi_logout_closeconnection(). + * + * For CLOSECONNECTION logout requests carrying + * a different CID than the connection it arrived + * on, the connection responding to cmd->logout_cid + * is stopped in iscsit_logout_post_handler_diffcid(). 
+ */ + + pr_debug("iSCSI CID: %hu logout on CID: %hu" + " successful.\n", cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + if ((cmd->logout_response == ISCSI_LOGOUT_RECOVERY_UNSUPPORTED) || + (cmd->logout_response == ISCSI_LOGOUT_CLEANUP_FAILED)) + break; + /* + * If the connection is still active from our point of view + * force connection recovery to occur. + */ + logout_conn = iscsit_get_conn_from_cid_rcfr(sess, + cmd->logout_cid); + if ((logout_conn)) { + iscsit_connection_reinstatement_rcfr(logout_conn); + iscsit_dec_conn_usage_count(logout_conn); + } + + cr = iscsit_get_inactive_connection_recovery_entry( + conn->sess, cmd->logout_cid); + if (!cr) { + pr_err("Unable to locate CID: %hu for" + " REMOVECONNFORRECOVERY Logout Request.\n", + cmd->logout_cid); + cmd->logout_response = ISCSI_LOGOUT_CID_NOT_FOUND; + break; + } + + iscsit_discard_cr_cmds_by_expstatsn(cr, cmd->exp_stat_sn); + + pr_debug("iSCSI REMOVECONNFORRECOVERY logout" + " for recovery for CID: %hu on CID: %hu successful.\n", + cmd->logout_cid, conn->cid); + cmd->logout_response = ISCSI_LOGOUT_SUCCESS; + break; + default: + pr_err("Unknown cmd->logout_reason: 0x%02x\n", + cmd->logout_reason); + return -1; + } + + tx_size = ISCSI_HDR_LEN; + hdr = (struct iscsi_logout_rsp *)cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_LOGOUT_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->response = cmd->logout_response; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = cmd->pdu; + iov[niov++].iov_len = ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest to" + " Logout Response 0x%08x\n", *header_digest); + } + cmd->iov_misc_count = niov; + cmd->tx_size = tx_size; + + pr_debug("Sending Logout Response ITT: 0x%08x StatSN:" + " 0x%08x Response: 0x%02x CID: %hu on CID: %hu\n", + cmd->init_task_tag, cmd->stat_sn, hdr->response, + cmd->logout_cid, conn->cid); + + return 0; +} + +/* + * Unsolicited NOPIN, either requesting a response or not. 
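+ * The want_response distinction is handled by the caller in the tx thread,
+ * which arms the NopIn response timer before sending a NOPIN that expects
+ * a NopOut reply from the initiator.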
+ */ +static int iscsit_send_unsolicited_nopin( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int want_response) +{ + int tx_size = ISCSI_HDR_LEN; + struct iscsi_nopin *hdr; + + hdr = (struct iscsi_nopin *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_NOOP_IN; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest to" + " NopIN 0x%08x\n", *header_digest); + } + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = tx_size; + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + pr_debug("Sending Unsolicited NOPIN TTT: 0x%08x StatSN:" + " 0x%08x CID: %hu\n", hdr->ttt, cmd->stat_sn, conn->cid); + + return 0; +} + +static int iscsit_send_nopin_response( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int niov = 0, tx_size; + u32 padding = 0; + struct kvec *iov; + struct iscsi_nopin *hdr; + + tx_size = ISCSI_HDR_LEN; + hdr = (struct iscsi_nopin *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_NOOP_IN; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, cmd->buf_ptr_size); + put_unaligned_le64(0xFFFFFFFFFFFFFFFFULL, &hdr->lun); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + iov[niov].iov_base = cmd->pdu; + iov[niov++].iov_len = ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32C HeaderDigest" + " to NopIn 0x%08x\n", *header_digest); + } + + /* + * NOPOUT Ping Data is attached to struct iscsi_cmd->buf_ptr. + * NOPOUT DataSegmentLength is at struct iscsi_cmd->buf_ptr_size. 
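+	 * A solicited NopIn response therefore simply echoes the ping data
+	 * received in the NopOut back to the initiator, as RFC 3720 requires.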
+ */ + if (cmd->buf_ptr_size) { + iov[niov].iov_base = cmd->buf_ptr; + iov[niov++].iov_len = cmd->buf_ptr_size; + tx_size += cmd->buf_ptr_size; + + pr_debug("Echoing back %u bytes of ping" + " data.\n", cmd->buf_ptr_size); + + padding = ((-cmd->buf_ptr_size) & 3); + if (padding != 0) { + iov[niov].iov_base = &cmd->pad_bytes; + iov[niov++].iov_len = padding; + tx_size += padding; + pr_debug("Attaching %u additional" + " padding bytes.\n", padding); + } + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->buf_ptr, cmd->buf_ptr_size, + padding, (u8 *)&cmd->pad_bytes, + (u8 *)&cmd->data_crc); + + iov[niov].iov_base = &cmd->data_crc; + iov[niov++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attached DataDigest for %u" + " bytes of ping data, CRC 0x%08x\n", + cmd->buf_ptr_size, cmd->data_crc); + } + } + + cmd->iov_misc_count = niov; + cmd->tx_size = tx_size; + + pr_debug("Sending NOPIN Response ITT: 0x%08x, TTT:" + " 0x%08x, StatSN: 0x%08x, Length %u\n", cmd->init_task_tag, + cmd->targ_xfer_tag, cmd->stat_sn, cmd->buf_ptr_size); + + return 0; +} + +int iscsit_send_r2t( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int tx_size = 0; + struct iscsi_r2t *r2t; + struct iscsi_r2t_rsp *hdr; + + r2t = iscsit_get_r2t_from_list(cmd); + if (!r2t) + return -1; + + hdr = (struct iscsi_r2t_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_R2T; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + int_to_scsilun(cmd->se_cmd.orig_fe_lun, + (struct scsi_lun *)&hdr->lun); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + spin_lock_bh(&conn->sess->ttt_lock); + r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++; + if (r2t->targ_xfer_tag == 0xFFFFFFFF) + r2t->targ_xfer_tag = conn->sess->targ_xfer_tag++; + spin_unlock_bh(&conn->sess->ttt_lock); + hdr->ttt = cpu_to_be32(r2t->targ_xfer_tag); + hdr->statsn = cpu_to_be32(conn->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + hdr->r2tsn = cpu_to_be32(r2t->r2t_sn); + hdr->data_offset = cpu_to_be32(r2t->offset); + hdr->data_length = cpu_to_be32(r2t->xfer_len); + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for R2T" + " PDU 0x%08x\n", *header_digest); + } + + pr_debug("Built %sR2T, ITT: 0x%08x, TTT: 0x%08x, StatSN:" + " 0x%08x, R2TSN: 0x%08x, Offset: %u, DDTL: %u, CID: %hu\n", + (!r2t->recovery_r2t) ? "" : "Recovery ", cmd->init_task_tag, + r2t->targ_xfer_tag, ntohl(hdr->statsn), r2t->r2t_sn, + r2t->offset, r2t->xfer_len, conn->cid); + + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + spin_lock_bh(&cmd->r2t_lock); + r2t->sent_r2t = 1; + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +/* + * type 0: Normal Operation. + * type 1: Called from Storage Transport. + * type 2: Called from iscsi_task_reassign_complete_write() for + * connection recovery. 
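+ *
+ * In all three cases the R2T offsets and lengths built below are bounded
+ * by the negotiated MaxBurstLength and MaxOutstandingR2T session values.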
+ */ +int iscsit_build_r2ts_for_cmd( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int type) +{ + int first_r2t = 1; + u32 offset = 0, xfer_len = 0; + + spin_lock_bh(&cmd->r2t_lock); + if (cmd->cmd_flags & ICF_SENT_LAST_R2T) { + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + + if (conn->sess->sess_ops->DataSequenceInOrder && (type != 2)) + if (cmd->r2t_offset < cmd->write_data_done) + cmd->r2t_offset = cmd->write_data_done; + + while (cmd->outstanding_r2ts < conn->sess->sess_ops->MaxOutstandingR2T) { + if (conn->sess->sess_ops->DataSequenceInOrder) { + offset = cmd->r2t_offset; + + if (first_r2t && (type == 2)) { + xfer_len = ((offset + + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len) > + cmd->data_length) ? + (cmd->data_length - offset) : + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len)); + } else { + xfer_len = ((offset + + conn->sess->sess_ops->MaxBurstLength) > + cmd->data_length) ? + (cmd->data_length - offset) : + conn->sess->sess_ops->MaxBurstLength; + } + cmd->r2t_offset += xfer_len; + + if (cmd->r2t_offset == cmd->data_length) + cmd->cmd_flags |= ICF_SENT_LAST_R2T; + } else { + struct iscsi_seq *seq; + + seq = iscsit_get_seq_holder_for_r2t(cmd); + if (!seq) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + offset = seq->offset; + xfer_len = seq->xfer_len; + + if (cmd->seq_send_order == cmd->seq_count) + cmd->cmd_flags |= ICF_SENT_LAST_R2T; + } + cmd->outstanding_r2ts++; + first_r2t = 0; + + if (iscsit_add_r2t_to_list(cmd, offset, xfer_len, 0, 0) < 0) { + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + if (cmd->cmd_flags & ICF_SENT_LAST_R2T) + break; + } + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +static int iscsit_send_status( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + u8 iov_count = 0, recovery; + u32 padding = 0, tx_size = 0; + struct iscsi_scsi_rsp *hdr; + struct kvec *iov; + + recovery = (cmd->i_state != ISTATE_SEND_STATUS); + if (!recovery) + cmd->stat_sn = conn->stat_sn++; + + spin_lock_bh(&conn->sess->session_stats_lock); + conn->sess->rsp_pdus++; + spin_unlock_bh(&conn->sess->session_stats_lock); + + hdr = (struct iscsi_scsi_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_SCSI_CMD_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + if (cmd->se_cmd.se_cmd_flags & SCF_OVERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_CMD_OVERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } else if (cmd->se_cmd.se_cmd_flags & SCF_UNDERFLOW_BIT) { + hdr->flags |= ISCSI_FLAG_CMD_UNDERFLOW; + hdr->residual_count = cpu_to_be32(cmd->residual_count); + } + hdr->response = cmd->iscsi_response; + hdr->cmd_status = cmd->se_cmd.scsi_status; + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + /* + * Attach SENSE DATA payload to iSCSI Response PDU + */ + if (cmd->se_cmd.sense_buffer && + ((cmd->se_cmd.se_cmd_flags & SCF_TRANSPORT_TASK_SENSE) || + (cmd->se_cmd.se_cmd_flags & SCF_EMULATED_TASK_SENSE))) { + padding = -(cmd->se_cmd.scsi_sense_length) & 3; + hton24(hdr->dlength, cmd->se_cmd.scsi_sense_length); + iov[iov_count].iov_base = cmd->se_cmd.sense_buffer; + iov[iov_count++].iov_len = + (cmd->se_cmd.scsi_sense_length + padding); + tx_size += 
cmd->se_cmd.scsi_sense_length; + + if (padding) { + memset(cmd->se_cmd.sense_buffer + + cmd->se_cmd.scsi_sense_length, 0, padding); + tx_size += padding; + pr_debug("Adding %u bytes of padding to" + " SENSE.\n", padding); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->se_cmd.sense_buffer, + (cmd->se_cmd.scsi_sense_length + padding), + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching CRC32 DataDigest for" + " SENSE, %u bytes CRC 0x%08x\n", + (cmd->se_cmd.scsi_sense_length + padding), + cmd->data_crc); + } + + pr_debug("Attaching SENSE DATA: %u bytes to iSCSI" + " Response PDU\n", + cmd->se_cmd.scsi_sense_length); + } + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for Response" + " PDU 0x%08x\n", *header_digest); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built %sSCSI Response, ITT: 0x%08x, StatSN: 0x%08x," + " Response: 0x%02x, SAM Status: 0x%02x, CID: %hu\n", + (!recovery) ? "" : "Recovery ", cmd->init_task_tag, + cmd->stat_sn, 0x00, cmd->se_cmd.scsi_status, conn->cid); + + return 0; +} + +static u8 iscsit_convert_tcm_tmr_rsp(struct se_tmr_req *se_tmr) +{ + switch (se_tmr->response) { + case TMR_FUNCTION_COMPLETE: + return ISCSI_TMF_RSP_COMPLETE; + case TMR_TASK_DOES_NOT_EXIST: + return ISCSI_TMF_RSP_NO_TASK; + case TMR_LUN_DOES_NOT_EXIST: + return ISCSI_TMF_RSP_NO_LUN; + case TMR_TASK_MGMT_FUNCTION_NOT_SUPPORTED: + return ISCSI_TMF_RSP_NOT_SUPPORTED; + case TMR_FUNCTION_AUTHORIZATION_FAILED: + return ISCSI_TMF_RSP_AUTH_FAILED; + case TMR_FUNCTION_REJECTED: + default: + return ISCSI_TMF_RSP_REJECTED; + } +} + +static int iscsit_send_task_mgt_rsp( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + struct iscsi_tm_rsp *hdr; + u32 tx_size = 0; + + hdr = (struct iscsi_tm_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_SCSI_TMFUNC_RSP; + hdr->response = iscsit_convert_tcm_tmr_rsp(se_tmr); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + cmd->iov_misc[0].iov_base = cmd->pdu; + cmd->iov_misc[0].iov_len = ISCSI_HDR_LEN; + tx_size += ISCSI_HDR_LEN; + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + cmd->iov_misc[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for Task" + " Mgmt Response PDU 0x%08x\n", *header_digest); + } + + cmd->iov_misc_count = 1; + cmd->tx_size = tx_size; + + pr_debug("Built Task Management Response ITT: 0x%08x," + " StatSN: 0x%08x, Response: 0x%02x, CID: %hu\n", + cmd->init_task_tag, cmd->stat_sn, hdr->response, conn->cid); + + return 0; +} + +static int iscsit_build_sendtargets_response(struct iscsi_cmd *cmd) +{ + char *payload = NULL; + struct iscsi_conn 
*conn = cmd->conn;
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_tpg_np *tpg_np;
+	int buffer_len, end_of_buf = 0, len = 0, payload_len = 0;
+	unsigned char buf[256];
+
+	buffer_len = (conn->conn_ops->MaxRecvDataSegmentLength > 32768) ?
+			32768 : conn->conn_ops->MaxRecvDataSegmentLength;
+
+	memset(buf, 0, 256);
+
+	payload = kzalloc(buffer_len, GFP_KERNEL);
+	if (!payload) {
+		pr_err("Unable to allocate memory for sendtargets"
+				" response.\n");
+		return -ENOMEM;
+	}
+
+	spin_lock(&tiqn_lock);
+	list_for_each_entry(tiqn, &g_tiqn_list, tiqn_list) {
+		len = sprintf(buf, "TargetName=%s", tiqn->tiqn);
+		len += 1;
+
+		if ((len + payload_len) > buffer_len) {
+			end_of_buf = 1;
+			goto eob;
+		}
+		memcpy((void *)payload + payload_len, buf, len);
+		payload_len += len;
+
+		spin_lock(&tiqn->tiqn_tpg_lock);
+		list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) {
+
+			spin_lock(&tpg->tpg_state_lock);
+			if ((tpg->tpg_state == TPG_STATE_FREE) ||
+			    (tpg->tpg_state == TPG_STATE_INACTIVE)) {
+				spin_unlock(&tpg->tpg_state_lock);
+				continue;
+			}
+			spin_unlock(&tpg->tpg_state_lock);
+
+			spin_lock(&tpg->tpg_np_lock);
+			list_for_each_entry(tpg_np, &tpg->tpg_gnp_list,
+					tpg_np_list) {
+				len = sprintf(buf, "TargetAddress="
+					"%s%s%s:%hu,%hu",
+					(tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+					"[" : "", tpg_np->tpg_np->np_ip,
+					(tpg_np->tpg_np->np_sockaddr.ss_family == AF_INET6) ?
+					"]" : "", tpg_np->tpg_np->np_port,
+					tpg->tpgt);
+				len += 1;
+
+				if ((len + payload_len) > buffer_len) {
+					spin_unlock(&tpg->tpg_np_lock);
+					spin_unlock(&tiqn->tiqn_tpg_lock);
+					end_of_buf = 1;
+					goto eob;
+				}
+				memcpy((void *)payload + payload_len, buf, len);
+				payload_len += len;
+			}
+			spin_unlock(&tpg->tpg_np_lock);
+		}
+		spin_unlock(&tiqn->tiqn_tpg_lock);
+eob:
+		if (end_of_buf)
+			break;
+	}
+	spin_unlock(&tiqn_lock);
+
+	cmd->buf_ptr = payload;
+
+	return payload_len;
+}
+
+/*
+ * FIXME: Add support for F_BIT and C_BIT when the length is longer than
+ * MaxRecvDataSegmentLength.
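+ * Until then the payload is capped to min(MaxRecvDataSegmentLength, 32768)
+ * bytes by iscsit_build_sendtargets_response() above.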
+ */ +static int iscsit_send_text_rsp( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_text_rsp *hdr; + struct kvec *iov; + u32 padding = 0, tx_size = 0; + int text_length, iov_count = 0; + + text_length = iscsit_build_sendtargets_response(cmd); + if (text_length < 0) + return text_length; + + padding = ((-text_length) & 3); + if (padding != 0) { + memset(cmd->buf_ptr + text_length, 0, padding); + pr_debug("Attaching %u additional bytes for" + " padding.\n", padding); + } + + hdr = (struct iscsi_text_rsp *) cmd->pdu; + memset(hdr, 0, ISCSI_HDR_LEN); + hdr->opcode = ISCSI_OP_TEXT_RSP; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, text_length); + hdr->itt = cpu_to_be32(cmd->init_task_tag); + hdr->ttt = cpu_to_be32(cmd->targ_xfer_tag); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + + iscsit_increment_maxcmdsn(cmd, conn->sess); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + iov[iov_count].iov_base = cmd->buf_ptr; + iov[iov_count++].iov_len = text_length + padding; + + tx_size += (ISCSI_HDR_LEN + text_length + padding); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for" + " Text Response PDU 0x%08x\n", *header_digest); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + cmd->buf_ptr, (text_length + padding), + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + + pr_debug("Attaching DataDigest for %u bytes of text" + " data, CRC 0x%08x\n", (text_length + padding), + cmd->data_crc); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built Text Response: ITT: 0x%08x, StatSN: 0x%08x," + " Length: %u, CID: %hu\n", cmd->init_task_tag, cmd->stat_sn, + text_length, conn->cid); + return 0; +} + +static int iscsit_send_reject( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + u32 iov_count = 0, tx_size = 0; + struct iscsi_reject *hdr; + struct kvec *iov; + + hdr = (struct iscsi_reject *) cmd->pdu; + hdr->opcode = ISCSI_OP_REJECT; + hdr->flags |= ISCSI_FLAG_CMD_FINAL; + hton24(hdr->dlength, ISCSI_HDR_LEN); + cmd->stat_sn = conn->stat_sn++; + hdr->statsn = cpu_to_be32(cmd->stat_sn); + hdr->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + hdr->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + iov = &cmd->iov_misc[0]; + + iov[iov_count].iov_base = cmd->pdu; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + iov[iov_count].iov_base = cmd->buf_ptr; + iov[iov_count++].iov_len = ISCSI_HDR_LEN; + + tx_size = (ISCSI_HDR_LEN + ISCSI_HDR_LEN); + + if (conn->conn_ops->HeaderDigest) { + u32 *header_digest = (u32 *)&cmd->pdu[ISCSI_HDR_LEN]; + + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char *)hdr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)header_digest); + + iov[0].iov_len += ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 HeaderDigest for" + " REJECT PDU 0x%08x\n", *header_digest); + } + + if (conn->conn_ops->DataDigest) { + iscsit_do_crypto_hash_buf(&conn->conn_tx_hash, + (unsigned char 
*)cmd->buf_ptr, ISCSI_HDR_LEN, + 0, NULL, (u8 *)&cmd->data_crc); + + iov[iov_count].iov_base = &cmd->data_crc; + iov[iov_count++].iov_len = ISCSI_CRC_LEN; + tx_size += ISCSI_CRC_LEN; + pr_debug("Attaching CRC32 DataDigest for REJECT" + " PDU 0x%08x\n", cmd->data_crc); + } + + cmd->iov_misc_count = iov_count; + cmd->tx_size = tx_size; + + pr_debug("Built Reject PDU StatSN: 0x%08x, Reason: 0x%02x," + " CID: %hu\n", ntohl(hdr->statsn), hdr->reason, conn->cid); + + return 0; +} + +static void iscsit_tx_thread_wait_for_tcp(struct iscsi_conn *conn) +{ + if ((conn->sock->sk->sk_shutdown & SEND_SHUTDOWN) || + (conn->sock->sk->sk_shutdown & RCV_SHUTDOWN)) { + wait_for_completion_interruptible_timeout( + &conn->tx_half_close_comp, + ISCSI_TX_THREAD_TCP_TIMEOUT * HZ); + } +} + +#ifdef CONFIG_SMP + +void iscsit_thread_get_cpumask(struct iscsi_conn *conn) +{ + struct iscsi_thread_set *ts = conn->thread_set; + int ord, cpu; + /* + * thread_id is assigned from iscsit_global->ts_bitmap from + * within iscsi_thread_set.c:iscsi_allocate_thread_sets() + * + * Here we use thread_id to determine which CPU that this + * iSCSI connection's iscsi_thread_set will be scheduled to + * execute upon. + */ + ord = ts->thread_id % cpumask_weight(cpu_online_mask); +#if 0 + pr_debug(">>>>>>>>>>>>>>>>>>>> Generated ord: %d from" + " thread_id: %d\n", ord, ts->thread_id); +#endif + for_each_online_cpu(cpu) { + if (ord-- == 0) { + cpumask_set_cpu(cpu, conn->conn_cpumask); + return; + } + } + /* + * This should never be reached.. + */ + dump_stack(); + cpumask_setall(conn->conn_cpumask); +} + +static inline void iscsit_thread_check_cpumask( + struct iscsi_conn *conn, + struct task_struct *p, + int mode) +{ + char buf[128]; + /* + * mode == 1 signals iscsi_target_tx_thread() usage. + * mode == 0 signals iscsi_target_rx_thread() usage. + */ + if (mode == 1) { + if (!conn->conn_tx_reset_cpumask) + return; + conn->conn_tx_reset_cpumask = 0; + } else { + if (!conn->conn_rx_reset_cpumask) + return; + conn->conn_rx_reset_cpumask = 0; + } + /* + * Update the CPU mask for this single kthread so that + * both TX and RX kthreads are scheduled to run on the + * same CPU. + */ + memset(buf, 0, 128); + cpumask_scnprintf(buf, 128, conn->conn_cpumask); +#if 0 + pr_debug(">>>>>>>>>>>>>> Calling set_cpus_allowed_ptr():" + " %s for %s\n", buf, p->comm); +#endif + set_cpus_allowed_ptr(p, conn->conn_cpumask); +} + +#else +#define iscsit_thread_get_cpumask(X) ({}) +#define iscsit_thread_check_cpumask(X, Y, Z) ({}) +#endif /* CONFIG_SMP */ + +int iscsi_target_tx_thread(void *arg) +{ + u8 state; + int eodr = 0; + int ret = 0; + int sent_status = 0; + int use_misc = 0; + int map_sg = 0; + struct iscsi_cmd *cmd = NULL; + struct iscsi_conn *conn; + struct iscsi_queue_req *qr = NULL; + struct se_cmd *se_cmd; + struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; + /* + * Allow ourselves to be interrupted by SIGINT so that a + * connection recovery / failure event can be triggered externally. + */ + allow_signal(SIGINT); + +restart: + conn = iscsi_tx_thread_pre_handler(ts); + if (!conn) + goto out; + + eodr = map_sg = ret = sent_status = use_misc = 0; + + while (!kthread_should_stop()) { + /* + * Ensure that both TX and RX per connection kthreads + * are scheduled to run on the same CPU. 
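+		 * conn_cpumask was filled in by iscsit_thread_get_cpumask(),
+		 * and iscsit_thread_check_cpumask() below only calls
+		 * set_cpus_allowed_ptr() when a mask reset has been requested
+		 * for this kthread.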
+		 */
+		iscsit_thread_check_cpumask(conn, current, 1);
+
+		schedule_timeout_interruptible(MAX_SCHEDULE_TIMEOUT);
+
+		if ((ts->status == ISCSI_THREAD_SET_RESET) ||
+		     signal_pending(current))
+			goto transport_err;
+
+get_immediate:
+		qr = iscsit_get_cmd_from_immediate_queue(conn);
+		if (qr) {
+			atomic_set(&conn->check_immediate_queue, 0);
+			cmd = qr->cmd;
+			state = qr->state;
+			kmem_cache_free(lio_qr_cache, qr);
+
+			spin_lock_bh(&cmd->istate_lock);
+			switch (state) {
+			case ISTATE_SEND_R2T:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_r2t(cmd, conn);
+				break;
+			case ISTATE_REMOVE:
+				spin_unlock_bh(&cmd->istate_lock);
+
+				if (cmd->data_direction == DMA_TO_DEVICE)
+					iscsit_stop_dataout_timer(cmd);
+
+				spin_lock_bh(&conn->cmd_lock);
+				list_del(&cmd->i_list);
+				spin_unlock_bh(&conn->cmd_lock);
+				/*
+				 * Determine if a struct se_cmd is associated with
+				 * this struct iscsi_cmd.
+				 */
+				if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) &&
+				    !(cmd->tmr_req))
+					iscsit_release_cmd(cmd);
+				else
+					transport_generic_free_cmd(&cmd->se_cmd,
+								1, 0);
+				goto get_immediate;
+			case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+				spin_unlock_bh(&cmd->istate_lock);
+				iscsit_mod_nopin_response_timer(conn);
+				ret = iscsit_send_unsolicited_nopin(cmd,
+						conn, 1);
+				break;
+			case ISTATE_SEND_NOPIN_NO_RESPONSE:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_unsolicited_nopin(cmd,
+						conn, 0);
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+				" 0x%08x, i_state: %d on CID: %hu\n",
+				cmd->iscsi_opcode, cmd->init_task_tag, state,
+				conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+			if (ret < 0) {
+				conn->tx_immediate_queue = 0;
+				goto transport_err;
+			}
+
+			if (iscsit_send_tx_data(cmd, conn, 1) < 0) {
+				conn->tx_immediate_queue = 0;
+				iscsit_tx_thread_wait_for_tcp(conn);
+				goto transport_err;
+			}
+
+			spin_lock_bh(&cmd->istate_lock);
+			switch (state) {
+			case ISTATE_SEND_R2T:
+				spin_unlock_bh(&cmd->istate_lock);
+				spin_lock_bh(&cmd->dataout_timeout_lock);
+				iscsit_start_dataout_timer(cmd, conn);
+				spin_unlock_bh(&cmd->dataout_timeout_lock);
+				break;
+			case ISTATE_SEND_NOPIN_WANT_RESPONSE:
+				cmd->i_state = ISTATE_SENT_NOPIN_WANT_RESPONSE;
+				spin_unlock_bh(&cmd->istate_lock);
+				break;
+			case ISTATE_SEND_NOPIN_NO_RESPONSE:
+				cmd->i_state = ISTATE_SENT_STATUS;
+				spin_unlock_bh(&cmd->istate_lock);
+				break;
+			default:
+				pr_err("Unknown Opcode: 0x%02x ITT:"
+					" 0x%08x, i_state: %d on CID: %hu\n",
+					cmd->iscsi_opcode, cmd->init_task_tag,
+					state, conn->cid);
+				spin_unlock_bh(&cmd->istate_lock);
+				goto transport_err;
+			}
+			goto get_immediate;
+		} else
+			conn->tx_immediate_queue = 0;
+
+get_response:
+		qr = iscsit_get_cmd_from_response_queue(conn);
+		if (qr) {
+			cmd = qr->cmd;
+			state = qr->state;
+			kmem_cache_free(lio_qr_cache, qr);
+
+			spin_lock_bh(&cmd->istate_lock);
+check_rsp_state:
+			switch (state) {
+			case ISTATE_SEND_DATAIN:
+				spin_unlock_bh(&cmd->istate_lock);
+				ret = iscsit_send_data_in(cmd, conn,
+						&eodr);
+				map_sg = 1;
+				break;
+			case ISTATE_SEND_STATUS:
+			case ISTATE_SEND_STATUS_RECOVERY:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_status(cmd, conn);
+				break;
+			case ISTATE_SEND_LOGOUTRSP:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_logout_response(cmd, conn);
+				break;
+			case ISTATE_SEND_ASYNCMSG:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret = iscsit_send_conn_drop_async_message(
+						cmd, conn);
+				break;
+			case ISTATE_SEND_NOPIN:
+				spin_unlock_bh(&cmd->istate_lock);
+				use_misc = 1;
+				ret =
iscsit_send_nopin_response(cmd, conn); + break; + case ISTATE_SEND_REJECT: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_reject(cmd, conn); + break; + case ISTATE_SEND_TASKMGTRSP: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_task_mgt_rsp(cmd, conn); + if (ret != 0) + break; + ret = iscsit_tmr_post_handler(cmd, conn); + if (ret != 0) + iscsit_fall_back_to_erl0(conn->sess); + break; + case ISTATE_SEND_TEXTRSP: + spin_unlock_bh(&cmd->istate_lock); + use_misc = 1; + ret = iscsit_send_text_rsp(cmd, conn); + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, + state, conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + if (ret < 0) { + conn->tx_response_queue = 0; + goto transport_err; + } + + se_cmd = &cmd->se_cmd; + + if (map_sg && !conn->conn_ops->IFMarker) { + if (iscsit_fe_sendpage_sg(cmd, conn) < 0) { + conn->tx_response_queue = 0; + iscsit_tx_thread_wait_for_tcp(conn); + iscsit_unmap_iovec(cmd); + goto transport_err; + } + } else { + if (iscsit_send_tx_data(cmd, conn, use_misc) < 0) { + conn->tx_response_queue = 0; + iscsit_tx_thread_wait_for_tcp(conn); + iscsit_unmap_iovec(cmd); + goto transport_err; + } + } + map_sg = 0; + iscsit_unmap_iovec(cmd); + + spin_lock_bh(&cmd->istate_lock); + switch (state) { + case ISTATE_SEND_DATAIN: + if (!eodr) + goto check_rsp_state; + + if (eodr == 1) { + cmd->i_state = ISTATE_SENT_LAST_DATAIN; + sent_status = 1; + eodr = use_misc = 0; + } else if (eodr == 2) { + cmd->i_state = state = + ISTATE_SEND_STATUS; + sent_status = 0; + eodr = use_misc = 0; + goto check_rsp_state; + } + break; + case ISTATE_SEND_STATUS: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_ASYNCMSG: + case ISTATE_SEND_NOPIN: + case ISTATE_SEND_STATUS_RECOVERY: + case ISTATE_SEND_TEXTRSP: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_REJECT: + use_misc = 0; + if (cmd->cmd_flags & ICF_REJECT_FAIL_CONN) { + cmd->cmd_flags &= ~ICF_REJECT_FAIL_CONN; + spin_unlock_bh(&cmd->istate_lock); + complete(&cmd->reject_comp); + goto transport_err; + } + complete(&cmd->reject_comp); + break; + case ISTATE_SEND_TASKMGTRSP: + use_misc = 0; + sent_status = 1; + break; + case ISTATE_SEND_LOGOUTRSP: + spin_unlock_bh(&cmd->istate_lock); + if (!iscsit_logout_post_handler(cmd, conn)) + goto restart; + spin_lock_bh(&cmd->istate_lock); + use_misc = 0; + sent_status = 1; + break; + default: + pr_err("Unknown Opcode: 0x%02x ITT:" + " 0x%08x, i_state: %d on CID: %hu\n", + cmd->iscsi_opcode, cmd->init_task_tag, + cmd->i_state, conn->cid); + spin_unlock_bh(&cmd->istate_lock); + goto transport_err; + } + + if (sent_status) { + cmd->i_state = ISTATE_SENT_STATUS; + sent_status = 0; + } + spin_unlock_bh(&cmd->istate_lock); + + if (atomic_read(&conn->check_immediate_queue)) + goto get_immediate; + + goto get_response; + } else + conn->tx_response_queue = 0; + } + +transport_err: + iscsit_take_action_for_connection_exit(conn); + goto restart; +out: + return 0; +} + +int iscsi_target_rx_thread(void *arg) +{ + int ret; + u8 buffer[ISCSI_HDR_LEN], opcode; + u32 checksum = 0, digest = 0; + struct iscsi_conn *conn = NULL; + struct iscsi_thread_set *ts = (struct iscsi_thread_set *)arg; + struct kvec iov; + /* + * Allow ourselves to be interrupted by SIGINT so that a + * connection recovery / failure event can be triggered externally. 
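+	 * kthreads ignore signals by default, so without allow_signal()
+	 * the signal_pending() check below could never return true.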
+	 */
+	allow_signal(SIGINT);
+
+restart:
+	conn = iscsi_rx_thread_pre_handler(ts);
+	if (!conn)
+		goto out;
+
+	while (!kthread_should_stop()) {
+		/*
+		 * Ensure that both TX and RX per connection kthreads
+		 * are scheduled to run on the same CPU.
+		 */
+		iscsit_thread_check_cpumask(conn, current, 0);
+
+		memset(buffer, 0, ISCSI_HDR_LEN);
+		memset(&iov, 0, sizeof(struct kvec));
+
+		iov.iov_base = buffer;
+		iov.iov_len = ISCSI_HDR_LEN;
+
+		ret = rx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+		if (ret != ISCSI_HDR_LEN) {
+			iscsit_rx_thread_wait_for_tcp(conn);
+			goto transport_err;
+		}
+
+		/*
+		 * Set conn->bad_hdr for use with REJECT PDUs.
+		 */
+		memcpy(&conn->bad_hdr, &buffer, ISCSI_HDR_LEN);
+
+		if (conn->conn_ops->HeaderDigest) {
+			iov.iov_base = &digest;
+			iov.iov_len = ISCSI_CRC_LEN;
+
+			ret = rx_data(conn, &iov, 1, ISCSI_CRC_LEN);
+			if (ret != ISCSI_CRC_LEN) {
+				iscsit_rx_thread_wait_for_tcp(conn);
+				goto transport_err;
+			}
+
+			iscsit_do_crypto_hash_buf(&conn->conn_rx_hash,
+					buffer, ISCSI_HDR_LEN,
+					0, NULL, (u8 *)&checksum);
+
+			if (digest != checksum) {
+				pr_err("HeaderDigest CRC32C failed,"
+					" received 0x%08x, computed 0x%08x\n",
+					digest, checksum);
+				/*
+				 * Set the PDU to 0xff so it will intentionally
+				 * hit default in the switch below.
+				 */
+				memset(buffer, 0xff, ISCSI_HDR_LEN);
+				spin_lock_bh(&conn->sess->session_stats_lock);
+				conn->sess->conn_digest_errors++;
+				spin_unlock_bh(&conn->sess->session_stats_lock);
+			} else {
+				pr_debug("Got HeaderDigest CRC32C"
+						" 0x%08x\n", checksum);
+			}
+		}
+
+		if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT)
+			goto transport_err;
+
+		opcode = buffer[0] & ISCSI_OPCODE_MASK;
+
+		if (conn->sess->sess_ops->SessionType &&
+		    (opcode != ISCSI_OP_TEXT) && (opcode != ISCSI_OP_LOGOUT)) {
+			pr_err("Received illegal iSCSI Opcode: 0x%02x"
+			" while in Discovery Session, rejecting.\n", opcode);
+			iscsit_add_reject(ISCSI_REASON_PROTOCOL_ERROR, 1,
+					buffer, conn);
+			goto transport_err;
+		}
+
+		switch (opcode) {
+		case ISCSI_OP_SCSI_CMD:
+			if (iscsit_handle_scsi_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SCSI_DATA_OUT:
+			if (iscsit_handle_data_out(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_NOOP_OUT:
+			if (iscsit_handle_nop_out(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SCSI_TMFUNC:
+			if (iscsit_handle_task_mgt_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_TEXT:
+			if (iscsit_handle_text_cmd(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_LOGOUT:
+			ret = iscsit_handle_logout_cmd(conn, buffer);
+			if (ret > 0) {
+				wait_for_completion_timeout(&conn->conn_logout_comp,
+						SECONDS_FOR_LOGOUT_COMP * HZ);
+				goto transport_err;
+			} else if (ret < 0)
+				goto transport_err;
+			break;
+		case ISCSI_OP_SNACK:
+			if (iscsit_handle_snack(conn, buffer) < 0)
+				goto transport_err;
+			break;
+		default:
+			pr_err("Got unknown iSCSI OpCode: 0x%02x\n",
+					opcode);
+			if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+				pr_err("Cannot recover from unknown"
+				" opcode while ERL=0, closing iSCSI connection"
+				".\n");
+				goto transport_err;
+			}
+			if (!conn->conn_ops->OFMarker) {
+				pr_err("Unable to recover from unknown"
+					" opcode while OFMarker=No, closing iSCSI"
+					" connection.\n");
+				goto transport_err;
+			}
+			if (iscsit_recover_from_unknown_opcode(conn) < 0) {
+				pr_err("Unable to recover from unknown"
+					" opcode, closing iSCSI connection.\n");
+				goto transport_err;
+			}
+			break;
+		}
+	}
+
+transport_err:
+	if (!signal_pending(current))
+		atomic_set(&conn->transport_failed, 1);
+
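+	/*
+	 * transport_failed is left clear when a signal is pending, since a
+	 * SIGINT here means the shutdown was requested internally rather
+	 * than caused by a transport error.
+	 */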
iscsit_take_action_for_connection_exit(conn); + goto restart; +out: + return 0; +} + +static void iscsit_release_commands_from_conn(struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd = NULL, *cmd_tmp = NULL; + struct iscsi_session *sess = conn->sess; + struct se_cmd *se_cmd; + /* + * We expect this function to only ever be called from either RX or TX + * thread context via iscsit_close_connection() once the other context + * has been reset -> returned sleeping pre-handler state. + */ + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD)) { + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + iscsit_increment_maxcmdsn(cmd, sess); + se_cmd = &cmd->se_cmd; + /* + * Special cases for active iSCSI TMR, and + * transport_lookup_cmd_lun() failing from + * iscsit_get_lun_for_cmd() in iscsit_handle_scsi_cmd(). + */ + if (cmd->tmr_req && se_cmd->transport_wait_for_tasks) + se_cmd->transport_wait_for_tasks(se_cmd, 1, 1); + else if (cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) + transport_release_cmd(se_cmd); + else + iscsit_release_cmd(cmd); + + spin_lock_bh(&conn->cmd_lock); + continue; + } + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_increment_maxcmdsn(cmd, sess); + se_cmd = &cmd->se_cmd; + + if (se_cmd->transport_wait_for_tasks) + se_cmd->transport_wait_for_tasks(se_cmd, 1, 1); + + spin_lock_bh(&conn->cmd_lock); + } + spin_unlock_bh(&conn->cmd_lock); +} + +static void iscsit_stop_timers_for_cmds( + struct iscsi_conn *conn) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + } + spin_unlock_bh(&conn->cmd_lock); +} + +int iscsit_close_connection( + struct iscsi_conn *conn) +{ + int conn_logout = (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT); + struct iscsi_session *sess = conn->sess; + + pr_debug("Closing iSCSI connection CID %hu on SID:" + " %u\n", conn->cid, sess->sid); + /* + * Always up conn_logout_comp just in case the RX Thread is sleeping + * and the logout response never got sent because the connection + * failed. + */ + complete(&conn->conn_logout_comp); + + iscsi_release_thread_set(conn); + + iscsit_stop_timers_for_cmds(conn); + iscsit_stop_nopin_response_timer(conn); + iscsit_stop_nopin_timer(conn); + iscsit_free_queue_reqs_for_conn(conn); + + /* + * During Connection recovery drop unacknowledged out of order + * commands for this connection, and prepare the other commands + * for reallegiance. + * + * During normal operation clear the out of order commands (but + * do not free the struct iscsi_ooo_cmdsn's) and release all + * struct iscsi_cmds. + */ + if (atomic_read(&conn->connection_recovery)) { + iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(conn); + iscsit_prepare_cmds_for_realligance(conn); + } else { + iscsit_clear_ooo_cmdsns_for_conn(conn); + iscsit_release_commands_from_conn(conn); + } + + /* + * Handle decrementing session or connection usage count if + * a logout response was not able to be sent because the + * connection failed. Fall back to Session Recovery here.
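+ * (Both a connection and a session usage reference are assumed to be + * held for a CLOSE_SESSION logout -- see iscsit_logout_closesession() + * -- hence the two decrements below; a CLOSE_CONNECTION logout is + * assumed to hold only the connection reference.)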
+ */ + if (atomic_read(&conn->conn_logout_remove)) { + if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_SESSION) { + iscsit_dec_conn_usage_count(conn); + iscsit_dec_session_usage_count(sess); + } + if (conn->conn_logout_reason == ISCSI_LOGOUT_REASON_CLOSE_CONNECTION) + iscsit_dec_conn_usage_count(conn); + + atomic_set(&conn->conn_logout_remove, 0); + atomic_set(&sess->session_reinstatement, 0); + atomic_set(&sess->session_fall_back_to_erl0, 1); + } + + spin_lock_bh(&sess->conn_lock); + list_del(&conn->conn_list); + + /* + * Attempt to let the Initiator know this connection failed by + * sending an Connection Dropped Async Message on another + * active connection. + */ + if (atomic_read(&conn->connection_recovery)) + iscsit_build_conn_drop_async_message(conn); + + spin_unlock_bh(&sess->conn_lock); + + /* + * If connection reinstatement is being performed on this connection, + * up the connection reinstatement semaphore that is being blocked on + * in iscsit_cause_connection_reinstatement(). + */ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->sleep_on_conn_wait_comp)) { + spin_unlock_bh(&conn->state_lock); + complete(&conn->conn_wait_comp); + wait_for_completion(&conn->conn_post_wait_comp); + spin_lock_bh(&conn->state_lock); + } + + /* + * If connection reinstatement is being performed on this connection + * by receiving a REMOVECONNFORRECOVERY logout request, up the + * connection wait rcfr semaphore that is being blocked on + * an iscsit_connection_reinstatement_rcfr(). + */ + if (atomic_read(&conn->connection_wait_rcfr)) { + spin_unlock_bh(&conn->state_lock); + complete(&conn->conn_wait_rcfr_comp); + wait_for_completion(&conn->conn_post_wait_comp); + spin_lock_bh(&conn->state_lock); + } + atomic_set(&conn->connection_reinstatement, 1); + spin_unlock_bh(&conn->state_lock); + + /* + * If any other processes are accessing this connection pointer we + * must wait until they have completed. + */ + iscsit_check_conn_usage_count(conn); + + if (conn->conn_rx_hash.tfm) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (conn->conn_tx_hash.tfm) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + conn->conn_ops = NULL; + + if (conn->sock) { + if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { + kfree(conn->sock->file); + conn->sock->file = NULL; + } + sock_release(conn->sock); + } + conn->thread_set = NULL; + + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); + conn->conn_state = TARG_CONN_STATE_FREE; + kfree(conn); + + spin_lock_bh(&sess->conn_lock); + atomic_dec(&sess->nconn); + pr_debug("Decremented iSCSI connection count to %hu from node:" + " %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + /* + * Make sure that if one connection fails in an non ERL=2 iSCSI + * Session that they all fail. + */ + if ((sess->sess_ops->ErrorRecoveryLevel != 2) && !conn_logout && + !atomic_read(&sess->session_logout)) + atomic_set(&sess->session_fall_back_to_erl0, 1); + + /* + * If this was not the last connection in the session, and we are + * performing session reinstatement or falling back to ERL=0, call + * iscsit_stop_session() without sleeping to shutdown the other + * active connections. 
+ */ + if (atomic_read(&sess->nconn)) { + if (!atomic_read(&sess->session_reinstatement) && + !atomic_read(&sess->session_fall_back_to_erl0)) { + spin_unlock_bh(&sess->conn_lock); + return 0; + } + if (!atomic_read(&sess->session_stop_active)) { + atomic_set(&sess->session_stop_active, 1); + spin_unlock_bh(&sess->conn_lock); + iscsit_stop_session(sess, 0, 0); + return 0; + } + spin_unlock_bh(&sess->conn_lock); + return 0; + } + + /* + * If this was the last connection in the session and one of the + * following is occurring: + * + * Session Reinstatement is not being performed, and are falling back + * to ERL=0 call iscsit_close_session(). + * + * Session Logout was requested. iscsit_close_session() will be called + * elsewhere. + * + * Session Continuation is not being performed, start the Time2Retain + * handler and check if sleep_on_sess_wait_sem is active. + */ + if (!atomic_read(&sess->session_reinstatement) && + atomic_read(&sess->session_fall_back_to_erl0)) { + spin_unlock_bh(&sess->conn_lock); + iscsit_close_session(sess); + + return 0; + } else if (atomic_read(&sess->session_logout)) { + pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); + sess->session_state = TARG_SESS_STATE_FREE; + spin_unlock_bh(&sess->conn_lock); + + if (atomic_read(&sess->sleep_on_sess_wait_comp)) + complete(&sess->session_wait_comp); + + return 0; + } else { + pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); + sess->session_state = TARG_SESS_STATE_FAILED; + + if (!atomic_read(&sess->session_continuation)) { + spin_unlock_bh(&sess->conn_lock); + iscsit_start_time2retain_handler(sess); + } else + spin_unlock_bh(&sess->conn_lock); + + if (atomic_read(&sess->sleep_on_sess_wait_comp)) + complete(&sess->session_wait_comp); + + return 0; + } + spin_unlock_bh(&sess->conn_lock); + + return 0; +} + +int iscsit_close_session(struct iscsi_session *sess) +{ + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + + if (atomic_read(&sess->nconn)) { + pr_err("%d connection(s) still exist for iSCSI session" + " to %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + BUG(); + } + + spin_lock_bh(&se_tpg->session_lock); + atomic_set(&sess->session_logout, 1); + atomic_set(&sess->session_reinstatement, 1); + iscsit_stop_time2retain_timer(sess); + spin_unlock_bh(&se_tpg->session_lock); + + /* + * transport_deregister_session_configfs() will clear the + * struct se_node_acl->nacl_sess pointer now as a iscsi_np process context + * can be setting it again with __transport_register_session() in + * iscsi_post_login_handler() again after the iscsit_stop_session() + * completes in iscsi_np context. + */ + transport_deregister_session_configfs(sess->se_sess); + + /* + * If any other processes are accessing this session pointer we must + * wait until they have completed. If we are in an interrupt (the + * time2retain handler) and contain and active session usage count we + * restart the timer and exit. 
+ */ + if (!in_interrupt()) { + if (iscsit_check_session_usage_count(sess) == 1) + iscsit_stop_session(sess, 1, 1); + } else { + if (iscsit_check_session_usage_count(sess) == 2) { + atomic_set(&sess->session_logout, 0); + iscsit_start_time2retain_handler(sess); + return 0; + } + } + + transport_deregister_session(sess->se_sess); + + if (sess->sess_ops->ErrorRecoveryLevel == 2) + iscsit_free_connection_recovery_entires(sess); + + iscsit_free_all_ooo_cmdsns(sess); + + spin_lock_bh(&se_tpg->session_lock); + pr_debug("Moving to TARG_SESS_STATE_FREE.\n"); + sess->session_state = TARG_SESS_STATE_FREE; + pr_debug("Released iSCSI session from node: %s\n", + sess->sess_ops->InitiatorName); + tpg->nsessions--; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_nsessions--; + + pr_debug("Decremented number of active iSCSI Sessions on" + " iSCSI TPG: %hu to %u\n", tpg->tpgt, tpg->nsessions); + + spin_lock(&sess_idr_lock); + idr_remove(&sess_idr, sess->session_index); + spin_unlock(&sess_idr_lock); + + kfree(sess->sess_ops); + sess->sess_ops = NULL; + spin_unlock_bh(&se_tpg->session_lock); + + kfree(sess); + return 0; +} + +static void iscsit_logout_post_handler_closesession( + struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD); + iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); + + atomic_set(&conn->conn_logout_remove, 0); + complete(&conn->conn_logout_comp); + + iscsit_dec_conn_usage_count(conn); + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + iscsit_close_session(sess); +} + +static void iscsit_logout_post_handler_samecid( + struct iscsi_conn *conn) +{ + iscsi_set_thread_clear(conn, ISCSI_CLEAR_TX_THREAD); + iscsi_set_thread_set_signal(conn, ISCSI_SIGNAL_TX_THREAD); + + atomic_set(&conn->conn_logout_remove, 0); + complete(&conn->conn_logout_comp); + + iscsit_cause_connection_reinstatement(conn, 1); + iscsit_dec_conn_usage_count(conn); +} + +static void iscsit_logout_post_handler_diffcid( + struct iscsi_conn *conn, + u16 cid) +{ + struct iscsi_conn *l_conn; + struct iscsi_session *sess = conn->sess; + + if (!sess) + return; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(l_conn, &sess->sess_conn_list, conn_list) { + if (l_conn->cid == cid) { + iscsit_inc_conn_usage_count(l_conn); + break; + } + } + spin_unlock_bh(&sess->conn_lock); + + if (!l_conn) + return; + + if (l_conn->sock) + l_conn->sock->ops->shutdown(l_conn->sock, RCV_SHUTDOWN); + + spin_lock_bh(&l_conn->state_lock); + pr_debug("Moving to TARG_CONN_STATE_IN_LOGOUT.\n"); + l_conn->conn_state = TARG_CONN_STATE_IN_LOGOUT; + spin_unlock_bh(&l_conn->state_lock); + + iscsit_cause_connection_reinstatement(l_conn, 1); + iscsit_dec_conn_usage_count(l_conn); +} + +/* + * Return of 0 causes the TX thread to restart. 
+ */ +static int iscsit_logout_post_handler( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + int ret = 0; + + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + iscsit_logout_post_handler_closesession(conn); + break; + } + ret = 0; + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + if (conn->cid == cmd->logout_cid) { + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + iscsit_logout_post_handler_samecid(conn); + break; + } + ret = 0; + } else { + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + iscsit_logout_post_handler_diffcid(conn, + cmd->logout_cid); + break; + case ISCSI_LOGOUT_CID_NOT_FOUND: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + break; + } + ret = 1; + } + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + switch (cmd->logout_response) { + case ISCSI_LOGOUT_SUCCESS: + case ISCSI_LOGOUT_CID_NOT_FOUND: + case ISCSI_LOGOUT_RECOVERY_UNSUPPORTED: + case ISCSI_LOGOUT_CLEANUP_FAILED: + default: + break; + } + ret = 1; + break; + default: + break; + + } + return ret; +} + +void iscsit_fail_session(struct iscsi_session *sess) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n"); + conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT; + } + spin_unlock_bh(&sess->conn_lock); + + pr_debug("Moving to TARG_SESS_STATE_FAILED.\n"); + sess->session_state = TARG_SESS_STATE_FAILED; +} + +int iscsit_free_session(struct iscsi_session *sess) +{ + u16 conn_count = atomic_read(&sess->nconn); + struct iscsi_conn *conn, *conn_tmp = NULL; + int is_last; + + spin_lock_bh(&sess->conn_lock); + atomic_set(&sess->sleep_on_sess_wait_comp, 1); + + list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, + conn_list) { + if (conn_count == 0) + break; + + if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { + is_last = 1; + } else { + iscsit_inc_conn_usage_count(conn_tmp); + is_last = 0; + } + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&sess->conn_lock); + iscsit_cause_connection_reinstatement(conn, 1); + spin_lock_bh(&sess->conn_lock); + + iscsit_dec_conn_usage_count(conn); + if (is_last == 0) + iscsit_dec_conn_usage_count(conn_tmp); + + conn_count--; + } + + if (atomic_read(&sess->nconn)) { + spin_unlock_bh(&sess->conn_lock); + wait_for_completion(&sess->session_wait_comp); + } else + spin_unlock_bh(&sess->conn_lock); + + iscsit_close_session(sess); + return 0; +} + +void iscsit_stop_session( + struct iscsi_session *sess, + int session_sleep, + int connection_sleep) +{ + u16 conn_count = atomic_read(&sess->nconn); + struct iscsi_conn *conn, *conn_tmp = NULL; + int is_last; + + spin_lock_bh(&sess->conn_lock); + if (session_sleep) + atomic_set(&sess->sleep_on_sess_wait_comp, 1); + + if (connection_sleep) { + list_for_each_entry_safe(conn, conn_tmp, &sess->sess_conn_list, + conn_list) { + if (conn_count == 0) + break; + + if (list_is_last(&conn->conn_list, &sess->sess_conn_list)) { + is_last = 1; + } else { + iscsit_inc_conn_usage_count(conn_tmp); + is_last = 0; + } + iscsit_inc_conn_usage_count(conn); + + spin_unlock_bh(&sess->conn_lock); + iscsit_cause_connection_reinstatement(conn, 1); + spin_lock_bh(&sess->conn_lock); + + iscsit_dec_conn_usage_count(conn); + if (is_last == 0) + iscsit_dec_conn_usage_count(conn_tmp); + conn_count--; + } + } 
else { + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) + iscsit_cause_connection_reinstatement(conn, 0); + } + + if (session_sleep && atomic_read(&sess->nconn)) { + spin_unlock_bh(&sess->conn_lock); + wait_for_completion(&sess->session_wait_comp); + } else + spin_unlock_bh(&sess->conn_lock); +} + +int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *tpg, int force) +{ + struct iscsi_session *sess; + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + int session_count = 0; + + spin_lock_bh(&se_tpg->session_lock); + if (tpg->nsessions && !force) { + spin_unlock_bh(&se_tpg->session_lock); + return -1; + } + + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + + spin_lock(&sess->conn_lock); + if (atomic_read(&sess->session_fall_back_to_erl0) || + atomic_read(&sess->session_logout) || + (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess->conn_lock); + continue; + } + atomic_set(&sess->session_reinstatement, 1); + spin_unlock(&sess->conn_lock); + spin_unlock_bh(&se_tpg->session_lock); + + iscsit_free_session(sess); + spin_lock_bh(&se_tpg->session_lock); + + session_count++; + } + spin_unlock_bh(&se_tpg->session_lock); + + pr_debug("Released %d iSCSI Session(s) from Target Portal" + " Group: %hu\n", session_count, tpg->tpgt); + return 0; +} + +MODULE_DESCRIPTION("iSCSI-Target Driver for mainline target infrastructure"); +MODULE_VERSION("4.1.x"); +MODULE_AUTHOR("nab@Linux-iSCSI.org"); +MODULE_LICENSE("GPL"); + +module_init(iscsi_target_init_module); +module_exit(iscsi_target_cleanup_module); diff --git a/drivers/target/iscsi/iscsi_target.h b/drivers/target/iscsi/iscsi_target.h new file mode 100644 index 000000000000..5db2ddeed5eb --- /dev/null +++ b/drivers/target/iscsi/iscsi_target.h @@ -0,0 +1,42 @@ +#ifndef ISCSI_TARGET_H +#define ISCSI_TARGET_H + +extern struct iscsi_tiqn *iscsit_get_tiqn_for_login(unsigned char *); +extern struct iscsi_tiqn *iscsit_get_tiqn(unsigned char *, int); +extern void iscsit_put_tiqn_for_login(struct iscsi_tiqn *); +extern struct iscsi_tiqn *iscsit_add_tiqn(unsigned char *); +extern void iscsit_del_tiqn(struct iscsi_tiqn *); +extern int iscsit_access_np(struct iscsi_np *, struct iscsi_portal_group *); +extern int iscsit_deaccess_np(struct iscsi_np *, struct iscsi_portal_group *); +extern struct iscsi_np *iscsit_add_np(struct __kernel_sockaddr_storage *, + char *, int); +extern int iscsit_reset_np_thread(struct iscsi_np *, struct iscsi_tpg_np *, + struct iscsi_portal_group *); +extern int iscsit_del_np(struct iscsi_np *); +extern int iscsit_add_reject_from_cmd(u8, int, int, unsigned char *, struct iscsi_cmd *); +extern int iscsit_logout_closesession(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_logout_closeconnection(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_logout_removeconnforrecovery(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_send_async_msg(struct iscsi_conn *, u16, u8, u8); +extern int iscsit_send_r2t(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_build_r2ts_for_cmd(struct iscsi_cmd *, struct iscsi_conn *, int); +extern void iscsit_thread_get_cpumask(struct iscsi_conn *); +extern int iscsi_target_tx_thread(void *); +extern int iscsi_target_rx_thread(void *); +extern int iscsit_close_connection(struct iscsi_conn *); +extern int iscsit_close_session(struct iscsi_session *); +extern void iscsit_fail_session(struct 
iscsi_session *); +extern int iscsit_free_session(struct iscsi_session *); +extern void iscsit_stop_session(struct iscsi_session *, int, int); +extern int iscsit_release_sessions_for_tpg(struct iscsi_portal_group *, int); + +extern struct iscsit_global *iscsit_global; +extern struct target_fabric_configfs *lio_target_fabric_configfs; + +extern struct kmem_cache *lio_dr_cache; +extern struct kmem_cache *lio_ooo_cache; +extern struct kmem_cache *lio_cmd_cache; +extern struct kmem_cache *lio_qr_cache; +extern struct kmem_cache *lio_r2t_cache; + +#endif /*** ISCSI_TARGET_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_auth.c b/drivers/target/iscsi/iscsi_target_auth.c new file mode 100644 index 000000000000..11fd74307811 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_auth.c @@ -0,0 +1,490 @@ +/******************************************************************************* + * This file houses the main functions for the iSCSI CHAP support + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/string.h> +#include <linux/crypto.h> +#include <linux/err.h> +#include <linux/scatterlist.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_nego.h" +#include "iscsi_target_auth.h" + +static unsigned char chap_asciihex_to_binaryhex(unsigned char val[2]) +{ + unsigned char result = 0; + /* + * MSB + */ + if ((val[0] >= 'a') && (val[0] <= 'f')) + result = ((val[0] - 'a' + 10) & 0xf) << 4; + else + if ((val[0] >= 'A') && (val[0] <= 'F')) + result = ((val[0] - 'A' + 10) & 0xf) << 4; + else /* digit */ + result = ((val[0] - '0') & 0xf) << 4; + /* + * LSB + */ + if ((val[1] >= 'a') && (val[1] <= 'f')) + result |= ((val[1] - 'a' + 10) & 0xf); + else + if ((val[1] >= 'A') && (val[1] <= 'F')) + result |= ((val[1] - 'A' + 10) & 0xf); + else /* digit */ + result |= ((val[1] - '0') & 0xf); + + return result; +} + +static int chap_string_to_hex(unsigned char *dst, unsigned char *src, int len) +{ + int i, j = 0; + + for (i = 0; i < len; i += 2) { + dst[j++] = (unsigned char) chap_asciihex_to_binaryhex(&src[i]); + } + + dst[j] = '\0'; + return j; +} + +static void chap_binaryhex_to_asciihex(char *dst, char *src, int src_len) +{ + int i; + + for (i = 0; i < src_len; i++) { + sprintf(&dst[i*2], "%02x", (int) src[i] & 0xff); + } +} + +static void chap_set_random(char *data, int length) +{ + long r; + unsigned n; + + while (length > 0) { + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 4); + n = r & 0x7; + + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 5); + n = (n << 3) | (r & 0x7); + + get_random_bytes(&r, sizeof(long)); + r = r ^ (r >> 8); + r = r ^ (r >> 5); + n = (n << 2) | (r & 0x3); + + *data++ = n; + length--; + } +} + +static void chap_gen_challenge( + struct iscsi_conn *conn, + int 
caller, + char *c_str, + unsigned int *c_len) +{ + unsigned char challenge_asciihex[CHAP_CHALLENGE_LENGTH * 2 + 1]; + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + memset(challenge_asciihex, 0, CHAP_CHALLENGE_LENGTH * 2 + 1); + + chap_set_random(chap->challenge, CHAP_CHALLENGE_LENGTH); + chap_binaryhex_to_asciihex(challenge_asciihex, chap->challenge, + CHAP_CHALLENGE_LENGTH); + /* + * Set CHAP_C, and copy the generated challenge into c_str. + */ + *c_len += sprintf(c_str + *c_len, "CHAP_C=0x%s", challenge_asciihex); + *c_len += 1; + + pr_debug("[%s] Sending CHAP_C=0x%s\n\n", (caller) ? "server" : "client", + challenge_asciihex); +} + + +static struct iscsi_chap *chap_server_open( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + const char *a_str, + char *aic_str, + unsigned int *aic_len) +{ + struct iscsi_chap *chap; + + if (!(auth->naf_flags & NAF_USERID_SET) || + !(auth->naf_flags & NAF_PASSWORD_SET)) { + pr_err("CHAP user or password not set for" + " Initiator ACL\n"); + return NULL; + } + + conn->auth_protocol = kzalloc(sizeof(struct iscsi_chap), GFP_KERNEL); + if (!conn->auth_protocol) + return NULL; + + chap = (struct iscsi_chap *) conn->auth_protocol; + /* + * We only support the MD5 digest algorithm presently. + */ + if (strncmp(a_str, "CHAP_A=5", 8)) { + pr_err("CHAP_A is not MD5.\n"); + return NULL; + } + pr_debug("[server] Got CHAP_A=5\n"); + /* + * Send back CHAP_A set to MD5. + */ + *aic_len = sprintf(aic_str, "CHAP_A=5"); + *aic_len += 1; + chap->digest_type = CHAP_DIGEST_MD5; + pr_debug("[server] Sending CHAP_A=%d\n", chap->digest_type); + /* + * Set Identifier. + */ + chap->id = ISCSI_TPG_C(conn)->tpg_chap_id++; + *aic_len += sprintf(aic_str + *aic_len, "CHAP_I=%d", chap->id); + *aic_len += 1; + pr_debug("[server] Sending CHAP_I=%d\n", chap->id); + /* + * Generate Challenge. + */ + chap_gen_challenge(conn, 1, aic_str, aic_len); + + return chap; +} + +static void chap_close(struct iscsi_conn *conn) +{ + kfree(conn->auth_protocol); + conn->auth_protocol = NULL; +} + +static int chap_server_compute_md5( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *nr_in_ptr, + char *nr_out_ptr, + unsigned int *nr_out_len) +{ + char *endptr; + unsigned char id, digest[MD5_SIGNATURE_SIZE]; + unsigned char type, response[MD5_SIGNATURE_SIZE * 2 + 2]; + unsigned char identifier[10], *challenge = NULL; + unsigned char *challenge_binhex = NULL; + unsigned char client_digest[MD5_SIGNATURE_SIZE]; + unsigned char server_digest[MD5_SIGNATURE_SIZE]; + unsigned char chap_n[MAX_CHAP_N_SIZE], chap_r[MAX_RESPONSE_LENGTH]; + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + struct crypto_hash *tfm; + struct hash_desc desc; + struct scatterlist sg; + int auth_ret = -1, ret, challenge_len; + + memset(identifier, 0, 10); + memset(chap_n, 0, MAX_CHAP_N_SIZE); + memset(chap_r, 0, MAX_RESPONSE_LENGTH); + memset(digest, 0, MD5_SIGNATURE_SIZE); + memset(response, 0, MD5_SIGNATURE_SIZE * 2 + 2); + memset(client_digest, 0, MD5_SIGNATURE_SIZE); + memset(server_digest, 0, MD5_SIGNATURE_SIZE); + + challenge = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL); + if (!challenge) { + pr_err("Unable to allocate challenge buffer\n"); + goto out; + } + + challenge_binhex = kzalloc(CHAP_CHALLENGE_STR_LEN, GFP_KERNEL); + if (!challenge_binhex) { + pr_err("Unable to allocate challenge_binhex buffer\n"); + goto out; + } + /* + * Extract CHAP_N.
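+ * CHAP_N carries the initiator's username and must match the userid + * configured in this node ACL. The response verified below is, per + * RFC 1994: + * + * CHAP_R = MD5(CHAP_I || secret || CHAP_C) + * + * computed here over chap->id, auth->password and chap->challenge.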
+ */ + if (extract_param(nr_in_ptr, "CHAP_N", MAX_CHAP_N_SIZE, chap_n, + &type) < 0) { + pr_err("Could not find CHAP_N.\n"); + goto out; + } + if (type == HEX) { + pr_err("Could not find CHAP_N.\n"); + goto out; + } + + if (memcmp(chap_n, auth->userid, strlen(auth->userid)) != 0) { + pr_err("CHAP_N values do not match!\n"); + goto out; + } + pr_debug("[server] Got CHAP_N=%s\n", chap_n); + /* + * Extract CHAP_R. + */ + if (extract_param(nr_in_ptr, "CHAP_R", MAX_RESPONSE_LENGTH, chap_r, + &type) < 0) { + pr_err("Could not find CHAP_R.\n"); + goto out; + } + if (type != HEX) { + pr_err("Could not find CHAP_R.\n"); + goto out; + } + + pr_debug("[server] Got CHAP_R=%s\n", chap_r); + chap_string_to_hex(client_digest, chap_r, strlen(chap_r)); + + tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + pr_err("Unable to allocate struct crypto_hash\n"); + goto out; + } + desc.tfm = tfm; + desc.flags = 0; + + ret = crypto_hash_init(&desc); + if (ret < 0) { + pr_err("crypto_hash_init() failed\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&chap->id, 1); + ret = crypto_hash_update(&desc, &sg, 1); + if (ret < 0) { + pr_err("crypto_hash_update() failed for id\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&auth->password, strlen(auth->password)); + ret = crypto_hash_update(&desc, &sg, strlen(auth->password)); + if (ret < 0) { + pr_err("crypto_hash_update() failed for password\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)chap->challenge, CHAP_CHALLENGE_LENGTH); + ret = crypto_hash_update(&desc, &sg, CHAP_CHALLENGE_LENGTH); + if (ret < 0) { + pr_err("crypto_hash_update() failed for challenge\n"); + crypto_free_hash(tfm); + goto out; + } + + ret = crypto_hash_final(&desc, server_digest); + if (ret < 0) { + pr_err("crypto_hash_final() failed for server digest\n"); + crypto_free_hash(tfm); + goto out; + } + crypto_free_hash(tfm); + + chap_binaryhex_to_asciihex(response, server_digest, MD5_SIGNATURE_SIZE); + pr_debug("[server] MD5 Server Digest: %s\n", response); + + if (memcmp(server_digest, client_digest, MD5_SIGNATURE_SIZE) != 0) { + pr_debug("[server] MD5 Digests do not match!\n\n"); + goto out; + } else + pr_debug("[server] MD5 Digests match, CHAP connetication" + " successful.\n\n"); + /* + * One way authentication has succeeded, return now if mutual + * authentication is not enabled. + */ + if (!auth->authenticate_target) { + kfree(challenge); + kfree(challenge_binhex); + return 0; + } + /* + * Get CHAP_I. + */ + if (extract_param(nr_in_ptr, "CHAP_I", 10, identifier, &type) < 0) { + pr_err("Could not find CHAP_I.\n"); + goto out; + } + + if (type == HEX) + id = (unsigned char)simple_strtoul((char *)&identifier[2], + &endptr, 0); + else + id = (unsigned char)simple_strtoul(identifier, &endptr, 0); + /* + * RFC 1994 says Identifier is no more than octet (8 bits). + */ + pr_debug("[server] Got CHAP_I=%d\n", id); + /* + * Get CHAP_C. + */ + if (extract_param(nr_in_ptr, "CHAP_C", CHAP_CHALLENGE_STR_LEN, + challenge, &type) < 0) { + pr_err("Could not find CHAP_C.\n"); + goto out; + } + + if (type != HEX) { + pr_err("Could not find CHAP_C.\n"); + goto out; + } + pr_debug("[server] Got CHAP_C=%s\n", challenge); + challenge_len = chap_string_to_hex(challenge_binhex, challenge, + strlen(challenge)); + if (!challenge_len) { + pr_err("Unable to convert incoming challenge\n"); + goto out; + } + /* + * Generate CHAP_N and CHAP_R for mutual authentication. 
+ */ + tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) { + pr_err("Unable to allocate struct crypto_hash\n"); + goto out; + } + desc.tfm = tfm; + desc.flags = 0; + + ret = crypto_hash_init(&desc); + if (ret < 0) { + pr_err("crypto_hash_init() failed\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)&id, 1); + ret = crypto_hash_update(&desc, &sg, 1); + if (ret < 0) { + pr_err("crypto_hash_update() failed for id\n"); + crypto_free_hash(tfm); + goto out; + } + + sg_init_one(&sg, (void *)auth->password_mutual, + strlen(auth->password_mutual)); + ret = crypto_hash_update(&desc, &sg, strlen(auth->password_mutual)); + if (ret < 0) { + pr_err("crypto_hash_update() failed for" + " password_mutual\n"); + crypto_free_hash(tfm); + goto out; + } + /* + * Convert received challenge to binary hex. + */ + sg_init_one(&sg, (void *)challenge_binhex, challenge_len); + ret = crypto_hash_update(&desc, &sg, challenge_len); + if (ret < 0) { + pr_err("crypto_hash_update() failed for ma challenge\n"); + crypto_free_hash(tfm); + goto out; + } + + ret = crypto_hash_final(&desc, digest); + if (ret < 0) { + pr_err("crypto_hash_final() failed for ma digest\n"); + crypto_free_hash(tfm); + goto out; + } + crypto_free_hash(tfm); + /* + * Generate CHAP_N and CHAP_R. + */ + *nr_out_len = sprintf(nr_out_ptr, "CHAP_N=%s", auth->userid_mutual); + *nr_out_len += 1; + pr_debug("[server] Sending CHAP_N=%s\n", auth->userid_mutual); + /* + * Convert response from binary hex to ascii hext. + */ + chap_binaryhex_to_asciihex(response, digest, MD5_SIGNATURE_SIZE); + *nr_out_len += sprintf(nr_out_ptr + *nr_out_len, "CHAP_R=0x%s", + response); + *nr_out_len += 1; + pr_debug("[server] Sending CHAP_R=0x%s\n", response); + auth_ret = 0; +out: + kfree(challenge); + kfree(challenge_binhex); + return auth_ret; +} + +static int chap_got_response( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *nr_in_ptr, + char *nr_out_ptr, + unsigned int *nr_out_len) +{ + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + switch (chap->digest_type) { + case CHAP_DIGEST_MD5: + if (chap_server_compute_md5(conn, auth, nr_in_ptr, + nr_out_ptr, nr_out_len) < 0) + return -1; + return 0; + default: + pr_err("Unknown CHAP digest type %d!\n", + chap->digest_type); + return -1; + } +} + +u32 chap_main_loop( + struct iscsi_conn *conn, + struct iscsi_node_auth *auth, + char *in_text, + char *out_text, + int *in_len, + int *out_len) +{ + struct iscsi_chap *chap = (struct iscsi_chap *) conn->auth_protocol; + + if (!chap) { + chap = chap_server_open(conn, auth, in_text, out_text, out_len); + if (!chap) + return 2; + chap->chap_state = CHAP_STAGE_SERVER_AIC; + return 0; + } else if (chap->chap_state == CHAP_STAGE_SERVER_AIC) { + convert_null_to_semi(in_text, *in_len); + if (chap_got_response(conn, auth, in_text, out_text, + out_len) < 0) { + chap_close(conn); + return 2; + } + if (auth->authenticate_target) + chap->chap_state = CHAP_STAGE_SERVER_NR; + else + *out_len = 0; + chap_close(conn); + return 1; + } + + return 2; +} diff --git a/drivers/target/iscsi/iscsi_target_auth.h b/drivers/target/iscsi/iscsi_target_auth.h new file mode 100644 index 000000000000..2f463c09626d --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_auth.h @@ -0,0 +1,31 @@ +#ifndef _ISCSI_CHAP_H_ +#define _ISCSI_CHAP_H_ + +#define CHAP_DIGEST_MD5 5 +#define CHAP_DIGEST_SHA 6 + +#define CHAP_CHALLENGE_LENGTH 16 +#define CHAP_CHALLENGE_STR_LEN 4096 +#define MAX_RESPONSE_LENGTH 64 /* sufficient for MD5 */ +#define 
MAX_CHAP_N_SIZE 512 + +#define MD5_SIGNATURE_SIZE 16 /* 16 bytes in a MD5 message digest */ + +#define CHAP_STAGE_CLIENT_A 1 +#define CHAP_STAGE_SERVER_AIC 2 +#define CHAP_STAGE_CLIENT_NR 3 +#define CHAP_STAGE_CLIENT_NRIC 4 +#define CHAP_STAGE_SERVER_NR 5 + +extern u32 chap_main_loop(struct iscsi_conn *, struct iscsi_node_auth *, char *, char *, + int *, int *); + +struct iscsi_chap { + unsigned char digest_type; + unsigned char id; + unsigned char challenge[CHAP_CHALLENGE_LENGTH]; + unsigned int authenticate_target; + unsigned int chap_state; +} ____cacheline_aligned; + +#endif /*** _ISCSI_CHAP_H_ ***/ diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c new file mode 100644 index 000000000000..32bb92c44450 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -0,0 +1,1882 @@ +/******************************************************************************* + * This file contains the configfs implementation for iSCSI Target mode + * from the LIO-Target Project. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ****************************************************************************/ + +#include <linux/configfs.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/target_core_fabric_ops.h> +#include <target/target_core_fabric_configfs.h> +#include <target/target_core_fabric_lib.h> +#include <target/target_core_device.h> +#include <target/target_core_tpg.h> +#include <target/target_core_configfs.h> +#include <target/configfs_macros.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_device.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_nodeattrib.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_stat.h" +#include "iscsi_target_configfs.h" + +struct target_fabric_configfs *lio_target_fabric_configfs; + +struct lio_target_configfs_attribute { + struct configfs_attribute attr; + ssize_t (*show)(void *, char *); + ssize_t (*store)(void *, const char *, size_t); +}; + +struct iscsi_portal_group *lio_get_tpg_from_tpg_item( + struct config_item *item, + struct iscsi_tiqn **tiqn_out) +{ + struct se_portal_group *se_tpg = container_of(to_config_group(item), + struct se_portal_group, tpg_group); + struct iscsi_portal_group *tpg = + (struct iscsi_portal_group *)se_tpg->se_tpg_fabric_ptr; + int ret; + + if (!tpg) { + pr_err("Unable to locate struct iscsi_portal_group " + "pointer\n"); + return NULL; + } + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return NULL; + + *tiqn_out = tpg->tpg_tiqn; + return tpg; +} + +/* Start items for lio_target_portal_cit */ + +static ssize_t lio_target_np_show_sctp( + struct se_tpg_np *se_tpg_np, + char *page) +{ + struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np, + 
struct iscsi_tpg_np, se_tpg_np); + struct iscsi_tpg_np *tpg_np_sctp; + ssize_t rb; + + tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); + if (tpg_np_sctp) + rb = sprintf(page, "1\n"); + else + rb = sprintf(page, "0\n"); + + return rb; +} + +static ssize_t lio_target_np_store_sctp( + struct se_tpg_np *se_tpg_np, + const char *page, + size_t count) +{ + struct iscsi_np *np; + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np = container_of(se_tpg_np, + struct iscsi_tpg_np, se_tpg_np); + struct iscsi_tpg_np *tpg_np_sctp = NULL; + char *endptr; + u32 op; + int ret; + + op = simple_strtoul(page, &endptr, 0); + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for tpg_enable: %u\n", op); + return -EINVAL; + } + np = tpg_np->tpg_np; + if (!np) { + pr_err("Unable to locate struct iscsi_np from" + " struct iscsi_tpg_np\n"); + return -EINVAL; + } + + tpg = tpg_np->tpg; + if (iscsit_get_tpg(tpg) < 0) + return -EINVAL; + + if (op) { + /* + * Use existing np->np_sockaddr for SCTP network portal reference + */ + tpg_np_sctp = iscsit_tpg_add_network_portal(tpg, &np->np_sockaddr, + np->np_ip, tpg_np, ISCSI_SCTP_TCP); + if (!tpg_np_sctp || IS_ERR(tpg_np_sctp)) + goto out; + } else { + tpg_np_sctp = iscsit_tpg_locate_child_np(tpg_np, ISCSI_SCTP_TCP); + if (!tpg_np_sctp) + goto out; + + ret = iscsit_tpg_del_network_portal(tpg, tpg_np_sctp); + if (ret < 0) + goto out; + } + + iscsit_put_tpg(tpg); + return count; +out: + iscsit_put_tpg(tpg); + return -EINVAL; +} + +TF_NP_BASE_ATTR(lio_target, sctp, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_portal_attrs[] = { + &lio_target_np_sctp.attr, + NULL, +}; + +/* Stop items for lio_target_portal_cit */ + +/* Start items for lio_target_np_cit */ + +#define MAX_PORTAL_LEN 256 + +struct se_tpg_np *lio_target_call_addnptotpg( + struct se_portal_group *se_tpg, + struct config_group *group, + const char *name) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; + char *str, *str2, *ip_str, *port_str; + struct __kernel_sockaddr_storage sockaddr; + struct sockaddr_in *sock_in; + struct sockaddr_in6 *sock_in6; + unsigned long port; + int ret; + char buf[MAX_PORTAL_LEN + 1]; + + if (strlen(name) > MAX_PORTAL_LEN) { + pr_err("strlen(name): %d exceeds MAX_PORTAL_LEN: %d\n", + (int)strlen(name), MAX_PORTAL_LEN); + return ERR_PTR(-EOVERFLOW); + } + memset(buf, 0, MAX_PORTAL_LEN + 1); + snprintf(buf, MAX_PORTAL_LEN, "%s", name); + + memset(&sockaddr, 0, sizeof(struct __kernel_sockaddr_storage)); + + str = strstr(buf, "["); + if (str) { + const char *end; + + str2 = strstr(str, "]"); + if (!str2) { + pr_err("Unable to locate trailing \"]\"" + " in IPv6 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + str++; /* Skip over leading "[" */ + *str2 = '\0'; /* Terminate the IPv6 address */ + str2++; /* Skip over the "]" */ + port_str = strstr(str2, ":"); + if (!port_str) { + pr_err("Unable to locate \":port\"" + " in IPv6 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + *port_str = '\0'; /* Terminate string for IP */ + port_str++; /* Skip over ":" */ + + ret = strict_strtoul(port_str, 0, &port); + if (ret < 0) { + pr_err("strict_strtoul() failed for port_str: %d\n", ret); + return ERR_PTR(ret); + } + sock_in6 = (struct sockaddr_in6 *)&sockaddr; + sock_in6->sin6_family = AF_INET6; + sock_in6->sin6_port = htons((unsigned short)port); + ret = in6_pton(str, IPV6_ADDRESS_SPACE, + (void *)&sock_in6->sin6_addr.in6_u, -1, &end); + if (ret <= 0) { + pr_err("in6_pton returned: %d\n", ret); + 
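+ /* + * Portal strings are expected as "[ipv6]:port" or "ipv4:port", + * e.g. "[::1]:3260" or "192.168.0.1:3260"; anything else fails + * the parsing in this function. + */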
return ERR_PTR(-EINVAL); + } + } else { + str = ip_str = &buf[0]; + port_str = strstr(ip_str, ":"); + if (!port_str) { + pr_err("Unable to locate \":port\"" + " in IPv4 iSCSI network portal address\n"); + return ERR_PTR(-EINVAL); + } + *port_str = '\0'; /* Terminate string for IP */ + port_str++; /* Skip over ":" */ + + ret = strict_strtoul(port_str, 0, &port); + if (ret < 0) { + pr_err("strict_strtoul() failed for port_str: %d\n", ret); + return ERR_PTR(ret); + } + sock_in = (struct sockaddr_in *)&sockaddr; + sock_in->sin_family = AF_INET; + sock_in->sin_port = htons((unsigned short)port); + sock_in->sin_addr.s_addr = in_aton(ip_str); + } + tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg); + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return ERR_PTR(-EINVAL); + + pr_debug("LIO_Target_ConfigFS: REGISTER -> %s TPGT: %hu" + " PORTAL: %s\n", + config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, name); + /* + * Assume ISCSI_TCP by default. Other network portals for other + * iSCSI fabrics: + * + * Traditional iSCSI over SCTP (initial support) + * iSER/TCP (TODO, hardware available) + * iSER/SCTP (TODO, software emulation with osc-iwarp) + * iSER/IB (TODO, hardware available) + * + * can be enabled with attributes under + * sys/kernel/config/iscsi/$IQN/$TPG/np/$IP:$PORT/ + * + */ + tpg_np = iscsit_tpg_add_network_portal(tpg, &sockaddr, str, NULL, + ISCSI_TCP); + if (IS_ERR(tpg_np)) { + iscsit_put_tpg(tpg); + return ERR_PTR(PTR_ERR(tpg_np)); + } + pr_debug("LIO_Target_ConfigFS: addnptotpg done!\n"); + + iscsit_put_tpg(tpg); + return &tpg_np->se_tpg_np; +} + +static void lio_target_call_delnpfromtpg( + struct se_tpg_np *se_tpg_np) +{ + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np; + struct se_portal_group *se_tpg; + int ret; + + tpg_np = container_of(se_tpg_np, struct iscsi_tpg_np, se_tpg_np); + tpg = tpg_np->tpg; + ret = iscsit_get_tpg(tpg); + if (ret < 0) + return; + + se_tpg = &tpg->tpg_se_tpg; + pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s TPGT: %hu" + " PORTAL: %s:%hu\n", config_item_name(&se_tpg->se_tpg_wwn->wwn_group.cg_item), + tpg->tpgt, tpg_np->tpg_np->np_ip, tpg_np->tpg_np->np_port); + + ret = iscsit_tpg_del_network_portal(tpg, tpg_np); + if (ret < 0) + goto out; + + pr_debug("LIO_Target_ConfigFS: delnpfromtpg done!\n"); +out: + iscsit_put_tpg(tpg); +} + +/* End items for lio_target_np_cit */ + +/* Start items for lio_target_nacl_attrib_cit */ + +#define DEF_NACL_ATTRIB(name) \ +static ssize_t iscsi_nacl_attrib_show_##name( \ + struct se_node_acl *se_nacl, \ + char *page) \ +{ \ + struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ + se_node_acl); \ + \ + return sprintf(page, "%u\n", ISCSI_NODE_ATTRIB(nacl)->name); \ +} \ + \ +static ssize_t iscsi_nacl_attrib_store_##name( \ + struct se_node_acl *se_nacl, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_node_acl *nacl = container_of(se_nacl, struct iscsi_node_acl, \ + se_node_acl); \ + char *endptr; \ + u32 val; \ + int ret; \ + \ + val = simple_strtoul(page, &endptr, 0); \ + ret = iscsit_na_##name(nacl, val); \ + if (ret < 0) \ + return ret; \ + \ + return count; \ +} + +#define NACL_ATTR(_name, _mode) TF_NACL_ATTRIB_ATTR(iscsi, _name, _mode); +/* + * Define iscsi_node_attrib_s_dataout_timeout + */ +DEF_NACL_ATTRIB(dataout_timeout); +NACL_ATTR(dataout_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_dataout_timeout_retries + */ +DEF_NACL_ATTRIB(dataout_timeout_retries); +NACL_ATTR(dataout_timeout_retries, S_IRUGO | S_IWUSR); +/*
+ * Define iscsi_node_attrib_s_default_erl + */ +DEF_NACL_ATTRIB(default_erl); +NACL_ATTR(default_erl, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_nopin_timeout + */ +DEF_NACL_ATTRIB(nopin_timeout); +NACL_ATTR(nopin_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_nopin_response_timeout + */ +DEF_NACL_ATTRIB(nopin_response_timeout); +NACL_ATTR(nopin_response_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_datain_pdu_offsets + */ +DEF_NACL_ATTRIB(random_datain_pdu_offsets); +NACL_ATTR(random_datain_pdu_offsets, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_datain_seq_offsets + */ +DEF_NACL_ATTRIB(random_datain_seq_offsets); +NACL_ATTR(random_datain_seq_offsets, S_IRUGO | S_IWUSR); +/* + * Define iscsi_node_attrib_s_random_r2t_offsets + */ +DEF_NACL_ATTRIB(random_r2t_offsets); +NACL_ATTR(random_r2t_offsets, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_nacl_attrib_attrs[] = { + &iscsi_nacl_attrib_dataout_timeout.attr, + &iscsi_nacl_attrib_dataout_timeout_retries.attr, + &iscsi_nacl_attrib_default_erl.attr, + &iscsi_nacl_attrib_nopin_timeout.attr, + &iscsi_nacl_attrib_nopin_response_timeout.attr, + &iscsi_nacl_attrib_random_datain_pdu_offsets.attr, + &iscsi_nacl_attrib_random_datain_seq_offsets.attr, + &iscsi_nacl_attrib_random_r2t_offsets.attr, + NULL, +}; + +/* End items for lio_target_nacl_attrib_cit */ + +/* Start items for lio_target_nacl_auth_cit */ + +#define __DEF_NACL_AUTH_STR(prefix, name, flags) \ +static ssize_t __iscsi_##prefix##_show_##name( \ + struct iscsi_node_acl *nacl, \ + char *page) \ +{ \ + struct iscsi_node_auth *auth = &nacl->node_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + return snprintf(page, PAGE_SIZE, "%s\n", auth->name); \ +} \ + \ +static ssize_t __iscsi_##prefix##_store_##name( \ + struct iscsi_node_acl *nacl, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_node_auth *auth = &nacl->node_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + snprintf(auth->name, PAGE_SIZE, "%s", page); \ + if (!strncmp("NULL", auth->name, 4)) \ + auth->naf_flags &= ~flags; \ + else \ + auth->naf_flags |= flags; \ + \ + if ((auth->naf_flags & NAF_USERID_IN_SET) && \ + (auth->naf_flags & NAF_PASSWORD_IN_SET)) \ + auth->authenticate_target = 1; \ + else \ + auth->authenticate_target = 0; \ + \ + return count; \ +} + +#define __DEF_NACL_AUTH_INT(prefix, name) \ +static ssize_t __iscsi_##prefix##_show_##name( \ + struct iscsi_node_acl *nacl, \ + char *page) \ +{ \ + struct iscsi_node_auth *auth = &nacl->node_auth; \ + \ + if (!capable(CAP_SYS_ADMIN)) \ + return -EPERM; \ + \ + return snprintf(page, PAGE_SIZE, "%d\n", auth->name); \ +} + +#define DEF_NACL_AUTH_STR(name, flags) \ + __DEF_NACL_AUTH_STR(nacl_auth, name, flags) \ +static ssize_t iscsi_nacl_auth_show_##name( \ + struct se_node_acl *nacl, \ + char *page) \ +{ \ + return __iscsi_nacl_auth_show_##name(container_of(nacl, \ + struct iscsi_node_acl, se_node_acl), page); \ +} \ +static ssize_t iscsi_nacl_auth_store_##name( \ + struct se_node_acl *nacl, \ + const char *page, \ + size_t count) \ +{ \ + return __iscsi_nacl_auth_store_##name(container_of(nacl, \ + struct iscsi_node_acl, se_node_acl), page, count); \ +} + +#define DEF_NACL_AUTH_INT(name) \ + __DEF_NACL_AUTH_INT(nacl_auth, name) \ +static ssize_t iscsi_nacl_auth_show_##name( \ + struct se_node_acl *nacl, \ + char *page) \ +{ \ + return __iscsi_nacl_auth_show_##name(container_of(nacl, \ + struct iscsi_node_acl, se_node_acl), page); \ 
+} + +#define AUTH_ATTR(_name, _mode) TF_NACL_AUTH_ATTR(iscsi, _name, _mode); +#define AUTH_ATTR_RO(_name) TF_NACL_AUTH_ATTR_RO(iscsi, _name); + +/* + * One-way authentication userid + */ +DEF_NACL_AUTH_STR(userid, NAF_USERID_SET); +AUTH_ATTR(userid, S_IRUGO | S_IWUSR); +/* + * One-way authentication password + */ +DEF_NACL_AUTH_STR(password, NAF_PASSWORD_SET); +AUTH_ATTR(password, S_IRUGO | S_IWUSR); +/* + * Enforce mutual authentication + */ +DEF_NACL_AUTH_INT(authenticate_target); +AUTH_ATTR_RO(authenticate_target); +/* + * Mutual authentication userid + */ +DEF_NACL_AUTH_STR(userid_mutual, NAF_USERID_IN_SET); +AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR); +/* + * Mutual authentication password + */ +DEF_NACL_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET); +AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_nacl_auth_attrs[] = { + &iscsi_nacl_auth_userid.attr, + &iscsi_nacl_auth_password.attr, + &iscsi_nacl_auth_authenticate_target.attr, + &iscsi_nacl_auth_userid_mutual.attr, + &iscsi_nacl_auth_password_mutual.attr, + NULL, +}; + +/* End items for lio_target_nacl_auth_cit */ + +/* Start items for lio_target_nacl_param_cit */ + +#define DEF_NACL_PARAM(name) \ +static ssize_t iscsi_nacl_param_show_##name( \ + struct se_node_acl *se_nacl, \ + char *page) \ +{ \ + struct iscsi_session *sess; \ + struct se_session *se_sess; \ + ssize_t rb; \ + \ + spin_lock_bh(&se_nacl->nacl_sess_lock); \ + se_sess = se_nacl->nacl_sess; \ + if (!se_sess) { \ + rb = snprintf(page, PAGE_SIZE, \ + "No Active iSCSI Session\n"); \ + } else { \ + sess = se_sess->fabric_sess_ptr; \ + rb = snprintf(page, PAGE_SIZE, "%u\n", \ + (u32)sess->sess_ops->name); \ + } \ + spin_unlock_bh(&se_nacl->nacl_sess_lock); \ + \ + return rb; \ +} + +#define NACL_PARAM_ATTR(_name) TF_NACL_PARAM_ATTR_RO(iscsi, _name); + +DEF_NACL_PARAM(MaxConnections); +NACL_PARAM_ATTR(MaxConnections); + +DEF_NACL_PARAM(InitialR2T); +NACL_PARAM_ATTR(InitialR2T); + +DEF_NACL_PARAM(ImmediateData); +NACL_PARAM_ATTR(ImmediateData); + +DEF_NACL_PARAM(MaxBurstLength); +NACL_PARAM_ATTR(MaxBurstLength); + +DEF_NACL_PARAM(FirstBurstLength); +NACL_PARAM_ATTR(FirstBurstLength); + +DEF_NACL_PARAM(DefaultTime2Wait); +NACL_PARAM_ATTR(DefaultTime2Wait); + +DEF_NACL_PARAM(DefaultTime2Retain); +NACL_PARAM_ATTR(DefaultTime2Retain); + +DEF_NACL_PARAM(MaxOutstandingR2T); +NACL_PARAM_ATTR(MaxOutstandingR2T); + +DEF_NACL_PARAM(DataPDUInOrder); +NACL_PARAM_ATTR(DataPDUInOrder); + +DEF_NACL_PARAM(DataSequenceInOrder); +NACL_PARAM_ATTR(DataSequenceInOrder); + +DEF_NACL_PARAM(ErrorRecoveryLevel); +NACL_PARAM_ATTR(ErrorRecoveryLevel); + +static struct configfs_attribute *lio_target_nacl_param_attrs[] = { + &iscsi_nacl_param_MaxConnections.attr, + &iscsi_nacl_param_InitialR2T.attr, + &iscsi_nacl_param_ImmediateData.attr, + &iscsi_nacl_param_MaxBurstLength.attr, + &iscsi_nacl_param_FirstBurstLength.attr, + &iscsi_nacl_param_DefaultTime2Wait.attr, + &iscsi_nacl_param_DefaultTime2Retain.attr, + &iscsi_nacl_param_MaxOutstandingR2T.attr, + &iscsi_nacl_param_DataPDUInOrder.attr, + &iscsi_nacl_param_DataSequenceInOrder.attr, + &iscsi_nacl_param_ErrorRecoveryLevel.attr, + NULL, +}; + +/* End items for lio_target_nacl_param_cit */ + +/* Start items for lio_target_acl_cit */ + +static ssize_t lio_target_nacl_show_info( + struct se_node_acl *se_nacl, + char *page) +{ + struct iscsi_session *sess; + struct iscsi_conn *conn; + struct se_session *se_sess; + ssize_t rb = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + 
if (!se_sess) { + rb += sprintf(page+rb, "No active iSCSI Session for Initiator" + " Endpoint: %s\n", se_nacl->initiatorname); + } else { + sess = se_sess->fabric_sess_ptr; + + if (sess->sess_ops->InitiatorName) + rb += sprintf(page+rb, "InitiatorName: %s\n", + sess->sess_ops->InitiatorName); + if (sess->sess_ops->InitiatorAlias) + rb += sprintf(page+rb, "InitiatorAlias: %s\n", + sess->sess_ops->InitiatorAlias); + + rb += sprintf(page+rb, "LIO Session ID: %u " + "ISID: 0x%02x %02x %02x %02x %02x %02x " + "TSIH: %hu ", sess->sid, + sess->isid[0], sess->isid[1], sess->isid[2], + sess->isid[3], sess->isid[4], sess->isid[5], + sess->tsih); + rb += sprintf(page+rb, "SessionType: %s\n", + (sess->sess_ops->SessionType) ? + "Discovery" : "Normal"); + rb += sprintf(page+rb, "Session State: "); + switch (sess->session_state) { + case TARG_SESS_STATE_FREE: + rb += sprintf(page+rb, "TARG_SESS_FREE\n"); + break; + case TARG_SESS_STATE_ACTIVE: + rb += sprintf(page+rb, "TARG_SESS_STATE_ACTIVE\n"); + break; + case TARG_SESS_STATE_LOGGED_IN: + rb += sprintf(page+rb, "TARG_SESS_STATE_LOGGED_IN\n"); + break; + case TARG_SESS_STATE_FAILED: + rb += sprintf(page+rb, "TARG_SESS_STATE_FAILED\n"); + break; + case TARG_SESS_STATE_IN_CONTINUE: + rb += sprintf(page+rb, "TARG_SESS_STATE_IN_CONTINUE\n"); + break; + default: + rb += sprintf(page+rb, "ERROR: Unknown Session" + " State!\n"); + break; + } + + rb += sprintf(page+rb, "---------------------[iSCSI Session" + " Values]-----------------------\n"); + rb += sprintf(page+rb, " CmdSN/WR : CmdSN/WC : ExpCmdSN" + " : MaxCmdSN : ITT : TTT\n"); + rb += sprintf(page+rb, " 0x%08x 0x%08x 0x%08x 0x%08x" + " 0x%08x 0x%08x\n", + sess->cmdsn_window, + (sess->max_cmd_sn - sess->exp_cmd_sn) + 1, + sess->exp_cmd_sn, sess->max_cmd_sn, + sess->init_task_tag, sess->targ_xfer_tag); + rb += sprintf(page+rb, "----------------------[iSCSI" + " Connections]-------------------------\n"); + + spin_lock(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + rb += sprintf(page+rb, "CID: %hu Connection" + " State: ", conn->cid); + switch (conn->conn_state) { + case TARG_CONN_STATE_FREE: + rb += sprintf(page+rb, + "TARG_CONN_STATE_FREE\n"); + break; + case TARG_CONN_STATE_XPT_UP: + rb += sprintf(page+rb, + "TARG_CONN_STATE_XPT_UP\n"); + break; + case TARG_CONN_STATE_IN_LOGIN: + rb += sprintf(page+rb, + "TARG_CONN_STATE_IN_LOGIN\n"); + break; + case TARG_CONN_STATE_LOGGED_IN: + rb += sprintf(page+rb, + "TARG_CONN_STATE_LOGGED_IN\n"); + break; + case TARG_CONN_STATE_IN_LOGOUT: + rb += sprintf(page+rb, + "TARG_CONN_STATE_IN_LOGOUT\n"); + break; + case TARG_CONN_STATE_LOGOUT_REQUESTED: + rb += sprintf(page+rb, + "TARG_CONN_STATE_LOGOUT_REQUESTED\n"); + break; + case TARG_CONN_STATE_CLEANUP_WAIT: + rb += sprintf(page+rb, + "TARG_CONN_STATE_CLEANUP_WAIT\n"); + break; + default: + rb += sprintf(page+rb, + "ERROR: Unknown Connection State!\n"); + break; + } + + rb += sprintf(page+rb, " Address %s %s", conn->login_ip, + (conn->network_transport == ISCSI_TCP) ? 
+ "TCP" : "SCTP"); + rb += sprintf(page+rb, " StatSN: 0x%08x\n", + conn->stat_sn); + } + spin_unlock(&sess->conn_lock); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return rb; +} + +TF_NACL_BASE_ATTR_RO(lio_target, info); + +static ssize_t lio_target_nacl_show_cmdsn_depth( + struct se_node_acl *se_nacl, + char *page) +{ + return sprintf(page, "%u\n", se_nacl->queue_depth); +} + +static ssize_t lio_target_nacl_store_cmdsn_depth( + struct se_node_acl *se_nacl, + const char *page, + size_t count) +{ + struct se_portal_group *se_tpg = se_nacl->se_tpg; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + struct config_item *acl_ci, *tpg_ci, *wwn_ci; + char *endptr; + u32 cmdsn_depth = 0; + int ret; + + cmdsn_depth = simple_strtoul(page, &endptr, 0); + if (cmdsn_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) { + pr_err("Passed cmdsn_depth: %u exceeds" + " TA_DEFAULT_CMDSN_DEPTH_MAX: %u\n", cmdsn_depth, + TA_DEFAULT_CMDSN_DEPTH_MAX); + return -EINVAL; + } + acl_ci = &se_nacl->acl_group.cg_item; + if (!acl_ci) { + pr_err("Unable to locatel acl_ci\n"); + return -EINVAL; + } + tpg_ci = &acl_ci->ci_parent->ci_group->cg_item; + if (!tpg_ci) { + pr_err("Unable to locate tpg_ci\n"); + return -EINVAL; + } + wwn_ci = &tpg_ci->ci_group->cg_item; + if (!wwn_ci) { + pr_err("Unable to locate config_item wwn_ci\n"); + return -EINVAL; + } + + if (iscsit_get_tpg(tpg) < 0) + return -EINVAL; + /* + * iscsit_tpg_set_initiator_node_queue_depth() assumes force=1 + */ + ret = iscsit_tpg_set_initiator_node_queue_depth(tpg, + config_item_name(acl_ci), cmdsn_depth, 1); + + pr_debug("LIO_Target_ConfigFS: %s/%s Set CmdSN Window: %u for" + "InitiatorName: %s\n", config_item_name(wwn_ci), + config_item_name(tpg_ci), cmdsn_depth, + config_item_name(acl_ci)); + + iscsit_put_tpg(tpg); + return (!ret) ? 
count : (ssize_t)ret; +} + +TF_NACL_BASE_ATTR(lio_target, cmdsn_depth, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_initiator_attrs[] = { + &lio_target_nacl_info.attr, + &lio_target_nacl_cmdsn_depth.attr, + NULL, +}; + +static struct se_node_acl *lio_tpg_alloc_fabric_acl( + struct se_portal_group *se_tpg) +{ + struct iscsi_node_acl *acl; + + acl = kzalloc(sizeof(struct iscsi_node_acl), GFP_KERNEL); + if (!acl) { + pr_err("Unable to allocate memory for struct iscsi_node_acl\n"); + return NULL; + } + + return &acl->se_node_acl; +} + +static struct se_node_acl *lio_target_make_nodeacl( + struct se_portal_group *se_tpg, + struct config_group *group, + const char *name) +{ + struct config_group *stats_cg; + struct iscsi_node_acl *acl; + struct se_node_acl *se_nacl_new, *se_nacl; + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + u32 cmdsn_depth; + + se_nacl_new = lio_tpg_alloc_fabric_acl(se_tpg); + if (!se_nacl_new) + return ERR_PTR(-ENOMEM); + + acl = container_of(se_nacl_new, struct iscsi_node_acl, + se_node_acl); + + cmdsn_depth = ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; + /* + * se_nacl_new may be released by core_tpg_add_initiator_node_acl() + * when converting a NodeACL from demo mode -> explicit + */ + se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new, + name, cmdsn_depth); + if (IS_ERR(se_nacl)) + return se_nacl; + + stats_cg = &acl->se_node_acl.acl_fabric_stat_group; + + stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 2, + GFP_KERNEL); + if (!stats_cg->default_groups) { + pr_err("Unable to allocate memory for" + " stats_cg->default_groups\n"); + core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); + kfree(acl); + return ERR_PTR(-ENOMEM); + } + + stats_cg->default_groups[0] = &NODE_STAT_GRPS(acl)->iscsi_sess_stats_group; + stats_cg->default_groups[1] = NULL; + config_group_init_type_name(&NODE_STAT_GRPS(acl)->iscsi_sess_stats_group, + "iscsi_sess_stats", &iscsi_stat_sess_cit); + + return se_nacl; +} + +static void lio_target_drop_nodeacl( + struct se_node_acl *se_nacl) +{ + struct se_portal_group *se_tpg = se_nacl->se_tpg; + struct iscsi_node_acl *acl = container_of(se_nacl, + struct iscsi_node_acl, se_node_acl); + struct config_item *df_item; + struct config_group *stats_cg; + int i; + + stats_cg = &acl->se_node_acl.acl_fabric_stat_group; + for (i = 0; stats_cg->default_groups[i]; i++) { + df_item = &stats_cg->default_groups[i]->cg_item; + stats_cg->default_groups[i] = NULL; + config_item_put(df_item); + } + kfree(stats_cg->default_groups); + + core_tpg_del_initiator_node_acl(se_tpg, se_nacl, 1); + kfree(acl); +} + +/* End items for lio_target_acl_cit */ + +/* Start items for lio_target_tpg_attrib_cit */ + +#define DEF_TPG_ATTRIB(name) \ + \ +static ssize_t iscsi_tpg_attrib_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + ssize_t rb; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + rb = sprintf(page, "%u\n", ISCSI_TPG_ATTRIB(tpg)->name); \ + iscsit_put_tpg(tpg); \ + return rb; \ +} \ + \ +static ssize_t iscsi_tpg_attrib_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + char *endptr; \ + u32 val; \ + int ret; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + val =
simple_strtoul(page, &endptr, 0); \ + ret = iscsit_ta_##name(tpg, val); \ + if (ret < 0) \ + goto out; \ + \ + iscsit_put_tpg(tpg); \ + return count; \ +out: \ + iscsit_put_tpg(tpg); \ + return ret; \ +} + +#define TPG_ATTR(_name, _mode) TF_TPG_ATTRIB_ATTR(iscsi, _name, _mode); + +/* + * Define iscsi_tpg_attrib_s_authentication + */ +DEF_TPG_ATTRIB(authentication); +TPG_ATTR(authentication, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_login_timeout + */ +DEF_TPG_ATTRIB(login_timeout); +TPG_ATTR(login_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_netif_timeout + */ +DEF_TPG_ATTRIB(netif_timeout); +TPG_ATTR(netif_timeout, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_generate_node_acls + */ +DEF_TPG_ATTRIB(generate_node_acls); +TPG_ATTR(generate_node_acls, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_default_cmdsn_depth + */ +DEF_TPG_ATTRIB(default_cmdsn_depth); +TPG_ATTR(default_cmdsn_depth, S_IRUGO | S_IWUSR); +/* + Define iscsi_tpg_attrib_s_cache_dynamic_acls + */ +DEF_TPG_ATTRIB(cache_dynamic_acls); +TPG_ATTR(cache_dynamic_acls, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_demo_mode_write_protect + */ +DEF_TPG_ATTRIB(demo_mode_write_protect); +TPG_ATTR(demo_mode_write_protect, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_prod_mode_write_protect + */ +DEF_TPG_ATTRIB(prod_mode_write_protect); +TPG_ATTR(prod_mode_write_protect, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { + &iscsi_tpg_attrib_authentication.attr, + &iscsi_tpg_attrib_login_timeout.attr, + &iscsi_tpg_attrib_netif_timeout.attr, + &iscsi_tpg_attrib_generate_node_acls.attr, + &iscsi_tpg_attrib_default_cmdsn_depth.attr, + &iscsi_tpg_attrib_cache_dynamic_acls.attr, + &iscsi_tpg_attrib_demo_mode_write_protect.attr, + &iscsi_tpg_attrib_prod_mode_write_protect.attr, + NULL, +}; + +/* End items for lio_target_tpg_attrib_cit */ + +/* Start items for lio_target_tpg_param_cit */ + +#define DEF_TPG_PARAM(name) \ +static ssize_t iscsi_tpg_param_show_##name( \ + struct se_portal_group *se_tpg, \ + char *page) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + struct iscsi_param *param; \ + ssize_t rb; \ + \ + if (iscsit_get_tpg(tpg) < 0) \ + return -EINVAL; \ + \ + param = iscsi_find_param_from_key(__stringify(name), \ + tpg->param_list); \ + if (!param) { \ + iscsit_put_tpg(tpg); \ + return -EINVAL; \ + } \ + rb = snprintf(page, PAGE_SIZE, "%s\n", param->value); \ + \ + iscsit_put_tpg(tpg); \ + return rb; \ +} \ +static ssize_t iscsi_tpg_param_store_##name( \ + struct se_portal_group *se_tpg, \ + const char *page, \ + size_t count) \ +{ \ + struct iscsi_portal_group *tpg = container_of(se_tpg, \ + struct iscsi_portal_group, tpg_se_tpg); \ + char *buf; \ + int ret; \ + \ + buf = kzalloc(PAGE_SIZE, GFP_KERNEL); \ + if (!buf) \ + return -ENOMEM; \ + snprintf(buf, PAGE_SIZE, "%s=%s", __stringify(name), page); \ + buf[strlen(buf)-1] = '\0'; /* Kill newline */ \ + \ + if (iscsit_get_tpg(tpg) < 0) { \ + kfree(buf); \ + return -EINVAL; \ + } \ + \ + ret = iscsi_change_param_value(buf, tpg->param_list, 1); \ + if (ret < 0) \ + goto out; \ + \ + kfree(buf); \ + iscsit_put_tpg(tpg); \ + return count; \ +out: \ + kfree(buf); \ + iscsit_put_tpg(tpg); \ + return -EINVAL; \ +} + +#define TPG_PARAM_ATTR(_name, _mode) TF_TPG_PARAM_ATTR(iscsi, _name, _mode); + +DEF_TPG_PARAM(AuthMethod); +TPG_PARAM_ATTR(AuthMethod, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(HeaderDigest); 
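+/*
+ * For reference, each DEF_TPG_PARAM(name) above expands to a show/store
+ * handler pair, and the matching TPG_PARAM_ATTR() below registers that
+ * pair as the configfs attribute iscsi_tpg_param_<name>.  As a rough
+ * sketch, DEF_TPG_PARAM(HeaderDigest) yields:
+ *
+ *	static ssize_t iscsi_tpg_param_show_HeaderDigest(
+ *		struct se_portal_group *se_tpg, char *page)
+ *	{
+ *		...
+ *		param = iscsi_find_param_from_key("HeaderDigest",
+ *					tpg->param_list);
+ *		return snprintf(page, PAGE_SIZE, "%s\n", param->value);
+ *	}
+ *
+ *	static ssize_t iscsi_tpg_param_store_HeaderDigest(
+ *		struct se_portal_group *se_tpg, const char *page,
+ *		size_t count)
+ *	{
+ *		...
+ *		snprintf(buf, PAGE_SIZE, "HeaderDigest=%s", page);
+ *		ret = iscsi_change_param_value(buf, tpg->param_list, 1);
+ *		...
+ *	}
+ */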
+TPG_PARAM_ATTR(HeaderDigest, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataDigest); +TPG_PARAM_ATTR(DataDigest, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxConnections); +TPG_PARAM_ATTR(MaxConnections, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(TargetAlias); +TPG_PARAM_ATTR(TargetAlias, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(InitialR2T); +TPG_PARAM_ATTR(InitialR2T, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(ImmediateData); +TPG_PARAM_ATTR(ImmediateData, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxRecvDataSegmentLength); +TPG_PARAM_ATTR(MaxRecvDataSegmentLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxBurstLength); +TPG_PARAM_ATTR(MaxBurstLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(FirstBurstLength); +TPG_PARAM_ATTR(FirstBurstLength, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DefaultTime2Wait); +TPG_PARAM_ATTR(DefaultTime2Wait, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DefaultTime2Retain); +TPG_PARAM_ATTR(DefaultTime2Retain, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(MaxOutstandingR2T); +TPG_PARAM_ATTR(MaxOutstandingR2T, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataPDUInOrder); +TPG_PARAM_ATTR(DataPDUInOrder, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(DataSequenceInOrder); +TPG_PARAM_ATTR(DataSequenceInOrder, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(ErrorRecoveryLevel); +TPG_PARAM_ATTR(ErrorRecoveryLevel, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(IFMarker); +TPG_PARAM_ATTR(IFMarker, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(OFMarker); +TPG_PARAM_ATTR(OFMarker, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(IFMarkInt); +TPG_PARAM_ATTR(IFMarkInt, S_IRUGO | S_IWUSR); + +DEF_TPG_PARAM(OFMarkInt); +TPG_PARAM_ATTR(OFMarkInt, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_tpg_param_attrs[] = { + &iscsi_tpg_param_AuthMethod.attr, + &iscsi_tpg_param_HeaderDigest.attr, + &iscsi_tpg_param_DataDigest.attr, + &iscsi_tpg_param_MaxConnections.attr, + &iscsi_tpg_param_TargetAlias.attr, + &iscsi_tpg_param_InitialR2T.attr, + &iscsi_tpg_param_ImmediateData.attr, + &iscsi_tpg_param_MaxRecvDataSegmentLength.attr, + &iscsi_tpg_param_MaxBurstLength.attr, + &iscsi_tpg_param_FirstBurstLength.attr, + &iscsi_tpg_param_DefaultTime2Wait.attr, + &iscsi_tpg_param_DefaultTime2Retain.attr, + &iscsi_tpg_param_MaxOutstandingR2T.attr, + &iscsi_tpg_param_DataPDUInOrder.attr, + &iscsi_tpg_param_DataSequenceInOrder.attr, + &iscsi_tpg_param_ErrorRecoveryLevel.attr, + &iscsi_tpg_param_IFMarker.attr, + &iscsi_tpg_param_OFMarker.attr, + &iscsi_tpg_param_IFMarkInt.attr, + &iscsi_tpg_param_OFMarkInt.attr, + NULL, +}; + +/* End items for lio_target_tpg_param_cit */ + +/* Start items for lio_target_tpg_cit */ + +static ssize_t lio_target_tpg_show_enable( + struct se_portal_group *se_tpg, + char *page) +{ + struct iscsi_portal_group *tpg = container_of(se_tpg, + struct iscsi_portal_group, tpg_se_tpg); + ssize_t len; + + spin_lock(&tpg->tpg_state_lock); + len = sprintf(page, "%d\n", + (tpg->tpg_state == TPG_STATE_ACTIVE) ? 
1 : 0);
+	spin_unlock(&tpg->tpg_state_lock);
+
+	return len;
+}
+
+static ssize_t lio_target_tpg_store_enable(
+	struct se_portal_group *se_tpg,
+	const char *page,
+	size_t count)
+{
+	struct iscsi_portal_group *tpg = container_of(se_tpg,
+			struct iscsi_portal_group, tpg_se_tpg);
+	char *endptr;
+	u32 op;
+	int ret = 0;
+
+	op = simple_strtoul(page, &endptr, 0);
+	if ((op != 1) && (op != 0)) {
+		pr_err("Illegal value for tpg_enable: %u\n", op);
+		return -EINVAL;
+	}
+
+	ret = iscsit_get_tpg(tpg);
+	if (ret < 0)
+		return -EINVAL;
+
+	if (op) {
+		ret = iscsit_tpg_enable_portal_group(tpg);
+		if (ret < 0)
+			goto out;
+	} else {
+		/*
+		 * iscsit_tpg_disable_portal_group() assumes force=1
+		 */
+		ret = iscsit_tpg_disable_portal_group(tpg, 1);
+		if (ret < 0)
+			goto out;
+	}
+
+	iscsit_put_tpg(tpg);
+	return count;
+out:
+	iscsit_put_tpg(tpg);
+	return -EINVAL;
+}
+
+TF_TPG_BASE_ATTR(lio_target, enable, S_IRUGO | S_IWUSR);
+
+static struct configfs_attribute *lio_target_tpg_attrs[] = {
+	&lio_target_tpg_enable.attr,
+	NULL,
+};
+
+/* End items for lio_target_tpg_cit */
+
+/* Start items for lio_target_tiqn_cit */
+
+struct se_portal_group *lio_target_tiqn_addtpg(
+	struct se_wwn *wwn,
+	struct config_group *group,
+	const char *name)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+	char *tpgt_str, *end_ptr;
+	int ret = 0;
+	unsigned short int tpgt;
+
+	tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+	/*
+	 * Only tpgt_# directory groups can be created below
+	 * target/iscsi/iqn.superturodiskarry/
+	 */
+	tpgt_str = strstr(name, "tpgt_");
+	if (!tpgt_str) {
+		pr_err("Unable to locate \"tpgt_#\" directory"
+				" group\n");
+		return NULL;
+	}
+	tpgt_str += 5; /* Skip ahead of "tpgt_" */
+	tpgt = (unsigned short int) simple_strtoul(tpgt_str, &end_ptr, 0);
+
+	tpg = iscsit_alloc_portal_group(tiqn, tpgt);
+	if (!tpg)
+		return NULL;
+
+	ret = core_tpg_register(
+			&lio_target_fabric_configfs->tf_ops,
+			wwn, &tpg->tpg_se_tpg, (void *)tpg,
+			TRANSPORT_TPG_TYPE_NORMAL);
+	if (ret < 0)
+		return NULL;
+
+	ret = iscsit_tpg_add_portal_group(tiqn, tpg);
+	if (ret != 0)
+		goto out;
+
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated TPG: %s\n",
+			name);
+	return &tpg->tpg_se_tpg;
+out:
+	core_tpg_deregister(&tpg->tpg_se_tpg);
+	kfree(tpg);
+	return NULL;
+}
+
+void lio_target_tiqn_deltpg(struct se_portal_group *se_tpg)
+{
+	struct iscsi_portal_group *tpg;
+	struct iscsi_tiqn *tiqn;
+
+	tpg = container_of(se_tpg, struct iscsi_portal_group, tpg_se_tpg);
+	tiqn = tpg->tpg_tiqn;
+	/*
+	 * iscsit_tpg_del_portal_group() assumes force=1
+	 */
+	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> Releasing TPG\n");
+	iscsit_tpg_del_portal_group(tiqn, tpg, 1);
+}
+
+/* End items for lio_target_tiqn_cit */
+
+/* Start LIO-Target TIQN struct config_item lio_target_cit */
+
+static ssize_t lio_target_wwn_show_attr_lio_version(
+	struct target_fabric_configfs *tf,
+	char *page)
+{
+	return sprintf(page, "RisingTide Systems Linux-iSCSI Target "ISCSIT_VERSION"\n");
+}
+
+TF_WWN_ATTR_RO(lio_target, lio_version);
+
+static struct configfs_attribute *lio_target_wwn_attrs[] = {
+	&lio_target_wwn_lio_version.attr,
+	NULL,
+};
+
+struct se_wwn *lio_target_call_coreaddtiqn(
+	struct target_fabric_configfs *tf,
+	struct config_group *group,
+	const char *name)
+{
+	struct config_group *stats_cg;
+	struct iscsi_tiqn *tiqn;
+
+	tiqn = iscsit_add_tiqn((unsigned char *)name);
+	if (IS_ERR(tiqn))
+		return ERR_PTR(PTR_ERR(tiqn));
+	/*
+	 * Setup struct iscsi_wwn_stat_grps for se_wwn->fabric_stat_group.
+	 */
+	stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+
+	stats_cg->default_groups = kzalloc(sizeof(struct config_group) * 6,
+				GFP_KERNEL);
+	if (!stats_cg->default_groups) {
+		pr_err("Unable to allocate memory for"
+				" stats_cg->default_groups\n");
+		iscsit_del_tiqn(tiqn);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	stats_cg->default_groups[0] = &WWN_STAT_GRPS(tiqn)->iscsi_instance_group;
+	stats_cg->default_groups[1] = &WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group;
+	stats_cg->default_groups[2] = &WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group;
+	stats_cg->default_groups[3] = &WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group;
+	stats_cg->default_groups[4] = &WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group;
+	stats_cg->default_groups[5] = NULL;
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_instance_group,
+			"iscsi_instance", &iscsi_stat_instance_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_sess_err_group,
+			"iscsi_sess_err", &iscsi_stat_sess_err_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_tgt_attr_group,
+			"iscsi_tgt_attr", &iscsi_stat_tgt_attr_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_login_stats_group,
+			"iscsi_login_stats", &iscsi_stat_login_cit);
+	config_group_init_type_name(&WWN_STAT_GRPS(tiqn)->iscsi_logout_stats_group,
+			"iscsi_logout_stats", &iscsi_stat_logout_cit);
+
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> %s\n", tiqn->tiqn);
+	pr_debug("LIO_Target_ConfigFS: REGISTER -> Allocated Node:"
+			" %s\n", name);
+	return &tiqn->tiqn_wwn;
+}
+
+void lio_target_call_coredeltiqn(
+	struct se_wwn *wwn)
+{
+	struct iscsi_tiqn *tiqn = container_of(wwn, struct iscsi_tiqn, tiqn_wwn);
+	struct config_item *df_item;
+	struct config_group *stats_cg;
+	int i;
+
+	stats_cg = &tiqn->tiqn_wwn.fabric_stat_group;
+	for (i = 0; stats_cg->default_groups[i]; i++) {
+		df_item = &stats_cg->default_groups[i]->cg_item;
+		stats_cg->default_groups[i] = NULL;
+		config_item_put(df_item);
+	}
+	kfree(stats_cg->default_groups);
+
+	pr_debug("LIO_Target_ConfigFS: DEREGISTER -> %s\n",
+			tiqn->tiqn);
+	iscsit_del_tiqn(tiqn);
+}
+
+/* End LIO-Target TIQN struct config_item lio_target_cit */
+
+/* Start lio_target_discovery_auth_cit */
+
+#define DEF_DISC_AUTH_STR(name, flags)					\
+	__DEF_NACL_AUTH_STR(disc, name, flags)				\
+static ssize_t iscsi_disc_show_##name(					\
+	struct target_fabric_configfs *tf,				\
+	char *page)							\
+{									\
+	return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,	\
+		page);							\
+}									\
+static ssize_t iscsi_disc_store_##name(					\
+	struct target_fabric_configfs *tf,				\
+	const char *page,						\
+	size_t count)							\
+{									\
+	return __iscsi_disc_store_##name(&iscsit_global->discovery_acl,	\
+		page, count);						\
+}
+
+#define DEF_DISC_AUTH_INT(name)						\
+	__DEF_NACL_AUTH_INT(disc, name)					\
+static ssize_t iscsi_disc_show_##name(					\
+	struct target_fabric_configfs *tf,				\
+	char *page)							\
+{									\
+	return __iscsi_disc_show_##name(&iscsit_global->discovery_acl,	\
+			page);						\
+}
+
+#define DISC_AUTH_ATTR(_name, _mode) TF_DISC_ATTR(iscsi, _name, _mode)
+#define DISC_AUTH_ATTR_RO(_name) TF_DISC_ATTR_RO(iscsi, _name)
+
+/*
+ * One-way authentication userid
+ */
+DEF_DISC_AUTH_STR(userid, NAF_USERID_SET);
+DISC_AUTH_ATTR(userid, S_IRUGO | S_IWUSR);
+/*
+ * One-way authentication password
+ */
+DEF_DISC_AUTH_STR(password, NAF_PASSWORD_SET);
+DISC_AUTH_ATTR(password, S_IRUGO | S_IWUSR);
+/*
+ * Enforce mutual authentication
+ */
+DEF_DISC_AUTH_INT(authenticate_target);
+DISC_AUTH_ATTR_RO(authenticate_target);
+/*
+ * Mutual authentication
userid + */ +DEF_DISC_AUTH_STR(userid_mutual, NAF_USERID_IN_SET); +DISC_AUTH_ATTR(userid_mutual, S_IRUGO | S_IWUSR); +/* + * Mutual authentication password + */ +DEF_DISC_AUTH_STR(password_mutual, NAF_PASSWORD_IN_SET); +DISC_AUTH_ATTR(password_mutual, S_IRUGO | S_IWUSR); + +/* + * enforce_discovery_auth + */ +static ssize_t iscsi_disc_show_enforce_discovery_auth( + struct target_fabric_configfs *tf, + char *page) +{ + struct iscsi_node_auth *discovery_auth = &iscsit_global->discovery_acl.node_auth; + + return sprintf(page, "%d\n", discovery_auth->enforce_discovery_auth); +} + +static ssize_t iscsi_disc_store_enforce_discovery_auth( + struct target_fabric_configfs *tf, + const char *page, + size_t count) +{ + struct iscsi_param *param; + struct iscsi_portal_group *discovery_tpg = iscsit_global->discovery_tpg; + char *endptr; + u32 op; + + op = simple_strtoul(page, &endptr, 0); + if ((op != 1) && (op != 0)) { + pr_err("Illegal value for enforce_discovery_auth:" + " %u\n", op); + return -EINVAL; + } + + if (!discovery_tpg) { + pr_err("iscsit_global->discovery_tpg is NULL\n"); + return -EINVAL; + } + + param = iscsi_find_param_from_key(AUTHMETHOD, + discovery_tpg->param_list); + if (!param) + return -EINVAL; + + if (op) { + /* + * Reset the AuthMethod key to CHAP. + */ + if (iscsi_update_param_value(param, CHAP) < 0) + return -EINVAL; + + discovery_tpg->tpg_attrib.authentication = 1; + iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 1; + pr_debug("LIO-CORE[0] Successfully enabled" + " authentication enforcement for iSCSI" + " Discovery TPG\n"); + } else { + /* + * Reset the AuthMethod key to CHAP,None + */ + if (iscsi_update_param_value(param, "CHAP,None") < 0) + return -EINVAL; + + discovery_tpg->tpg_attrib.authentication = 0; + iscsit_global->discovery_acl.node_auth.enforce_discovery_auth = 0; + pr_debug("LIO-CORE[0] Successfully disabled" + " authentication enforcement for iSCSI" + " Discovery TPG\n"); + } + + return count; +} + +DISC_AUTH_ATTR(enforce_discovery_auth, S_IRUGO | S_IWUSR); + +static struct configfs_attribute *lio_target_discovery_auth_attrs[] = { + &iscsi_disc_userid.attr, + &iscsi_disc_password.attr, + &iscsi_disc_authenticate_target.attr, + &iscsi_disc_userid_mutual.attr, + &iscsi_disc_password_mutual.attr, + &iscsi_disc_enforce_discovery_auth.attr, + NULL, +}; + +/* End lio_target_discovery_auth_cit */ + +/* Start functions for target_core_fabric_ops */ + +static char *iscsi_get_fabric_name(void) +{ + return "iSCSI"; +} + +static u32 iscsi_get_task_tag(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return cmd->init_task_tag; +} + +static int iscsi_get_cmd_state(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return cmd->i_state; +} + +static int iscsi_is_state_remove(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + return (cmd->i_state == ISTATE_REMOVE); +} + +static int lio_sess_logged_in(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + int ret; + /* + * Called with spin_lock_bh(&tpg_lock); and + * spin_lock(&se_tpg->session_lock); held. 
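+	 * Note the inverted sense of the return value below: this returns
+	 * 0 while the session is still in TARG_SESS_STATE_LOGGED_IN and
+	 * non-zero once it has left that state.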
+ */ + spin_lock(&sess->conn_lock); + ret = (sess->session_state != TARG_SESS_STATE_LOGGED_IN); + spin_unlock(&sess->conn_lock); + + return ret; +} + +static u32 lio_sess_get_index(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + return sess->session_index; +} + +static u32 lio_sess_get_initiator_sid( + struct se_session *se_sess, + unsigned char *buf, + u32 size) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + /* + * iSCSI Initiator Session Identifier from RFC-3720. + */ + return snprintf(buf, size, "%02x%02x%02x%02x%02x%02x", + sess->isid[0], sess->isid[1], sess->isid[2], + sess->isid[3], sess->isid[4], sess->isid[5]); +} + +static int lio_queue_data_in(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_DATAIN; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static int lio_write_pending(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + if (!cmd->immediate_data && !cmd->unsolicited_data) + return iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 1); + + return 0; +} + +static int lio_write_pending_status(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + int ret; + + spin_lock_bh(&cmd->istate_lock); + ret = !(cmd->cmd_flags & ICF_GOT_LAST_DATAOUT); + spin_unlock_bh(&cmd->istate_lock); + + return ret; +} + +static int lio_queue_status(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_STATUS; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static u16 lio_set_fabric_sense_len(struct se_cmd *se_cmd, u32 sense_length) +{ + unsigned char *buffer = se_cmd->sense_buffer; + /* + * From RFC-3720 10.4.7. Data Segment - Sense and Response Data Segment + * 16-bit SenseLength. + */ + buffer[0] = ((sense_length >> 8) & 0xff); + buffer[1] = (sense_length & 0xff); + /* + * Return two byte offset into allocated sense_buffer. + */ + return 2; +} + +static u16 lio_get_fabric_sense_len(void) +{ + /* + * Return two byte offset into allocated sense_buffer. 
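+	 * Together with lio_set_fabric_sense_len() above this means that,
+	 * for example, a sense_length of 18 (0x0012) is encoded as the
+	 * two bytes { 0x00, 0x12 } at the start of sense_buffer, with the
+	 * SCSI sense data itself beginning at offset 2.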
+ */ + return 2; +} + +static int lio_queue_tm_rsp(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + cmd->i_state = ISTATE_SEND_TASKMGTRSP; + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + return 0; +} + +static char *lio_tpg_get_endpoint_wwn(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return &tpg->tpg_tiqn->tiqn[0]; +} + +static u16 lio_tpg_get_tag(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return tpg->tpgt; +} + +static u32 lio_tpg_get_default_depth(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->default_cmdsn_depth; +} + +static int lio_tpg_check_demo_mode(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->generate_node_acls; +} + +static int lio_tpg_check_demo_mode_cache(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->cache_dynamic_acls; +} + +static int lio_tpg_check_demo_mode_write_protect( + struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->demo_mode_write_protect; +} + +static int lio_tpg_check_prod_mode_write_protect( + struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return ISCSI_TPG_ATTRIB(tpg)->prod_mode_write_protect; +} + +static void lio_tpg_release_fabric_acl( + struct se_portal_group *se_tpg, + struct se_node_acl *se_acl) +{ + struct iscsi_node_acl *acl = container_of(se_acl, + struct iscsi_node_acl, se_node_acl); + kfree(acl); +} + +/* + * Called with spin_lock_bh(struct se_portal_group->session_lock) held.. + * + * Also, this function calls iscsit_inc_session_usage_count() on the + * struct iscsi_session in question. + */ +static int lio_tpg_shutdown_session(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + spin_lock(&sess->conn_lock); + if (atomic_read(&sess->session_fall_back_to_erl0) || + atomic_read(&sess->session_logout) || + (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess->conn_lock); + return 0; + } + atomic_set(&sess->session_reinstatement, 1); + spin_unlock(&sess->conn_lock); + + iscsit_inc_session_usage_count(sess); + iscsit_stop_time2retain_timer(sess); + + return 1; +} + +/* + * Calls iscsit_dec_session_usage_count() as inverse of + * lio_tpg_shutdown_session() + */ +static void lio_tpg_close_session(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + /* + * If the iSCSI Session for the iSCSI Initiator Node exists, + * forcefully shutdown the iSCSI NEXUS. 
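+	 * iscsit_stop_session() is called with sess_sleep = 1 and
+	 * conn_sleep = 1 (cf. lio_tpg_stop_session() below), after which
+	 * the usage count taken in lio_tpg_shutdown_session() is dropped
+	 * and the session itself is released.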
+ */ + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + iscsit_close_session(sess); +} + +static void lio_tpg_stop_session( + struct se_session *se_sess, + int sess_sleep, + int conn_sleep) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + iscsit_stop_session(sess, sess_sleep, conn_sleep); +} + +static void lio_tpg_fall_back_to_erl0(struct se_session *se_sess) +{ + struct iscsi_session *sess = se_sess->fabric_sess_ptr; + + iscsit_fall_back_to_erl0(sess); +} + +static u32 lio_tpg_get_inst_index(struct se_portal_group *se_tpg) +{ + struct iscsi_portal_group *tpg = se_tpg->se_tpg_fabric_ptr; + + return tpg->tpg_tiqn->tiqn_index; +} + +static void lio_set_default_node_attributes(struct se_node_acl *se_acl) +{ + struct iscsi_node_acl *acl = container_of(se_acl, struct iscsi_node_acl, + se_node_acl); + + ISCSI_NODE_ATTRIB(acl)->nacl = acl; + iscsit_set_default_node_attribues(acl); +} + +static void lio_release_cmd(struct se_cmd *se_cmd) +{ + struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd); + + iscsit_release_cmd(cmd); +} + +/* End functions for target_core_fabric_ops */ + +int iscsi_target_register_configfs(void) +{ + struct target_fabric_configfs *fabric; + int ret; + + lio_target_fabric_configfs = NULL; + fabric = target_fabric_configfs_init(THIS_MODULE, "iscsi"); + if (IS_ERR(fabric)) { + pr_err("target_fabric_configfs_init() for" + " LIO-Target failed!\n"); + return PTR_ERR(fabric); + } + /* + * Setup the fabric API of function pointers used by target_core_mod.. + */ + fabric->tf_ops.get_fabric_name = &iscsi_get_fabric_name; + fabric->tf_ops.get_fabric_proto_ident = &iscsi_get_fabric_proto_ident; + fabric->tf_ops.tpg_get_wwn = &lio_tpg_get_endpoint_wwn; + fabric->tf_ops.tpg_get_tag = &lio_tpg_get_tag; + fabric->tf_ops.tpg_get_default_depth = &lio_tpg_get_default_depth; + fabric->tf_ops.tpg_get_pr_transport_id = &iscsi_get_pr_transport_id; + fabric->tf_ops.tpg_get_pr_transport_id_len = + &iscsi_get_pr_transport_id_len; + fabric->tf_ops.tpg_parse_pr_out_transport_id = + &iscsi_parse_pr_out_transport_id; + fabric->tf_ops.tpg_check_demo_mode = &lio_tpg_check_demo_mode; + fabric->tf_ops.tpg_check_demo_mode_cache = + &lio_tpg_check_demo_mode_cache; + fabric->tf_ops.tpg_check_demo_mode_write_protect = + &lio_tpg_check_demo_mode_write_protect; + fabric->tf_ops.tpg_check_prod_mode_write_protect = + &lio_tpg_check_prod_mode_write_protect; + fabric->tf_ops.tpg_alloc_fabric_acl = &lio_tpg_alloc_fabric_acl; + fabric->tf_ops.tpg_release_fabric_acl = &lio_tpg_release_fabric_acl; + fabric->tf_ops.tpg_get_inst_index = &lio_tpg_get_inst_index; + fabric->tf_ops.release_cmd = &lio_release_cmd; + fabric->tf_ops.shutdown_session = &lio_tpg_shutdown_session; + fabric->tf_ops.close_session = &lio_tpg_close_session; + fabric->tf_ops.stop_session = &lio_tpg_stop_session; + fabric->tf_ops.fall_back_to_erl0 = &lio_tpg_fall_back_to_erl0; + fabric->tf_ops.sess_logged_in = &lio_sess_logged_in; + fabric->tf_ops.sess_get_index = &lio_sess_get_index; + fabric->tf_ops.sess_get_initiator_sid = &lio_sess_get_initiator_sid; + fabric->tf_ops.write_pending = &lio_write_pending; + fabric->tf_ops.write_pending_status = &lio_write_pending_status; + fabric->tf_ops.set_default_node_attributes = + &lio_set_default_node_attributes; + fabric->tf_ops.get_task_tag = &iscsi_get_task_tag; + fabric->tf_ops.get_cmd_state = &iscsi_get_cmd_state; + fabric->tf_ops.queue_data_in = &lio_queue_data_in; + fabric->tf_ops.queue_status = &lio_queue_status; + fabric->tf_ops.queue_tm_rsp = 
&lio_queue_tm_rsp; + fabric->tf_ops.set_fabric_sense_len = &lio_set_fabric_sense_len; + fabric->tf_ops.get_fabric_sense_len = &lio_get_fabric_sense_len; + fabric->tf_ops.is_state_remove = &iscsi_is_state_remove; + /* + * Setup function pointers for generic logic in target_core_fabric_configfs.c + */ + fabric->tf_ops.fabric_make_wwn = &lio_target_call_coreaddtiqn; + fabric->tf_ops.fabric_drop_wwn = &lio_target_call_coredeltiqn; + fabric->tf_ops.fabric_make_tpg = &lio_target_tiqn_addtpg; + fabric->tf_ops.fabric_drop_tpg = &lio_target_tiqn_deltpg; + fabric->tf_ops.fabric_post_link = NULL; + fabric->tf_ops.fabric_pre_unlink = NULL; + fabric->tf_ops.fabric_make_np = &lio_target_call_addnptotpg; + fabric->tf_ops.fabric_drop_np = &lio_target_call_delnpfromtpg; + fabric->tf_ops.fabric_make_nodeacl = &lio_target_make_nodeacl; + fabric->tf_ops.fabric_drop_nodeacl = &lio_target_drop_nodeacl; + /* + * Setup default attribute lists for various fabric->tf_cit_tmpl + * sturct config_item_type's + */ + TF_CIT_TMPL(fabric)->tfc_discovery_cit.ct_attrs = lio_target_discovery_auth_attrs; + TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = lio_target_wwn_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = lio_target_tpg_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = lio_target_tpg_attrib_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = lio_target_tpg_param_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = lio_target_portal_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = lio_target_initiator_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = lio_target_nacl_attrib_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = lio_target_nacl_auth_attrs; + TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = lio_target_nacl_param_attrs; + + ret = target_fabric_configfs_register(fabric); + if (ret < 0) { + pr_err("target_fabric_configfs_register() for" + " LIO-Target failed!\n"); + target_fabric_configfs_free(fabric); + return ret; + } + + lio_target_fabric_configfs = fabric; + pr_debug("LIO_TARGET[0] - Set fabric ->" + " lio_target_fabric_configfs\n"); + return 0; +} + + +void iscsi_target_deregister_configfs(void) +{ + if (!lio_target_fabric_configfs) + return; + /* + * Shutdown discovery sessions and disable discovery TPG + */ + if (iscsit_global->discovery_tpg) + iscsit_tpg_disable_portal_group(iscsit_global->discovery_tpg, 1); + + target_fabric_configfs_deregister(lio_target_fabric_configfs); + lio_target_fabric_configfs = NULL; + pr_debug("LIO_TARGET[0] - Cleared" + " lio_target_fabric_configfs\n"); +} diff --git a/drivers/target/iscsi/iscsi_target_configfs.h b/drivers/target/iscsi/iscsi_target_configfs.h new file mode 100644 index 000000000000..8cd5a63c4edc --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_configfs.h @@ -0,0 +1,7 @@ +#ifndef ISCSI_TARGET_CONFIGFS_H +#define ISCSI_TARGET_CONFIGFS_H + +extern int iscsi_target_register_configfs(void); +extern void iscsi_target_deregister_configfs(void); + +#endif /* ISCSI_TARGET_CONFIGFS_H */ diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h new file mode 100644 index 000000000000..470ed551eeb5 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -0,0 +1,859 @@ +#ifndef ISCSI_TARGET_CORE_H +#define ISCSI_TARGET_CORE_H + +#include <linux/in.h> +#include <linux/configfs.h> +#include <net/sock.h> +#include <net/tcp.h> +#include <scsi/scsi_cmnd.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> + +#define 
ISCSIT_VERSION "v4.1.0-rc1" +#define ISCSI_MAX_DATASN_MISSING_COUNT 16 +#define ISCSI_TX_THREAD_TCP_TIMEOUT 2 +#define ISCSI_RX_THREAD_TCP_TIMEOUT 2 +#define SECONDS_FOR_ASYNC_LOGOUT 10 +#define SECONDS_FOR_ASYNC_TEXT 10 +#define SECONDS_FOR_LOGOUT_COMP 15 +#define WHITE_SPACE " \t\v\f\n\r" + +/* struct iscsi_node_attrib sanity values */ +#define NA_DATAOUT_TIMEOUT 3 +#define NA_DATAOUT_TIMEOUT_MAX 60 +#define NA_DATAOUT_TIMEOUT_MIX 2 +#define NA_DATAOUT_TIMEOUT_RETRIES 5 +#define NA_DATAOUT_TIMEOUT_RETRIES_MAX 15 +#define NA_DATAOUT_TIMEOUT_RETRIES_MIN 1 +#define NA_NOPIN_TIMEOUT 5 +#define NA_NOPIN_TIMEOUT_MAX 60 +#define NA_NOPIN_TIMEOUT_MIN 3 +#define NA_NOPIN_RESPONSE_TIMEOUT 5 +#define NA_NOPIN_RESPONSE_TIMEOUT_MAX 60 +#define NA_NOPIN_RESPONSE_TIMEOUT_MIN 3 +#define NA_RANDOM_DATAIN_PDU_OFFSETS 0 +#define NA_RANDOM_DATAIN_SEQ_OFFSETS 0 +#define NA_RANDOM_R2T_OFFSETS 0 +#define NA_DEFAULT_ERL 0 +#define NA_DEFAULT_ERL_MAX 2 +#define NA_DEFAULT_ERL_MIN 0 + +/* struct iscsi_tpg_attrib sanity values */ +#define TA_AUTHENTICATION 1 +#define TA_LOGIN_TIMEOUT 15 +#define TA_LOGIN_TIMEOUT_MAX 30 +#define TA_LOGIN_TIMEOUT_MIN 5 +#define TA_NETIF_TIMEOUT 2 +#define TA_NETIF_TIMEOUT_MAX 15 +#define TA_NETIF_TIMEOUT_MIN 2 +#define TA_GENERATE_NODE_ACLS 0 +#define TA_DEFAULT_CMDSN_DEPTH 16 +#define TA_DEFAULT_CMDSN_DEPTH_MAX 512 +#define TA_DEFAULT_CMDSN_DEPTH_MIN 1 +#define TA_CACHE_DYNAMIC_ACLS 0 +/* Enabled by default in demo mode (generic_node_acls=1) */ +#define TA_DEMO_MODE_WRITE_PROTECT 1 +/* Disabled by default in production mode w/ explict ACLs */ +#define TA_PROD_MODE_WRITE_PROTECT 0 +#define TA_CACHE_CORE_NPS 0 + +enum tpg_np_network_transport_table { + ISCSI_TCP = 0, + ISCSI_SCTP_TCP = 1, + ISCSI_SCTP_UDP = 2, + ISCSI_IWARP_TCP = 3, + ISCSI_IWARP_SCTP = 4, + ISCSI_INFINIBAND = 5, +}; + +/* RFC-3720 7.1.4 Standard Connection State Diagram for a Target */ +enum target_conn_state_table { + TARG_CONN_STATE_FREE = 0x1, + TARG_CONN_STATE_XPT_UP = 0x3, + TARG_CONN_STATE_IN_LOGIN = 0x4, + TARG_CONN_STATE_LOGGED_IN = 0x5, + TARG_CONN_STATE_IN_LOGOUT = 0x6, + TARG_CONN_STATE_LOGOUT_REQUESTED = 0x7, + TARG_CONN_STATE_CLEANUP_WAIT = 0x8, +}; + +/* RFC-3720 7.3.2 Session State Diagram for a Target */ +enum target_sess_state_table { + TARG_SESS_STATE_FREE = 0x1, + TARG_SESS_STATE_ACTIVE = 0x2, + TARG_SESS_STATE_LOGGED_IN = 0x3, + TARG_SESS_STATE_FAILED = 0x4, + TARG_SESS_STATE_IN_CONTINUE = 0x5, +}; + +/* struct iscsi_data_count->type */ +enum data_count_type { + ISCSI_RX_DATA = 1, + ISCSI_TX_DATA = 2, +}; + +/* struct iscsi_datain_req->dr_complete */ +enum datain_req_comp_table { + DATAIN_COMPLETE_NORMAL = 1, + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY = 2, + DATAIN_COMPLETE_CONNECTION_RECOVERY = 3, +}; + +/* struct iscsi_datain_req->recovery */ +enum datain_req_rec_table { + DATAIN_WITHIN_COMMAND_RECOVERY = 1, + DATAIN_CONNECTION_RECOVERY = 2, +}; + +/* struct iscsi_portal_group->state */ +enum tpg_state_table { + TPG_STATE_FREE = 0, + TPG_STATE_ACTIVE = 1, + TPG_STATE_INACTIVE = 2, + TPG_STATE_COLD_RESET = 3, +}; + +/* struct iscsi_tiqn->tiqn_state */ +enum tiqn_state_table { + TIQN_STATE_ACTIVE = 1, + TIQN_STATE_SHUTDOWN = 2, +}; + +/* struct iscsi_cmd->cmd_flags */ +enum cmd_flags_table { + ICF_GOT_LAST_DATAOUT = 0x00000001, + ICF_GOT_DATACK_SNACK = 0x00000002, + ICF_NON_IMMEDIATE_UNSOLICITED_DATA = 0x00000004, + ICF_SENT_LAST_R2T = 0x00000008, + ICF_WITHIN_COMMAND_RECOVERY = 0x00000010, + ICF_CONTIG_MEMORY = 0x00000020, + ICF_ATTACHED_TO_RQUEUE = 0x00000040, + ICF_OOO_CMDSN = 0x00000080, + 
ICF_REJECT_FAIL_CONN = 0x00000100, +}; + +/* struct iscsi_cmd->i_state */ +enum cmd_i_state_table { + ISTATE_NO_STATE = 0, + ISTATE_NEW_CMD = 1, + ISTATE_DEFERRED_CMD = 2, + ISTATE_UNSOLICITED_DATA = 3, + ISTATE_RECEIVE_DATAOUT = 4, + ISTATE_RECEIVE_DATAOUT_RECOVERY = 5, + ISTATE_RECEIVED_LAST_DATAOUT = 6, + ISTATE_WITHIN_DATAOUT_RECOVERY = 7, + ISTATE_IN_CONNECTION_RECOVERY = 8, + ISTATE_RECEIVED_TASKMGT = 9, + ISTATE_SEND_ASYNCMSG = 10, + ISTATE_SENT_ASYNCMSG = 11, + ISTATE_SEND_DATAIN = 12, + ISTATE_SEND_LAST_DATAIN = 13, + ISTATE_SENT_LAST_DATAIN = 14, + ISTATE_SEND_LOGOUTRSP = 15, + ISTATE_SENT_LOGOUTRSP = 16, + ISTATE_SEND_NOPIN = 17, + ISTATE_SENT_NOPIN = 18, + ISTATE_SEND_REJECT = 19, + ISTATE_SENT_REJECT = 20, + ISTATE_SEND_R2T = 21, + ISTATE_SENT_R2T = 22, + ISTATE_SEND_R2T_RECOVERY = 23, + ISTATE_SENT_R2T_RECOVERY = 24, + ISTATE_SEND_LAST_R2T = 25, + ISTATE_SENT_LAST_R2T = 26, + ISTATE_SEND_LAST_R2T_RECOVERY = 27, + ISTATE_SENT_LAST_R2T_RECOVERY = 28, + ISTATE_SEND_STATUS = 29, + ISTATE_SEND_STATUS_BROKEN_PC = 30, + ISTATE_SENT_STATUS = 31, + ISTATE_SEND_STATUS_RECOVERY = 32, + ISTATE_SENT_STATUS_RECOVERY = 33, + ISTATE_SEND_TASKMGTRSP = 34, + ISTATE_SENT_TASKMGTRSP = 35, + ISTATE_SEND_TEXTRSP = 36, + ISTATE_SENT_TEXTRSP = 37, + ISTATE_SEND_NOPIN_WANT_RESPONSE = 38, + ISTATE_SENT_NOPIN_WANT_RESPONSE = 39, + ISTATE_SEND_NOPIN_NO_RESPONSE = 40, + ISTATE_REMOVE = 41, + ISTATE_FREE = 42, +}; + +/* Used for iscsi_recover_cmdsn() return values */ +enum recover_cmdsn_ret_table { + CMDSN_ERROR_CANNOT_RECOVER = -1, + CMDSN_NORMAL_OPERATION = 0, + CMDSN_LOWER_THAN_EXP = 1, + CMDSN_HIGHER_THAN_EXP = 2, +}; + +/* Used for iscsi_handle_immediate_data() return values */ +enum immedate_data_ret_table { + IMMEDIATE_DATA_CANNOT_RECOVER = -1, + IMMEDIATE_DATA_NORMAL_OPERATION = 0, + IMMEDIATE_DATA_ERL1_CRC_FAILURE = 1, +}; + +/* Used for iscsi_decide_dataout_action() return values */ +enum dataout_action_ret_table { + DATAOUT_CANNOT_RECOVER = -1, + DATAOUT_NORMAL = 0, + DATAOUT_SEND_R2T = 1, + DATAOUT_SEND_TO_TRANSPORT = 2, + DATAOUT_WITHIN_COMMAND_RECOVERY = 3, +}; + +/* Used for struct iscsi_node_auth->naf_flags */ +enum naf_flags_table { + NAF_USERID_SET = 0x01, + NAF_PASSWORD_SET = 0x02, + NAF_USERID_IN_SET = 0x04, + NAF_PASSWORD_IN_SET = 0x08, +}; + +/* Used by various struct timer_list to manage iSCSI specific state */ +enum iscsi_timer_flags_table { + ISCSI_TF_RUNNING = 0x01, + ISCSI_TF_STOP = 0x02, + ISCSI_TF_EXPIRED = 0x04, +}; + +/* Used for struct iscsi_np->np_flags */ +enum np_flags_table { + NPF_IP_NETWORK = 0x00, + NPF_SCTP_STRUCT_FILE = 0x01 /* Bugfix */ +}; + +/* Used for struct iscsi_np->np_thread_state */ +enum np_thread_state_table { + ISCSI_NP_THREAD_ACTIVE = 1, + ISCSI_NP_THREAD_INACTIVE = 2, + ISCSI_NP_THREAD_RESET = 3, + ISCSI_NP_THREAD_SHUTDOWN = 4, + ISCSI_NP_THREAD_EXIT = 5, +}; + +struct iscsi_conn_ops { + u8 HeaderDigest; /* [0,1] == [None,CRC32C] */ + u8 DataDigest; /* [0,1] == [None,CRC32C] */ + u32 MaxRecvDataSegmentLength; /* [512..2**24-1] */ + u8 OFMarker; /* [0,1] == [No,Yes] */ + u8 IFMarker; /* [0,1] == [No,Yes] */ + u32 OFMarkInt; /* [1..65535] */ + u32 IFMarkInt; /* [1..65535] */ +}; + +struct iscsi_sess_ops { + char InitiatorName[224]; + char InitiatorAlias[256]; + char TargetName[224]; + char TargetAlias[256]; + char TargetAddress[256]; + u16 TargetPortalGroupTag; /* [0..65535] */ + u16 MaxConnections; /* [1..65535] */ + u8 InitialR2T; /* [0,1] == [No,Yes] */ + u8 ImmediateData; /* [0,1] == [No,Yes] */ + u32 MaxBurstLength; /* [512..2**24-1] */ + u32 
FirstBurstLength; /* [512..2**24-1] */
+	u16 DefaultTime2Wait; /* [0..3600] */
+	u16 DefaultTime2Retain; /* [0..3600] */
+	u16 MaxOutstandingR2T; /* [1..65535] */
+	u8 DataPDUInOrder; /* [0,1] == [No,Yes] */
+	u8 DataSequenceInOrder; /* [0,1] == [No,Yes] */
+	u8 ErrorRecoveryLevel; /* [0..2] */
+	u8 SessionType; /* [0,1] == [Normal,Discovery]*/
+};
+
+struct iscsi_queue_req {
+	int state;
+	struct iscsi_cmd *cmd;
+	struct list_head qr_list;
+};
+
+struct iscsi_data_count {
+	int data_length;
+	int sync_and_steering;
+	enum data_count_type type;
+	u32 iov_count;
+	u32 ss_iov_count;
+	u32 ss_marker_count;
+	struct kvec *iov;
+};
+
+struct iscsi_param_list {
+	struct list_head param_list;
+	struct list_head extra_response_list;
+};
+
+struct iscsi_datain_req {
+	enum datain_req_comp_table dr_complete;
+	int generate_recovery_values;
+	enum datain_req_rec_table recovery;
+	u32 begrun;
+	u32 runlength;
+	u32 data_length;
+	u32 data_offset;
+	u32 data_offset_end;
+	u32 data_sn;
+	u32 next_burst_len;
+	u32 read_data_done;
+	u32 seq_send_order;
+	struct list_head dr_list;
+} ____cacheline_aligned;
+
+struct iscsi_ooo_cmdsn {
+	u16 cid;
+	u32 batch_count;
+	u32 cmdsn;
+	u32 exp_cmdsn;
+	struct iscsi_cmd *cmd;
+	struct list_head ooo_list;
+} ____cacheline_aligned;
+
+struct iscsi_datain {
+	u8 flags;
+	u32 data_sn;
+	u32 length;
+	u32 offset;
+} ____cacheline_aligned;
+
+struct iscsi_r2t {
+	int seq_complete;
+	int recovery_r2t;
+	int sent_r2t;
+	u32 r2t_sn;
+	u32 offset;
+	u32 targ_xfer_tag;
+	u32 xfer_len;
+	struct list_head r2t_list;
+} ____cacheline_aligned;
+
+struct iscsi_cmd {
+	enum iscsi_timer_flags_table dataout_timer_flags;
+	/* DataOUT timeout retries */
+	u8 dataout_timeout_retries;
+	/* Within command recovery count */
+	u8 error_recovery_count;
+	/* iSCSI dependent state for out of order CmdSNs */
+	enum cmd_i_state_table deferred_i_state;
+	/* iSCSI dependent state */
+	enum cmd_i_state_table i_state;
+	/* Command is an immediate command (ISCSI_OP_IMMEDIATE set) */
+	u8 immediate_cmd;
+	/* Immediate data present */
+	u8 immediate_data;
+	/* iSCSI Opcode */
+	u8 iscsi_opcode;
+	/* iSCSI Response Code */
+	u8 iscsi_response;
+	/* Logout reason when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+	u8 logout_reason;
+	/* Logout response code when iscsi_opcode == ISCSI_INIT_LOGOUT_CMND */
+	u8 logout_response;
+	/* MaxCmdSN has been incremented */
+	u8 maxcmdsn_inc;
+	/* Immediate Unsolicited Dataout */
+	u8 unsolicited_data;
+	/* CID contained in logout PDU when opcode == ISCSI_INIT_LOGOUT_CMND */
+	u16 logout_cid;
+	/* Command flags */
+	enum cmd_flags_table cmd_flags;
+	/* Initiator Task Tag assigned from Initiator */
+	u32 init_task_tag;
+	/* Target Transfer Tag assigned from Target */
+	u32 targ_xfer_tag;
+	/* CmdSN assigned from Initiator */
+	u32 cmd_sn;
+	/* ExpStatSN assigned from Initiator */
+	u32 exp_stat_sn;
+	/* StatSN assigned to this ITT */
+	u32 stat_sn;
+	/* DataSN Counter */
+	u32 data_sn;
+	/* R2TSN Counter */
+	u32 r2t_sn;
+	/* Last DataSN acknowledged via DataAck SNACK */
+	u32 acked_data_sn;
+	/* Used for echoing NOPOUT ping data */
+	u32 buf_ptr_size;
+	/* Used to store DataDigest */
+	u32 data_crc;
+	/* Total size in bytes associated with command */
+	u32 data_length;
+	/* Counter for MaxOutstandingR2T */
+	u32 outstanding_r2ts;
+	/* Next R2T Offset when DataSequenceInOrder=Yes */
+	u32 r2t_offset;
+	/* Iovec current and orig count for iscsi_cmd->iov_data */
+	u32 iov_data_count;
+	u32 orig_iov_data_count;
+	/* Number of miscellaneous iovecs used for IP stack calls */
+	u32 iov_misc_count;
+	/* Number of struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+	u32 pdu_count;
+	/* Next struct iscsi_pdu to send in struct iscsi_cmd->pdu_list */
+	u32 pdu_send_order;
+	/* Current struct iscsi_pdu in struct iscsi_cmd->pdu_list */
+	u32 pdu_start;
+	u32 residual_count;
+	/* Next struct iscsi_seq to send in struct iscsi_cmd->seq_list */
+	u32 seq_send_order;
+	/* Number of struct iscsi_seq in struct iscsi_cmd->seq_list */
+	u32 seq_count;
+	/* Current struct iscsi_seq in struct iscsi_cmd->seq_list */
+	u32 seq_no;
+	/* Lowest offset in current DataOUT sequence */
+	u32 seq_start_offset;
+	/* Highest offset in current DataOUT sequence */
+	u32 seq_end_offset;
+	/* Total size in bytes received so far of READ data */
+	u32 read_data_done;
+	/* Total size in bytes received so far of WRITE data */
+	u32 write_data_done;
+	/* Counter for FirstBurstLength key */
+	u32 first_burst_len;
+	/* Counter for MaxBurstLength key */
+	u32 next_burst_len;
+	/* Transfer size used for IP stack calls */
+	u32 tx_size;
+	/* Buffer used for various purposes */
+	void *buf_ptr;
+	/* See include/linux/dma-mapping.h */
+	enum dma_data_direction data_direction;
+	/* iSCSI PDU Header + CRC */
+	unsigned char pdu[ISCSI_HDR_LEN + ISCSI_CRC_LEN];
+	/* Number of times struct iscsi_cmd is present in immediate queue */
+	atomic_t immed_queue_count;
+	atomic_t response_queue_count;
+	atomic_t transport_sent;
+	spinlock_t datain_lock;
+	spinlock_t dataout_timeout_lock;
+	/* spinlock for protecting struct iscsi_cmd->i_state */
+	spinlock_t istate_lock;
+	/* spinlock for adding within command recovery entries */
+	spinlock_t error_lock;
+	/* spinlock for adding R2Ts */
+	spinlock_t r2t_lock;
+	/* DataIN List */
+	struct list_head datain_list;
+	/* R2T List */
+	struct list_head cmd_r2t_list;
+	struct completion reject_comp;
+	/* Timer for DataOUT */
+	struct timer_list dataout_timer;
+	/* Iovecs for SCSI data payload RX/TX w/ kernel level sockets */
+	struct kvec *iov_data;
+	/* Iovecs for miscellaneous purposes */
+#define ISCSI_MISC_IOVECS 5
+	struct kvec iov_misc[ISCSI_MISC_IOVECS];
+	/* Array of struct iscsi_pdu used for DataPDUInOrder=No */
+	struct iscsi_pdu *pdu_list;
+	/* Current struct iscsi_pdu used for DataPDUInOrder=No */
+	struct iscsi_pdu *pdu_ptr;
+	/* Array of struct iscsi_seq used for DataSequenceInOrder=No */
+	struct iscsi_seq *seq_list;
+	/* Current struct iscsi_seq used for DataSequenceInOrder=No */
+	struct iscsi_seq *seq_ptr;
+	/* TMR Request when iscsi_opcode == ISCSI_OP_SCSI_TMFUNC */
+	struct iscsi_tmr_req *tmr_req;
+	/* Connection this command is allegiant to */
+	struct iscsi_conn *conn;
+	/* Pointer to connection recovery entry */
+	struct iscsi_conn_recovery *cr;
+	/* Session the command is part of, used for connection recovery */
+	struct iscsi_session *sess;
+	/* list_head for connection list */
+	struct list_head i_list;
+	/* The TCM I/O descriptor that is accessed via container_of() */
+	struct se_cmd se_cmd;
+	/* Sense buffer that will be mapped into outgoing status */
+#define ISCSI_SENSE_BUFFER_LEN (TRANSPORT_SENSE_BUFFER + 2)
+	unsigned char sense_buffer[ISCSI_SENSE_BUFFER_LEN];
+
+	struct scatterlist *t_mem_sg;
+	u32 t_mem_sg_nents;
+
+	u32 padding;
+	u8 pad_bytes[4];
+
+	struct scatterlist *first_data_sg;
+	u32 first_data_sg_off;
+	u32 kmapped_nents;
+
+} ____cacheline_aligned;
+
+struct iscsi_tmr_req {
+	bool task_reassign:1;
+	u32 ref_cmd_sn;
+	u32 exp_data_sn;
+	struct iscsi_conn_recovery *conn_recovery;
+	struct se_tmr_req *se_tmr_req;
+};
+
+struct
iscsi_conn { + /* Authentication Successful for this connection */ + u8 auth_complete; + /* State connection is currently in */ + u8 conn_state; + u8 conn_logout_reason; + u8 network_transport; + enum iscsi_timer_flags_table nopin_timer_flags; + enum iscsi_timer_flags_table nopin_response_timer_flags; + u8 tx_immediate_queue; + u8 tx_response_queue; + /* Used to know what thread encountered a transport failure */ + u8 which_thread; + /* connection id assigned by the Initiator */ + u16 cid; + /* Remote TCP Port */ + u16 login_port; + int net_size; + u32 auth_id; +#define CONNFLAG_SCTP_STRUCT_FILE 0x01 + u32 conn_flags; + /* Used for iscsi_tx_login_rsp() */ + u32 login_itt; + u32 exp_statsn; + /* Per connection status sequence number */ + u32 stat_sn; + /* IFMarkInt's Current Value */ + u32 if_marker; + /* OFMarkInt's Current Value */ + u32 of_marker; + /* Used for calculating OFMarker offset to next PDU */ + u32 of_marker_offset; + /* Complete Bad PDU for sending reject */ + unsigned char bad_hdr[ISCSI_HDR_LEN]; +#define IPV6_ADDRESS_SPACE 48 + unsigned char login_ip[IPV6_ADDRESS_SPACE]; + int conn_usage_count; + int conn_waiting_on_uc; + atomic_t check_immediate_queue; + atomic_t conn_logout_remove; + atomic_t connection_exit; + atomic_t connection_recovery; + atomic_t connection_reinstatement; + atomic_t connection_wait; + atomic_t connection_wait_rcfr; + atomic_t sleep_on_conn_wait_comp; + atomic_t transport_failed; + struct completion conn_post_wait_comp; + struct completion conn_wait_comp; + struct completion conn_wait_rcfr_comp; + struct completion conn_waiting_on_uc_comp; + struct completion conn_logout_comp; + struct completion tx_half_close_comp; + struct completion rx_half_close_comp; + /* socket used by this connection */ + struct socket *sock; + struct timer_list nopin_timer; + struct timer_list nopin_response_timer; + struct timer_list transport_timer; + /* Spinlock used for add/deleting cmd's from conn_cmd_list */ + spinlock_t cmd_lock; + spinlock_t conn_usage_lock; + spinlock_t immed_queue_lock; + spinlock_t nopin_timer_lock; + spinlock_t response_queue_lock; + spinlock_t state_lock; + /* libcrypto RX and TX contexts for crc32c */ + struct hash_desc conn_rx_hash; + struct hash_desc conn_tx_hash; + /* Used for scheduling TX and RX connection kthreads */ + cpumask_var_t conn_cpumask; + int conn_rx_reset_cpumask:1; + int conn_tx_reset_cpumask:1; + /* list_head of struct iscsi_cmd for this connection */ + struct list_head conn_cmd_list; + struct list_head immed_queue_list; + struct list_head response_queue_list; + struct iscsi_conn_ops *conn_ops; + struct iscsi_param_list *param_list; + /* Used for per connection auth state machine */ + void *auth_protocol; + struct iscsi_login_thread_s *login_thread; + struct iscsi_portal_group *tpg; + /* Pointer to parent session */ + struct iscsi_session *sess; + /* Pointer to thread_set in use for this conn's threads */ + struct iscsi_thread_set *thread_set; + /* list_head for session connection list */ + struct list_head conn_list; +} ____cacheline_aligned; + +struct iscsi_conn_recovery { + u16 cid; + u32 cmd_count; + u32 maxrecvdatasegmentlength; + int ready_for_reallegiance; + struct list_head conn_recovery_cmd_list; + spinlock_t conn_recovery_cmd_lock; + struct timer_list time2retain_timer; + struct iscsi_session *sess; + struct list_head cr_list; +} ____cacheline_aligned; + +struct iscsi_session { + u8 initiator_vendor; + u8 isid[6]; + enum iscsi_timer_flags_table time2retain_timer_flags; + u8 version_active; + u16 cid_called; + u16 
conn_recovery_count; + u16 tsih; + /* state session is currently in */ + u32 session_state; + /* session wide counter: initiator assigned task tag */ + u32 init_task_tag; + /* session wide counter: target assigned task tag */ + u32 targ_xfer_tag; + u32 cmdsn_window; + + /* protects cmdsn values */ + struct mutex cmdsn_mutex; + /* session wide counter: expected command sequence number */ + u32 exp_cmd_sn; + /* session wide counter: maximum allowed command sequence number */ + u32 max_cmd_sn; + struct list_head sess_ooo_cmdsn_list; + + /* LIO specific session ID */ + u32 sid; + char auth_type[8]; + /* unique within the target */ + int session_index; + /* Used for session reference counting */ + int session_usage_count; + int session_waiting_on_uc; + u32 cmd_pdus; + u32 rsp_pdus; + u64 tx_data_octets; + u64 rx_data_octets; + u32 conn_digest_errors; + u32 conn_timeout_errors; + u64 creation_time; + spinlock_t session_stats_lock; + /* Number of active connections */ + atomic_t nconn; + atomic_t session_continuation; + atomic_t session_fall_back_to_erl0; + atomic_t session_logout; + atomic_t session_reinstatement; + atomic_t session_stop_active; + atomic_t sleep_on_sess_wait_comp; + atomic_t transport_wait_cmds; + /* connection list */ + struct list_head sess_conn_list; + struct list_head cr_active_list; + struct list_head cr_inactive_list; + spinlock_t conn_lock; + spinlock_t cr_a_lock; + spinlock_t cr_i_lock; + spinlock_t session_usage_lock; + spinlock_t ttt_lock; + struct completion async_msg_comp; + struct completion reinstatement_comp; + struct completion session_wait_comp; + struct completion session_waiting_on_uc_comp; + struct timer_list time2retain_timer; + struct iscsi_sess_ops *sess_ops; + struct se_session *se_sess; + struct iscsi_portal_group *tpg; +} ____cacheline_aligned; + +struct iscsi_login { + u8 auth_complete; + u8 checked_for_existing; + u8 current_stage; + u8 leading_connection; + u8 first_request; + u8 version_min; + u8 version_max; + char isid[6]; + u32 cmd_sn; + u32 init_task_tag; + u32 initial_exp_statsn; + u32 rsp_length; + u16 cid; + u16 tsih; + char *req; + char *rsp; + char *req_buf; + char *rsp_buf; +} ____cacheline_aligned; + +struct iscsi_node_attrib { + u32 dataout_timeout; + u32 dataout_timeout_retries; + u32 default_erl; + u32 nopin_timeout; + u32 nopin_response_timeout; + u32 random_datain_pdu_offsets; + u32 random_datain_seq_offsets; + u32 random_r2t_offsets; + u32 tmr_cold_reset; + u32 tmr_warm_reset; + struct iscsi_node_acl *nacl; +}; + +struct se_dev_entry_s; + +struct iscsi_node_auth { + enum naf_flags_table naf_flags; + int authenticate_target; + /* Used for iscsit_global->discovery_auth, + * set to zero (auth disabled) by default */ + int enforce_discovery_auth; +#define MAX_USER_LEN 256 +#define MAX_PASS_LEN 256 + char userid[MAX_USER_LEN]; + char password[MAX_PASS_LEN]; + char userid_mutual[MAX_USER_LEN]; + char password_mutual[MAX_PASS_LEN]; +}; + +#include "iscsi_target_stat.h" + +struct iscsi_node_stat_grps { + struct config_group iscsi_sess_stats_group; + struct config_group iscsi_conn_stats_group; +}; + +struct iscsi_node_acl { + struct iscsi_node_attrib node_attrib; + struct iscsi_node_auth node_auth; + struct iscsi_node_stat_grps node_stat_grps; + struct se_node_acl se_node_acl; +}; + +#define NODE_STAT_GRPS(nacl) (&(nacl)->node_stat_grps) + +#define ISCSI_NODE_ATTRIB(t) (&(t)->node_attrib) +#define ISCSI_NODE_AUTH(t) (&(t)->node_auth) + +struct iscsi_tpg_attrib { + u32 authentication; + u32 login_timeout; + u32 netif_timeout; + u32 
generate_node_acls; + u32 cache_dynamic_acls; + u32 default_cmdsn_depth; + u32 demo_mode_write_protect; + u32 prod_mode_write_protect; + struct iscsi_portal_group *tpg; +}; + +struct iscsi_np { + int np_network_transport; + int np_ip_proto; + int np_sock_type; + enum np_thread_state_table np_thread_state; + enum iscsi_timer_flags_table np_login_timer_flags; + u32 np_exports; + enum np_flags_table np_flags; + unsigned char np_ip[IPV6_ADDRESS_SPACE]; + u16 np_port; + spinlock_t np_thread_lock; + struct completion np_restart_comp; + struct socket *np_socket; + struct __kernel_sockaddr_storage np_sockaddr; + struct task_struct *np_thread; + struct timer_list np_login_timer; + struct iscsi_portal_group *np_login_tpg; + struct list_head np_list; +} ____cacheline_aligned; + +struct iscsi_tpg_np { + struct iscsi_np *tpg_np; + struct iscsi_portal_group *tpg; + struct iscsi_tpg_np *tpg_np_parent; + struct list_head tpg_np_list; + struct list_head tpg_np_child_list; + struct list_head tpg_np_parent_list; + struct se_tpg_np se_tpg_np; + spinlock_t tpg_np_parent_lock; +}; + +struct iscsi_portal_group { + unsigned char tpg_chap_id; + /* TPG State */ + enum tpg_state_table tpg_state; + /* Target Portal Group Tag */ + u16 tpgt; + /* Id assigned to target sessions */ + u16 ntsih; + /* Number of active sessions */ + u32 nsessions; + /* Number of Network Portals available for this TPG */ + u32 num_tpg_nps; + /* Per TPG LIO specific session ID. */ + u32 sid; + /* Spinlock for adding/removing Network Portals */ + spinlock_t tpg_np_lock; + spinlock_t tpg_state_lock; + struct se_portal_group tpg_se_tpg; + struct mutex tpg_access_lock; + struct mutex np_login_lock; + struct iscsi_tpg_attrib tpg_attrib; + /* Pointer to default list of iSCSI parameters for TPG */ + struct iscsi_param_list *param_list; + struct iscsi_tiqn *tpg_tiqn; + struct list_head tpg_gnp_list; + struct list_head tpg_list; +} ____cacheline_aligned; + +#define ISCSI_TPG_C(c) ((struct iscsi_portal_group *)(c)->tpg) +#define ISCSI_TPG_LUN(c, l) ((iscsi_tpg_list_t *)(c)->tpg->tpg_lun_list_t[l]) +#define ISCSI_TPG_S(s) ((struct iscsi_portal_group *)(s)->tpg) +#define ISCSI_TPG_ATTRIB(t) (&(t)->tpg_attrib) +#define SE_TPG(tpg) (&(tpg)->tpg_se_tpg) + +struct iscsi_wwn_stat_grps { + struct config_group iscsi_stat_group; + struct config_group iscsi_instance_group; + struct config_group iscsi_sess_err_group; + struct config_group iscsi_tgt_attr_group; + struct config_group iscsi_login_stats_group; + struct config_group iscsi_logout_stats_group; +}; + +struct iscsi_tiqn { +#define ISCSI_IQN_LEN 224 + unsigned char tiqn[ISCSI_IQN_LEN]; + enum tiqn_state_table tiqn_state; + int tiqn_access_count; + u32 tiqn_active_tpgs; + u32 tiqn_ntpgs; + u32 tiqn_num_tpg_nps; + u32 tiqn_nsessions; + struct list_head tiqn_list; + struct list_head tiqn_tpg_list; + spinlock_t tiqn_state_lock; + spinlock_t tiqn_tpg_lock; + struct se_wwn tiqn_wwn; + struct iscsi_wwn_stat_grps tiqn_stat_grps; + int tiqn_index; + struct iscsi_sess_err_stats sess_err_stats; + struct iscsi_login_stats login_stats; + struct iscsi_logout_stats logout_stats; +} ____cacheline_aligned; + +#define WWN_STAT_GRPS(tiqn) (&(tiqn)->tiqn_stat_grps) + +struct iscsit_global { + /* In core shutdown */ + u32 in_shutdown; + u32 active_ts; + /* Unique identifier used for the authentication daemon */ + u32 auth_id; + u32 inactive_ts; + /* Thread Set bitmap count */ + int ts_bitmap_count; + /* Thread Set bitmap pointer */ + unsigned long *ts_bitmap; + /* Used for iSCSI discovery session authentication */ + struct 
iscsi_node_acl discovery_acl;
+	struct iscsi_portal_group *discovery_tpg;
+};
+
+#endif /* ISCSI_TARGET_CORE_H */
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.c b/drivers/target/iscsi/iscsi_target_datain_values.c
new file mode 100644
index 000000000000..8c0495129513
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.c
@@ -0,0 +1,531 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target DataIN value generation functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+#include "iscsi_target_datain_values.h"
+
+struct iscsi_datain_req *iscsit_allocate_datain_req(void)
+{
+	struct iscsi_datain_req *dr;
+
+	dr = kmem_cache_zalloc(lio_dr_cache, GFP_ATOMIC);
+	if (!dr) {
+		pr_err("Unable to allocate memory for"
+				" struct iscsi_datain_req\n");
+		return NULL;
+	}
+	INIT_LIST_HEAD(&dr->dr_list);
+
+	return dr;
+}
+
+void iscsit_attach_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+	spin_lock(&cmd->datain_lock);
+	list_add_tail(&dr->dr_list, &cmd->datain_list);
+	spin_unlock(&cmd->datain_lock);
+}
+
+void iscsit_free_datain_req(struct iscsi_cmd *cmd, struct iscsi_datain_req *dr)
+{
+	spin_lock(&cmd->datain_lock);
+	list_del(&dr->dr_list);
+	spin_unlock(&cmd->datain_lock);
+
+	kmem_cache_free(lio_dr_cache, dr);
+}
+
+void iscsit_free_all_datain_reqs(struct iscsi_cmd *cmd)
+{
+	struct iscsi_datain_req *dr, *dr_tmp;
+
+	spin_lock(&cmd->datain_lock);
+	list_for_each_entry_safe(dr, dr_tmp, &cmd->datain_list, dr_list) {
+		list_del(&dr->dr_list);
+		kmem_cache_free(lio_dr_cache, dr);
+	}
+	spin_unlock(&cmd->datain_lock);
+}
+
+struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *cmd)
+{
+	struct iscsi_datain_req *dr;
+
+	if (list_empty(&cmd->datain_list)) {
+		pr_err("cmd->datain_list is empty for ITT:"
+			" 0x%08x\n", cmd->init_task_tag);
+		return NULL;
+	}
+	list_for_each_entry(dr, &cmd->datain_list, dr_list)
+		break;
+
+	return dr;
+}
+
+/*
+ * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=Yes.
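+ * As a worked example (assuming MaxRecvDataSegmentLength=8192 and
+ * MaxBurstLength=65536), a 20000 byte read is carved into DataSN 0, 1
+ * and 2 with lengths 8192, 8192 and 3616 at offsets 0, 8192 and 16384;
+ * the final PDU carries ISCSI_FLAG_CMD_FINAL and ISCSI_FLAG_DATA_STATUS,
+ * plus ISCSI_FLAG_DATA_ACK when ErrorRecoveryLevel > 0.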
+ */ +static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_yes( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 next_burst_len, read_data_done, read_data_left; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_yes( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + next_burst_len = (!dr->recovery) ? + cmd->next_burst_len : dr->next_burst_len; + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + if ((read_data_left <= conn->conn_ops->MaxRecvDataSegmentLength) && + (read_data_left <= (conn->sess->sess_ops->MaxBurstLength - + next_burst_len))) { + datain->length = read_data_left; + + datain->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS); + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + } else { + if ((next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + datain->length = + conn->conn_ops->MaxRecvDataSegmentLength; + next_burst_len += datain->length; + } else { + datain->length = (conn->sess->sess_ops->MaxBurstLength - + next_burst_len); + next_burst_len = 0; + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + } + } + + datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++; + datain->offset = read_data_done; + + if (!dr->recovery) { + cmd->next_burst_len = next_burst_len; + cmd->read_data_done += datain->length; + } else { + dr->next_burst_len = next_burst_len; + dr->read_data_done += datain->length; + } + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=Yes. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_no_and_yes( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 offset, read_data_done, read_data_left, seq_send_order; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_seq *seq; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_no( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + seq_send_order = (!dr->recovery) ? 
+ cmd->seq_send_order : dr->seq_send_order; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order); + if (!seq) + return NULL; + + seq->sent = 1; + + if (!dr->recovery && !seq->next_burst_len) + seq->first_datasn = cmd->data_sn; + + offset = (seq->offset + seq->next_burst_len); + + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + datain->length = (cmd->data_length - offset); + datain->offset = offset; + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } else { + if ((seq->next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + datain->length = + conn->conn_ops->MaxRecvDataSegmentLength; + datain->offset = (seq->offset + seq->next_burst_len); + + seq->next_burst_len += datain->length; + } else { + datain->length = (conn->sess->sess_ops->MaxBurstLength - + seq->next_burst_len); + datain->offset = (seq->offset + seq->next_burst_len); + + datain->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + datain->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } + } + + if ((read_data_done + datain->length) == cmd->data_length) + datain->flags |= ISCSI_FLAG_DATA_STATUS; + + datain->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++; + if (!dr->recovery) { + cmd->seq_send_order = seq_send_order; + cmd->read_data_done += datain->length; + } else { + dr->seq_send_order = seq_send_order; + dr->read_data_done += datain->length; + } + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_CMD_FINAL) + seq->last_datasn = datain->data_sn; + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=Yes and DataPDUInOrder=No. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_yes_and_no( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 next_burst_len, read_data_done, read_data_left; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_pdu *pdu; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_yes( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + next_burst_len = (!dr->recovery) ? + cmd->next_burst_len : dr->next_burst_len; + read_data_done = (!dr->recovery) ? 
+ cmd->read_data_done : dr->read_data_done; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return dr; + } + + pdu = iscsit_get_pdu_holder_for_seq(cmd, NULL); + if (!pdu) + return dr; + + if ((read_data_done + pdu->length) == cmd->data_length) { + pdu->flags |= (ISCSI_FLAG_CMD_FINAL | ISCSI_FLAG_DATA_STATUS); + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + next_burst_len = 0; + } else { + if ((next_burst_len + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) + next_burst_len += pdu->length; + else { + pdu->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + next_burst_len = 0; + } + } + + pdu->data_sn = (!dr->recovery) ? cmd->data_sn++ : dr->data_sn++; + if (!dr->recovery) { + cmd->next_burst_len = next_burst_len; + cmd->read_data_done += pdu->length; + } else { + dr->next_burst_len = next_burst_len; + dr->read_data_done += pdu->length; + } + + datain->flags = pdu->flags; + datain->length = pdu->length; + datain->offset = pdu->offset; + datain->data_sn = pdu->data_sn; + + if (!dr->recovery) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) + dr->dr_complete = DATAIN_COMPLETE_NORMAL; + + return dr; + } + + if (!dr->runlength) { + if (datain->flags & ISCSI_FLAG_DATA_STATUS) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } else { + if ((dr->begrun + dr->runlength) == dr->data_sn) { + dr->dr_complete = + (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ? + DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY : + DATAIN_COMPLETE_CONNECTION_RECOVERY; + } + } + + return dr; +} + +/* + * For Normal and Recovery DataSequenceInOrder=No and DataPDUInOrder=No. + */ +static struct iscsi_datain_req *iscsit_set_datain_values_no_and_no( + struct iscsi_cmd *cmd, + struct iscsi_datain *datain) +{ + u32 read_data_done, read_data_left, seq_send_order; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct iscsi_pdu *pdu; + struct iscsi_seq *seq = NULL; + + dr = iscsit_get_datain_req(cmd); + if (!dr) + return NULL; + + if (dr->recovery && dr->generate_recovery_values) { + if (iscsit_create_recovery_datain_values_datasequenceinorder_no( + cmd, dr) < 0) + return NULL; + + dr->generate_recovery_values = 0; + } + + read_data_done = (!dr->recovery) ? + cmd->read_data_done : dr->read_data_done; + seq_send_order = (!dr->recovery) ? + cmd->seq_send_order : dr->seq_send_order; + + read_data_left = (cmd->data_length - read_data_done); + if (!read_data_left) { + pr_err("ITT: 0x%08x read_data_left is zero!\n", + cmd->init_task_tag); + return NULL; + } + + seq = iscsit_get_seq_holder_for_datain(cmd, seq_send_order); + if (!seq) + return NULL; + + seq->sent = 1; + + if (!dr->recovery && !seq->next_burst_len) + seq->first_datasn = cmd->data_sn; + + pdu = iscsit_get_pdu_holder_for_seq(cmd, seq); + if (!pdu) + return NULL; + + if (seq->pdu_send_order == seq->pdu_count) { + pdu->flags |= ISCSI_FLAG_CMD_FINAL; + if (conn->sess->sess_ops->ErrorRecoveryLevel > 0) + pdu->flags |= ISCSI_FLAG_DATA_ACK; + + seq->next_burst_len = 0; + seq_send_order++; + } else + seq->next_burst_len += pdu->length; + + if ((read_data_done + pdu->length) == cmd->data_length) + pdu->flags |= ISCSI_FLAG_DATA_STATUS; + + pdu->data_sn = (!dr->recovery) ? 
cmd->data_sn++ : dr->data_sn++;
+	if (!dr->recovery) {
+		cmd->seq_send_order = seq_send_order;
+		cmd->read_data_done += pdu->length;
+	} else {
+		dr->seq_send_order = seq_send_order;
+		dr->read_data_done += pdu->length;
+	}
+
+	datain->flags = pdu->flags;
+	datain->length = pdu->length;
+	datain->offset = pdu->offset;
+	datain->data_sn = pdu->data_sn;
+
+	if (!dr->recovery) {
+		if (datain->flags & ISCSI_FLAG_CMD_FINAL)
+			seq->last_datasn = datain->data_sn;
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS)
+			dr->dr_complete = DATAIN_COMPLETE_NORMAL;
+
+		return dr;
+	}
+
+	if (!dr->runlength) {
+		if (datain->flags & ISCSI_FLAG_DATA_STATUS) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	} else {
+		if ((dr->begrun + dr->runlength) == dr->data_sn) {
+			dr->dr_complete =
+			    (dr->recovery == DATAIN_WITHIN_COMMAND_RECOVERY) ?
+				DATAIN_COMPLETE_WITHIN_COMMAND_RECOVERY :
+				DATAIN_COMPLETE_CONNECTION_RECOVERY;
+		}
+	}
+
+	return dr;
+}
+
+struct iscsi_datain_req *iscsit_get_datain_values(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain *datain)
+{
+	struct iscsi_conn *conn = cmd->conn;
+
+	if (conn->sess->sess_ops->DataSequenceInOrder &&
+	    conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_yes_and_yes(cmd, datain);
+	else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+		  conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_no_and_yes(cmd, datain);
+	else if (conn->sess->sess_ops->DataSequenceInOrder &&
+		 !conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_yes_and_no(cmd, datain);
+	else if (!conn->sess->sess_ops->DataSequenceInOrder &&
+		 !conn->sess->sess_ops->DataPDUInOrder)
+		return iscsit_set_datain_values_no_and_no(cmd, datain);
+
+	return NULL;
+}
diff --git a/drivers/target/iscsi/iscsi_target_datain_values.h b/drivers/target/iscsi/iscsi_target_datain_values.h
new file mode 100644
index 000000000000..646429ac5a02
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_datain_values.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_DATAIN_VALUES_H
+#define ISCSI_TARGET_DATAIN_VALUES_H
+
+extern struct iscsi_datain_req *iscsit_allocate_datain_req(void);
+extern void iscsit_attach_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_datain_req(struct iscsi_cmd *, struct iscsi_datain_req *);
+extern void iscsit_free_all_datain_reqs(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_req(struct iscsi_cmd *);
+extern struct iscsi_datain_req *iscsit_get_datain_values(struct iscsi_cmd *,
+			struct iscsi_datain *);
+
+#endif   /*** ISCSI_TARGET_DATAIN_VALUES_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_device.c b/drivers/target/iscsi/iscsi_target_device.c
new file mode 100644
index 000000000000..a19fa5eea88e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_device.c
@@ -0,0 +1,87 @@
+/*******************************************************************************
+ * This file contains the iSCSI Virtual Device and Disk Transport
+ * agnostic related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. 
Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <scsi/scsi_device.h> +#include <target/target_core_base.h> +#include <target/target_core_device.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_device.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" + +int iscsit_get_lun_for_tmr( + struct iscsi_cmd *cmd, + u64 lun) +{ + u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); + + return transport_lookup_tmr_lun(&cmd->se_cmd, unpacked_lun); +} + +int iscsit_get_lun_for_cmd( + struct iscsi_cmd *cmd, + unsigned char *cdb, + u64 lun) +{ + u32 unpacked_lun = scsilun_to_int((struct scsi_lun *)&lun); + + return transport_lookup_cmd_lun(&cmd->se_cmd, unpacked_lun); +} + +void iscsit_determine_maxcmdsn(struct iscsi_session *sess) +{ + struct se_node_acl *se_nacl; + + /* + * This is a discovery session, the single queue slot was already + * assigned in iscsi_login_zero_tsih(). Since only Logout and + * Text Opcodes are allowed during discovery we do not have to worry + * about the HBA's queue depth here. + */ + if (sess->sess_ops->SessionType) + return; + + se_nacl = sess->se_sess->se_node_acl; + + /* + * This is a normal session, set the Session's CmdSN window to the + * struct se_node_acl->queue_depth. The value in struct se_node_acl->queue_depth + * has already been validated as a legal value in + * core_set_queue_depth_for_node(). + */ + sess->cmdsn_window = se_nacl->queue_depth; + sess->max_cmd_sn = (sess->max_cmd_sn + se_nacl->queue_depth) - 1; +} + +void iscsit_increment_maxcmdsn(struct iscsi_cmd *cmd, struct iscsi_session *sess) +{ + if (cmd->immediate_cmd || cmd->maxcmdsn_inc) + return; + + cmd->maxcmdsn_inc = 1; + + mutex_lock(&sess->cmdsn_mutex); + sess->max_cmd_sn += 1; + pr_debug("Updated MaxCmdSN to 0x%08x\n", sess->max_cmd_sn); + mutex_unlock(&sess->cmdsn_mutex); +} diff --git a/drivers/target/iscsi/iscsi_target_device.h b/drivers/target/iscsi/iscsi_target_device.h new file mode 100644 index 000000000000..bef1cada15f8 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_device.h @@ -0,0 +1,9 @@ +#ifndef ISCSI_TARGET_DEVICE_H +#define ISCSI_TARGET_DEVICE_H + +extern int iscsit_get_lun_for_tmr(struct iscsi_cmd *, u64); +extern int iscsit_get_lun_for_cmd(struct iscsi_cmd *, unsigned char *, u64); +extern void iscsit_determine_maxcmdsn(struct iscsi_session *); +extern void iscsit_increment_maxcmdsn(struct iscsi_cmd *, struct iscsi_session *); + +#endif /* ISCSI_TARGET_DEVICE_H */ diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c new file mode 100644 index 000000000000..b7ffc3cd40cc --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -0,0 +1,1004 @@ +/****************************************************************************** + * This file contains error recovery level zero functions used by + * the iSCSI Target driver. 
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+/*
+ * Used to set values in struct iscsi_cmd that iscsit_dataout_check_sequence()
+ * checks against to determine whether a PDU's Offset+Length falls within
+ * the current DataOUT Sequence.  Used for DataSequenceInOrder=Yes only.
+ */
+void iscsit_set_dataout_sequence_values(
+	struct iscsi_cmd *cmd)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	/*
+	 * Still set seq_start_offset and seq_end_offset for Unsolicited
+	 * DataOUT, even if DataSequenceInOrder=No.
+	 */
+	if (cmd->unsolicited_data) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		cmd->seq_end_offset = cmd->write_data_done +
+			((cmd->data_length >
+			  conn->sess->sess_ops->FirstBurstLength) ?
+			 conn->sess->sess_ops->FirstBurstLength : cmd->data_length);
+		return;
+	}
+
+	if (!conn->sess->sess_ops->DataSequenceInOrder)
+		return;
+
+	if (!cmd->seq_start_offset && !cmd->seq_end_offset) {
+		cmd->seq_start_offset = cmd->write_data_done;
+		cmd->seq_end_offset = (cmd->data_length >
+			conn->sess->sess_ops->MaxBurstLength) ?
+			(cmd->write_data_done +
+			 conn->sess->sess_ops->MaxBurstLength) : cmd->data_length;
+	} else {
+		cmd->seq_start_offset = cmd->seq_end_offset;
+		cmd->seq_end_offset = ((cmd->seq_end_offset +
+			conn->sess->sess_ops->MaxBurstLength) >=
+			cmd->data_length) ? cmd->data_length :
+			(cmd->seq_end_offset +
+			 conn->sess->sess_ops->MaxBurstLength);
+	}
+}
+
+static int iscsit_dataout_within_command_recovery_check(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * We do the within-command recovery checks here as it is
+	 * the first function called in iscsit_check_pre_dataout().
+	 * Basically, if we are in within-command recovery and
+	 * the PDU does not contain the offset the sequence needs,
+	 * dump the payload.
+	 *
+	 * This only applies to DataPDUInOrder=Yes; for
+	 * DataPDUInOrder=No we only re-request the failed PDU
+	 * and check that all PDUs in a sequence are received
+	 * upon end of sequence.
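	 *
	 * For example (the offsets are illustrative): if recovery has
	 * rewound write_data_done to 8192 but the initiator is still
	 * streaming an old PDU at offset 16384, that payload is received
	 * and discarded via iscsit_dump_data_payload() instead of
	 * failing the connection.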
+ */ + if (conn->sess->sess_ops->DataSequenceInOrder) { + if ((cmd->cmd_flags & ICF_WITHIN_COMMAND_RECOVERY) && + (cmd->write_data_done != hdr->offset)) + goto dump; + + cmd->cmd_flags &= ~ICF_WITHIN_COMMAND_RECOVERY; + } else { + struct iscsi_seq *seq; + + seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length); + if (!seq) + return DATAOUT_CANNOT_RECOVER; + /* + * Set the struct iscsi_seq pointer to reuse later. + */ + cmd->seq_ptr = seq; + + if (conn->sess->sess_ops->DataPDUInOrder) { + if ((seq->status == + DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) && + ((seq->offset != hdr->offset) || + (seq->data_sn != hdr->datasn))) + goto dump; + } else { + if ((seq->status == + DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY) && + (seq->data_sn != hdr->datasn)) + goto dump; + } + + if (seq->status == DATAOUT_SEQUENCE_COMPLETE) + goto dump; + + if (seq->status != DATAOUT_SEQUENCE_COMPLETE) + seq->status = 0; + } + + return DATAOUT_NORMAL; + +dump: + pr_err("Dumping DataOUT PDU Offset: %u Length: %d DataSN:" + " 0x%08x\n", hdr->offset, payload_length, hdr->datasn); + return iscsit_dump_data_payload(conn, payload_length, 1); +} + +static int iscsit_dataout_check_unsolicited_sequence( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + u32 first_burst_len; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + + if ((hdr->offset < cmd->seq_start_offset) || + ((hdr->offset + payload_length) > cmd->seq_end_offset)) { + pr_err("Command ITT: 0x%08x with Offset: %u," + " Length: %u outside of Unsolicited Sequence %u:%u while" + " DataSequenceInOrder=Yes.\n", cmd->init_task_tag, + hdr->offset, payload_length, cmd->seq_start_offset, + cmd->seq_end_offset); + return DATAOUT_CANNOT_RECOVER; + } + + first_burst_len = (cmd->first_burst_len + payload_length); + + if (first_burst_len > conn->sess->sess_ops->FirstBurstLength) { + pr_err("Total %u bytes exceeds FirstBurstLength: %u" + " for this Unsolicited DataOut Burst.\n", + first_burst_len, conn->sess->sess_ops->FirstBurstLength); + transport_send_check_condition_and_sense(&cmd->se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return DATAOUT_CANNOT_RECOVER; + } + + /* + * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity + * checks for the current Unsolicited DataOUT Sequence. + */ + if (hdr->flags & ISCSI_FLAG_CMD_FINAL) { + /* + * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No, end of + * sequence checks are handled in + * iscsit_dataout_datapduinorder_no_fbit(). + */ + if (!conn->sess->sess_ops->DataPDUInOrder) + goto out; + + if ((first_burst_len != cmd->data_length) && + (first_burst_len != conn->sess->sess_ops->FirstBurstLength)) { + pr_err("Unsolicited non-immediate data" + " received %u does not equal FirstBurstLength: %u, and" + " does not equal ExpXferLen %u.\n", first_burst_len, + conn->sess->sess_ops->FirstBurstLength, + cmd->data_length); + transport_send_check_condition_and_sense(&cmd->se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return DATAOUT_CANNOT_RECOVER; + } + } else { + if (first_burst_len == conn->sess->sess_ops->FirstBurstLength) { + pr_err("Command ITT: 0x%08x reached" + " FirstBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is not set. protocol" + " error.\n", cmd->init_task_tag, + conn->sess->sess_ops->FirstBurstLength); + return DATAOUT_CANNOT_RECOVER; + } + if (first_burst_len == cmd->data_length) { + pr_err("Command ITT: 0x%08x reached" + " ExpXferLen: %u, but ISCSI_FLAG_CMD_FINAL is not set. 
protocol"
+				" error.\n", cmd->init_task_tag, cmd->data_length);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_sequence(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	u32 next_burst_len;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *seq = NULL;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * For DataSequenceInOrder=Yes: Check that the offset and offset+length
+	 * is within range as defined by iscsit_set_dataout_sequence_values().
+	 *
+	 * For DataSequenceInOrder=No: Check that a struct iscsi_seq exists for
+	 * the offset+length tuple.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		/*
+		 * Due to the possibility of recovery DataOUT sent by the
+		 * initiator fulfilling a Recovery R2T, it's best to just dump
+		 * the payload here, instead of erroring out.
+		 */
+		if ((hdr->offset < cmd->seq_start_offset) ||
+		    ((hdr->offset + payload_length) > cmd->seq_end_offset)) {
+			pr_err("Command ITT: 0x%08x with Offset: %u,"
+				" Length: %u outside of Sequence %u:%u while"
+				" DataSequenceInOrder=Yes.\n", cmd->init_task_tag,
+				hdr->offset, payload_length, cmd->seq_start_offset,
+				cmd->seq_end_offset);
+
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (cmd->next_burst_len + payload_length);
+	} else {
+		seq = iscsit_get_seq_holder(cmd, hdr->offset, payload_length);
+		if (!seq)
+			return DATAOUT_CANNOT_RECOVER;
+		/*
+		 * Set the struct iscsi_seq pointer to reuse later.
+		 */
+		cmd->seq_ptr = seq;
+
+		if (seq->status == DATAOUT_SEQUENCE_COMPLETE) {
+			if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+				return DATAOUT_CANNOT_RECOVER;
+			return DATAOUT_WITHIN_COMMAND_RECOVERY;
+		}
+
+		next_burst_len = (seq->next_burst_len + payload_length);
+	}
+
+	if (next_burst_len > conn->sess->sess_ops->MaxBurstLength) {
+		pr_err("Command ITT: 0x%08x, NextBurstLength: %u and"
+			" Length: %u exceeds MaxBurstLength: %u. protocol"
+			" error.\n", cmd->init_task_tag,
+			(next_burst_len - payload_length),
+			payload_length, conn->sess->sess_ops->MaxBurstLength);
+		return DATAOUT_CANNOT_RECOVER;
+	}
+
+	/*
+	 * Perform various MaxBurstLength and ISCSI_FLAG_CMD_FINAL sanity
+	 * checks for the current DataOUT Sequence.
+	 */
+	if (hdr->flags & ISCSI_FLAG_CMD_FINAL) {
+		/*
+		 * Ignore ISCSI_FLAG_CMD_FINAL checks while DataPDUInOrder=No;
+		 * end of sequence checks are handled in
+		 * iscsit_dataout_datapduinorder_no_fbit().
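		 *
		 * For illustration, with MaxBurstLength=65536: an F bit
		 * arriving while next_burst_len is only 32768 and more write
		 * data remains means the initiator ended the burst early,
		 * which the checks below treat as a fatal protocol error.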
		 */
+		if (!conn->sess->sess_ops->DataPDUInOrder)
+			goto out;
+
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if ((next_burst_len <
+			     conn->sess->sess_ops->MaxBurstLength) &&
+			   ((cmd->write_data_done + payload_length) <
+			     cmd->data_length)) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+					" before end of DataOUT sequence, protocol"
+					" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len < seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x set ISCSI_FLAG_CMD_FINAL"
+					" before end of DataOUT sequence, protocol"
+					" error.\n", cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	} else {
+		if (conn->sess->sess_ops->DataSequenceInOrder) {
+			if (next_burst_len ==
+			    conn->sess->sess_ops->MaxBurstLength) {
+				pr_err("Command ITT: 0x%08x reached"
+					" MaxBurstLength: %u, but ISCSI_FLAG_CMD_FINAL is"
+					" not set, protocol error.\n", cmd->init_task_tag,
+					conn->sess->sess_ops->MaxBurstLength);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+			if ((cmd->write_data_done + payload_length) ==
+			     cmd->data_length) {
+				pr_err("Command ITT: 0x%08x reached"
+					" last DataOUT PDU in sequence but ISCSI_FLAG_"
+					"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		} else {
+			if (next_burst_len == seq->xfer_len) {
+				pr_err("Command ITT: 0x%08x reached"
+					" last DataOUT PDU in sequence but ISCSI_FLAG_"
+					"CMD_FINAL is not set, protocol error.\n",
+					cmd->init_task_tag);
+				return DATAOUT_CANNOT_RECOVER;
+			}
+		}
+	}
+
+out:
+	return DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_check_datasn(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int dump = 0, recovery = 0;
+	u32 data_sn = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * Considering the target has no method of re-requesting DataOUT
+	 * by DataSN, if we receive a greater DataSN than expected we
+	 * assume the functions for DataPDUInOrder=[Yes,No] below will
+	 * handle it.
+	 *
+	 * If the DataSN is less than expected, dump the payload.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder)
+		data_sn = cmd->data_sn;
+	else {
+		struct iscsi_seq *seq = cmd->seq_ptr;
+		data_sn = seq->data_sn;
+	}
+
+	if (hdr->datasn > data_sn) {
+		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+			" higher than expected 0x%08x.\n", cmd->init_task_tag,
+				hdr->datasn, data_sn);
+		recovery = 1;
+		goto recover;
+	} else if (hdr->datasn < data_sn) {
+		pr_err("Command ITT: 0x%08x, received DataSN: 0x%08x"
+			" lower than expected 0x%08x, discarding payload.\n",
+			cmd->init_task_tag, hdr->datasn, data_sn);
+		dump = 1;
+		goto dump;
+	}
+
+	return DATAOUT_NORMAL;
+
+recover:
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Unable to perform within-command recovery"
+				" while ERL=0.\n");
+		return DATAOUT_CANNOT_RECOVER;
+	}
+dump:
+	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+		return DATAOUT_CANNOT_RECOVER;
+
+	return (recovery || dump) ? 
DATAOUT_WITHIN_COMMAND_RECOVERY :
+				DATAOUT_NORMAL;
+}
+
+static int iscsit_dataout_pre_datapduinorder_yes(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf)
+{
+	int dump = 0, recovery = 0;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_data *hdr = (struct iscsi_data *) buf;
+	u32 payload_length = ntoh24(hdr->dlength);
+
+	/*
+	 * For DataSequenceInOrder=Yes: If the offset is greater than the global
+	 * DataPDUInOrder=Yes offset counter in struct iscsi_cmd, a protocol
+	 * error has occurred and we fail the connection.
+	 *
+	 * For DataSequenceInOrder=No: If the offset is greater than the per
+	 * sequence DataPDUInOrder=Yes offset counter in struct iscsi_seq, a
+	 * protocol error has occurred and we fail the connection.
+	 */
+	if (conn->sess->sess_ops->DataSequenceInOrder) {
+		if (hdr->offset != cmd->write_data_done) {
+			pr_err("Command ITT: 0x%08x, received offset"
+				" %u different than expected %u.\n", cmd->init_task_tag,
+					hdr->offset, cmd->write_data_done);
+			recovery = 1;
+			goto recover;
+		}
+	} else {
+		struct iscsi_seq *seq = cmd->seq_ptr;
+
+		if (hdr->offset > seq->offset) {
+			pr_err("Command ITT: 0x%08x, received offset"
+				" %u greater than expected %u.\n", cmd->init_task_tag,
+					hdr->offset, seq->offset);
+			recovery = 1;
+			goto recover;
+		} else if (hdr->offset < seq->offset) {
+			pr_err("Command ITT: 0x%08x, received offset"
+				" %u less than expected %u, discarding payload.\n",
+				cmd->init_task_tag, hdr->offset, seq->offset);
+			dump = 1;
+			goto dump;
+		}
+	}
+
+	return DATAOUT_NORMAL;
+
+recover:
+	if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+		pr_err("Unable to perform within-command recovery"
+				" while ERL=0.\n");
+		return DATAOUT_CANNOT_RECOVER;
+	}
+dump:
+	if (iscsit_dump_data_payload(conn, payload_length, 1) < 0)
+		return DATAOUT_CANNOT_RECOVER;
+
+	return (recovery) ? iscsit_recover_dataout_sequence(cmd,
+				hdr->offset, payload_length) :
+	       (dump) ? 
DATAOUT_WITHIN_COMMAND_RECOVERY : DATAOUT_NORMAL; +} + +static int iscsit_dataout_pre_datapduinorder_no( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_pdu *pdu; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + pdu = iscsit_get_pdu_holder(cmd, hdr->offset, payload_length); + if (!pdu) + return DATAOUT_CANNOT_RECOVER; + + cmd->pdu_ptr = pdu; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + case ISCSI_PDU_CRC_FAILED: + case ISCSI_PDU_TIMED_OUT: + break; + case ISCSI_PDU_RECEIVED_OK: + pr_err("Command ITT: 0x%08x received already gotten" + " Offset: %u, Length: %u\n", cmd->init_task_tag, + hdr->offset, payload_length); + return iscsit_dump_data_payload(cmd->conn, payload_length, 1); + default: + return DATAOUT_CANNOT_RECOVER; + } + + return DATAOUT_NORMAL; +} + +static int iscsit_dataout_update_r2t(struct iscsi_cmd *cmd, u32 offset, u32 length) +{ + struct iscsi_r2t *r2t; + + if (cmd->unsolicited_data) + return 0; + + r2t = iscsit_get_r2t_for_eos(cmd, offset, length); + if (!r2t) + return -1; + + spin_lock_bh(&cmd->r2t_lock); + r2t->seq_complete = 1; + cmd->outstanding_r2ts--; + spin_unlock_bh(&cmd->r2t_lock); + + return 0; +} + +static int iscsit_dataout_update_datapduinorder_no( + struct iscsi_cmd *cmd, + u32 data_sn, + int f_bit) +{ + int ret = 0; + struct iscsi_pdu *pdu = cmd->pdu_ptr; + + pdu->data_sn = data_sn; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + case ISCSI_PDU_CRC_FAILED: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + case ISCSI_PDU_TIMED_OUT: + pdu->status = ISCSI_PDU_RECEIVED_OK; + break; + default: + return DATAOUT_CANNOT_RECOVER; + } + + if (f_bit) { + ret = iscsit_dataout_datapduinorder_no_fbit(cmd, pdu); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + return DATAOUT_NORMAL; +} + +static int iscsit_dataout_post_crc_passed( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + int ret, send_r2t = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_seq *seq = NULL; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (cmd->unsolicited_data) { + if ((cmd->first_burst_len + payload_length) == + conn->sess->sess_ops->FirstBurstLength) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no(cmd, + hdr->datasn, (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + cmd->first_burst_len += payload_length; + + if (conn->sess->sess_ops->DataSequenceInOrder) + cmd->data_sn++; + else { + seq = cmd->seq_ptr; + seq->data_sn++; + seq->offset += payload_length; + } + + if (send_r2t) { + if (seq) + seq->status = DATAOUT_SEQUENCE_COMPLETE; + cmd->first_burst_len = 0; + cmd->unsolicited_data = 0; + } + } else { + if (conn->sess->sess_ops->DataSequenceInOrder) { + if ((cmd->next_burst_len + payload_length) == + conn->sess->sess_ops->MaxBurstLength) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no( + cmd, hdr->datasn, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + cmd->next_burst_len += payload_length; + cmd->data_sn++; + + if (send_r2t) + 
cmd->next_burst_len = 0; + } else { + seq = cmd->seq_ptr; + + if ((seq->next_burst_len + payload_length) == + seq->xfer_len) { + if (iscsit_dataout_update_r2t(cmd, hdr->offset, + payload_length) < 0) + return DATAOUT_CANNOT_RECOVER; + send_r2t = 1; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + ret = iscsit_dataout_update_datapduinorder_no( + cmd, hdr->datasn, + (hdr->flags & ISCSI_FLAG_CMD_FINAL)); + if (ret == DATAOUT_CANNOT_RECOVER) + return ret; + } + + seq->data_sn++; + seq->offset += payload_length; + seq->next_burst_len += payload_length; + + if (send_r2t) { + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_COMPLETE; + } + } + } + + if (send_r2t && conn->sess->sess_ops->DataSequenceInOrder) + cmd->data_sn = 0; + + cmd->write_data_done += payload_length; + + return (cmd->write_data_done == cmd->data_length) ? + DATAOUT_SEND_TO_TRANSPORT : (send_r2t) ? + DATAOUT_SEND_R2T : DATAOUT_NORMAL; +} + +static int iscsit_dataout_post_crc_failed( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (conn->sess->sess_ops->DataPDUInOrder) + goto recover; + /* + * The rest of this function is only called when DataPDUInOrder=No. + */ + pdu = cmd->pdu_ptr; + + switch (pdu->status) { + case ISCSI_PDU_NOT_RECEIVED: + pdu->status = ISCSI_PDU_CRC_FAILED; + break; + case ISCSI_PDU_CRC_FAILED: + break; + case ISCSI_PDU_TIMED_OUT: + pdu->status = ISCSI_PDU_CRC_FAILED; + break; + default: + return DATAOUT_CANNOT_RECOVER; + } + +recover: + return iscsit_recover_dataout_sequence(cmd, hdr->offset, payload_length); +} + +/* + * Called from iscsit_handle_data_out() before DataOUT Payload is received + * and CRC computed. + */ +extern int iscsit_check_pre_dataout( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + int ret; + struct iscsi_conn *conn = cmd->conn; + + ret = iscsit_dataout_within_command_recovery_check(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + + ret = iscsit_dataout_check_datasn(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + + if (cmd->unsolicited_data) { + ret = iscsit_dataout_check_unsolicited_sequence(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + } else { + ret = iscsit_dataout_check_sequence(cmd, buf); + if ((ret == DATAOUT_WITHIN_COMMAND_RECOVERY) || + (ret == DATAOUT_CANNOT_RECOVER)) + return ret; + } + + return (conn->sess->sess_ops->DataPDUInOrder) ? + iscsit_dataout_pre_datapduinorder_yes(cmd, buf) : + iscsit_dataout_pre_datapduinorder_no(cmd, buf); +} + +/* + * Called from iscsit_handle_data_out() after DataOUT Payload is received + * and CRC computed. 
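 *
 * On a good CRC this ends up in iscsit_dataout_post_crc_passed(), which
 * returns DATAOUT_SEND_TO_TRANSPORT once write_data_done reaches the
 * expected transfer length, DATAOUT_SEND_R2T at a burst boundary and
 * DATAOUT_NORMAL otherwise; a digest failure at ERL=0 is fatal and
 * returns DATAOUT_CANNOT_RECOVER after queueing a reject.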
+ */
+int iscsit_check_post_dataout(
+	struct iscsi_cmd *cmd,
+	unsigned char *buf,
+	u8 data_crc_failed)
+{
+	struct iscsi_conn *conn = cmd->conn;
+
+	cmd->dataout_timeout_retries = 0;
+
+	if (!data_crc_failed)
+		return iscsit_dataout_post_crc_passed(cmd, buf);
+	else {
+		if (!conn->sess->sess_ops->ErrorRecoveryLevel) {
+			pr_err("Unable to recover from DataOUT CRC"
+				" failure while ERL=0, closing session.\n");
+			iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+					1, 0, buf, cmd);
+			return DATAOUT_CANNOT_RECOVER;
+		}
+
+		iscsit_add_reject_from_cmd(ISCSI_REASON_DATA_DIGEST_ERROR,
+				0, 0, buf, cmd);
+		return iscsit_dataout_post_crc_failed(cmd, buf);
+	}
+}
+
+static void iscsit_handle_time2retain_timeout(unsigned long data)
+{
+	struct iscsi_session *sess = (struct iscsi_session *) data;
+	struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess);
+	struct se_portal_group *se_tpg = &tpg->tpg_se_tpg;
+
+	spin_lock_bh(&se_tpg->session_lock);
+	if (sess->time2retain_timer_flags & ISCSI_TF_STOP) {
+		spin_unlock_bh(&se_tpg->session_lock);
+		return;
+	}
+	if (atomic_read(&sess->session_reinstatement)) {
+		pr_err("Exiting Time2Retain handler because"
+				" session_reinstatement=1\n");
+		spin_unlock_bh(&se_tpg->session_lock);
+		return;
+	}
+	sess->time2retain_timer_flags |= ISCSI_TF_EXPIRED;
+
+	pr_err("Time2Retain timer expired for SID: %u, cleaning up"
+			" iSCSI session.\n", sess->sid);
+	{
+	struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+	if (tiqn) {
+		spin_lock(&tiqn->sess_err_stats.lock);
+		strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+			(void *)sess->sess_ops->InitiatorName);
+		tiqn->sess_err_stats.last_sess_failure_type =
+				ISCSI_SESS_ERR_CXN_TIMEOUT;
+		tiqn->sess_err_stats.cxn_timeout_errors++;
+		sess->conn_timeout_errors++;
+		spin_unlock(&tiqn->sess_err_stats.lock);
+	}
+	}
+
+	spin_unlock_bh(&se_tpg->session_lock);
+	iscsit_close_session(sess);
+}
+
+extern void iscsit_start_time2retain_handler(struct iscsi_session *sess)
+{
+	int tpg_active;
+	/*
+	 * Only start the Time2Retain timer when the associated TPG is still in
+	 * an ACTIVE (e.g. not disabled or shutdown) state.
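	 *
	 * For example, with the RFC 3720 default of DefaultTime2Retain=20,
	 * the timer armed below fires 20 * HZ jiffies from now and
	 * iscsit_handle_time2retain_timeout() then reaps the session,
	 * unless iscsit_stop_time2retain_timer() has stopped it first.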
+ */ + spin_lock(&ISCSI_TPG_S(sess)->tpg_state_lock); + tpg_active = (ISCSI_TPG_S(sess)->tpg_state == TPG_STATE_ACTIVE); + spin_unlock(&ISCSI_TPG_S(sess)->tpg_state_lock); + + if (!tpg_active) + return; + + if (sess->time2retain_timer_flags & ISCSI_TF_RUNNING) + return; + + pr_debug("Starting Time2Retain timer for %u seconds on" + " SID: %u\n", sess->sess_ops->DefaultTime2Retain, sess->sid); + + init_timer(&sess->time2retain_timer); + sess->time2retain_timer.expires = + (get_jiffies_64() + sess->sess_ops->DefaultTime2Retain * HZ); + sess->time2retain_timer.data = (unsigned long)sess; + sess->time2retain_timer.function = iscsit_handle_time2retain_timeout; + sess->time2retain_timer_flags &= ~ISCSI_TF_STOP; + sess->time2retain_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&sess->time2retain_timer); +} + +/* + * Called with spin_lock_bh(&struct se_portal_group->session_lock) held + */ +extern int iscsit_stop_time2retain_timer(struct iscsi_session *sess) +{ + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + + if (sess->time2retain_timer_flags & ISCSI_TF_EXPIRED) + return -1; + + if (!(sess->time2retain_timer_flags & ISCSI_TF_RUNNING)) + return 0; + + sess->time2retain_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&se_tpg->session_lock); + + del_timer_sync(&sess->time2retain_timer); + + spin_lock_bh(&se_tpg->session_lock); + sess->time2retain_timer_flags &= ~ISCSI_TF_RUNNING; + pr_debug("Stopped Time2Retain Timer for SID: %u\n", + sess->sid); + return 0; +} + +void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->connection_exit)) { + spin_unlock_bh(&conn->state_lock); + goto sleep; + } + + if (atomic_read(&conn->transport_failed)) { + spin_unlock_bh(&conn->state_lock); + goto sleep; + } + spin_unlock_bh(&conn->state_lock); + + iscsi_thread_set_force_reinstatement(conn); + +sleep: + wait_for_completion(&conn->conn_wait_rcfr_comp); + complete(&conn->conn_post_wait_comp); +} + +void iscsit_cause_connection_reinstatement(struct iscsi_conn *conn, int sleep) +{ + spin_lock_bh(&conn->state_lock); + if (atomic_read(&conn->connection_exit)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (atomic_read(&conn->transport_failed)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (atomic_read(&conn->connection_reinstatement)) { + spin_unlock_bh(&conn->state_lock); + return; + } + + if (iscsi_thread_set_force_reinstatement(conn) < 0) { + spin_unlock_bh(&conn->state_lock); + return; + } + + atomic_set(&conn->connection_reinstatement, 1); + if (!sleep) { + spin_unlock_bh(&conn->state_lock); + return; + } + + atomic_set(&conn->sleep_on_conn_wait_comp, 1); + spin_unlock_bh(&conn->state_lock); + + wait_for_completion(&conn->conn_wait_comp); + complete(&conn->conn_post_wait_comp); +} + +void iscsit_fall_back_to_erl0(struct iscsi_session *sess) +{ + pr_debug("Falling back to ErrorRecoveryLevel=0 for SID:" + " %u\n", sess->sid); + + atomic_set(&sess->session_fall_back_to_erl0, 1); +} + +static void iscsit_handle_connection_cleanup(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + if ((sess->sess_ops->ErrorRecoveryLevel == 2) && + !atomic_read(&sess->session_reinstatement) && + !atomic_read(&sess->session_fall_back_to_erl0)) + iscsit_connection_recovery_transport_reset(conn); + else { + pr_debug("Performing cleanup for failed iSCSI" + " Connection ID: %hu from %s\n", conn->cid, + sess->sess_ops->InitiatorName); + 
iscsit_close_connection(conn);
+	}
+}
+
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *conn)
+{
+	spin_lock_bh(&conn->state_lock);
+	if (atomic_read(&conn->connection_exit)) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+	atomic_set(&conn->connection_exit, 1);
+
+	if (conn->conn_state == TARG_CONN_STATE_IN_LOGOUT) {
+		spin_unlock_bh(&conn->state_lock);
+		iscsit_close_connection(conn);
+		return;
+	}
+
+	if (conn->conn_state == TARG_CONN_STATE_CLEANUP_WAIT) {
+		spin_unlock_bh(&conn->state_lock);
+		return;
+	}
+
+	pr_debug("Moving to TARG_CONN_STATE_CLEANUP_WAIT.\n");
+	conn->conn_state = TARG_CONN_STATE_CLEANUP_WAIT;
+	spin_unlock_bh(&conn->state_lock);
+
+	iscsit_handle_connection_cleanup(conn);
+}
+
+/*
+ * This is the simple function that makes the magic of
+ * sync and steering happen in the following paradoxical order:
+ *
+ *	0) Receive conn->of_marker (bytes left until next OFMarker)
+ *	   bytes into an offload buffer.  When we pass the exact number
+ *	   of bytes in conn->of_marker, iscsit_dump_data_payload() and hence
+ *	   rx_data() will automatically receive the identical u32 marker
+ *	   values and store it in conn->of_marker_offset;
+ *	1) Now conn->of_marker_offset will contain the offset to the start
+ *	   of the next iSCSI PDU.  Dump these remaining bytes into another
+ *	   offload buffer.
+ *	2) We are done!
+ *	   Next byte in the TCP stream will contain the next iSCSI PDU!
+ *	   Cool Huh?!
+ */
+int iscsit_recover_from_unknown_opcode(struct iscsi_conn *conn)
+{
+	/*
+	 * Make sure the remaining byte count to the next marker is sane.
+	 */
+	if (conn->of_marker > (conn->conn_ops->OFMarkInt * 4)) {
+		pr_err("Remaining bytes to OFMarker: %u exceeds"
+			" OFMarkInt bytes: %u.\n", conn->of_marker,
+				conn->conn_ops->OFMarkInt * 4);
+		return -1;
+	}
+
+	pr_debug("Advancing %u bytes in TCP stream to get to the"
+			" next OFMarker.\n", conn->of_marker);
+
+	if (iscsit_dump_data_payload(conn, conn->of_marker, 0) < 0)
+		return -1;
+
+	/*
+	 * Make sure the offset marker we retrieved is a valid value.
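	 *
	 * The limit below is the largest legal distance to the next PDU:
	 * one basic header segment plus two digests plus a full data
	 * segment, e.g. 48 + (2 * 4) + 8192 bytes for an 8k
	 * MaxRecvDataSegmentLength.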
	 */
+	if (conn->of_marker_offset > (ISCSI_HDR_LEN + (ISCSI_CRC_LEN * 2) +
+	    conn->conn_ops->MaxRecvDataSegmentLength)) {
+		pr_err("OfMarker offset value: %u exceeds limit.\n",
+			conn->of_marker_offset);
+		return -1;
+	}
+
+	pr_debug("Discarding %u bytes of TCP stream to get to the"
+			" next iSCSI Opcode.\n", conn->of_marker_offset);
+
+	if (iscsit_dump_data_payload(conn, conn->of_marker_offset, 0) < 0)
+		return -1;
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_erl0.h b/drivers/target/iscsi/iscsi_target_erl0.h
new file mode 100644
index 000000000000..21acc9a06376
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl0.h
@@ -0,0 +1,15 @@
+#ifndef ISCSI_TARGET_ERL0_H
+#define ISCSI_TARGET_ERL0_H
+
+extern void iscsit_set_dataout_sequence_values(struct iscsi_cmd *);
+extern int iscsit_check_pre_dataout(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_check_post_dataout(struct iscsi_cmd *, unsigned char *, u8);
+extern void iscsit_start_time2retain_handler(struct iscsi_session *);
+extern int iscsit_stop_time2retain_timer(struct iscsi_session *);
+extern void iscsit_connection_reinstatement_rcfr(struct iscsi_conn *);
+extern void iscsit_cause_connection_reinstatement(struct iscsi_conn *, int);
+extern void iscsit_fall_back_to_erl0(struct iscsi_session *);
+extern void iscsit_take_action_for_connection_exit(struct iscsi_conn *);
+extern int iscsit_recover_from_unknown_opcode(struct iscsi_conn *);
+
+#endif   /*** ISCSI_TARGET_ERL0_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_erl1.c b/drivers/target/iscsi/iscsi_target_erl1.c
new file mode 100644
index 000000000000..980650792cf6
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_erl1.c
@@ -0,0 +1,1299 @@
+/*******************************************************************************
+ * This file contains error recovery level one used by the iSCSI Target driver.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target.h"
+
+#define OFFLOAD_BUF_SIZE	32768
+
+/*
+ * Used to dump excess datain payload for certain error recovery
+ * situations.  Receives at most OFFLOAD_BUF_SIZE of datain per
+ * rx_data() call.
+ *
+ * dump_padding_digest denotes if padding and data digests need
+ * to be dumped.
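 *
 * Padding follows the usual iSCSI 4-byte alignment rule, so dumping a
 * 2050 byte segment with dump_padding_digest set consumes 2 additional
 * pad bytes, plus 4 CRC bytes when DataDigest has been negotiated.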
+ */ +int iscsit_dump_data_payload( + struct iscsi_conn *conn, + u32 buf_len, + int dump_padding_digest) +{ + char *buf, pad_bytes[4]; + int ret = DATAOUT_WITHIN_COMMAND_RECOVERY, rx_got; + u32 length, padding, offset = 0, size; + struct kvec iov; + + length = (buf_len > OFFLOAD_BUF_SIZE) ? OFFLOAD_BUF_SIZE : buf_len; + + buf = kzalloc(length, GFP_ATOMIC); + if (!buf) { + pr_err("Unable to allocate %u bytes for offload" + " buffer.\n", length); + return -1; + } + memset(&iov, 0, sizeof(struct kvec)); + + while (offset < buf_len) { + size = ((offset + length) > buf_len) ? + (buf_len - offset) : length; + + iov.iov_len = size; + iov.iov_base = buf; + + rx_got = rx_data(conn, &iov, 1, size); + if (rx_got != size) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + + offset += size; + } + + if (!dump_padding_digest) + goto out; + + padding = ((-buf_len) & 3); + if (padding != 0) { + iov.iov_len = padding; + iov.iov_base = pad_bytes; + + rx_got = rx_data(conn, &iov, 1, padding); + if (rx_got != padding) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + } + + if (conn->conn_ops->DataDigest) { + u32 data_crc; + + iov.iov_len = ISCSI_CRC_LEN; + iov.iov_base = &data_crc; + + rx_got = rx_data(conn, &iov, 1, ISCSI_CRC_LEN); + if (rx_got != ISCSI_CRC_LEN) { + ret = DATAOUT_CANNOT_RECOVER; + goto out; + } + } + +out: + kfree(buf); + return ret; +} + +/* + * Used for retransmitting R2Ts from a R2T SNACK request. + */ +static int iscsit_send_recovery_r2t_for_snack( + struct iscsi_cmd *cmd, + struct iscsi_r2t *r2t) +{ + /* + * If the struct iscsi_r2t has not been sent yet, we can safely + * ignore retransmission + * of the R2TSN in question. + */ + spin_lock_bh(&cmd->r2t_lock); + if (!r2t->sent_r2t) { + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + r2t->sent_r2t = 0; + spin_unlock_bh(&cmd->r2t_lock); + + iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T); + + return 0; +} + +static int iscsit_handle_r2t_snack( + struct iscsi_cmd *cmd, + unsigned char *buf, + u32 begrun, + u32 runlength) +{ + u32 last_r2tsn; + struct iscsi_r2t *r2t; + + /* + * Make sure the initiator is not requesting retransmission + * of R2TSNs already acknowledged by a TMR TASK_REASSIGN. + */ + if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) && + (begrun <= cmd->acked_data_sn)) { + pr_err("ITT: 0x%08x, R2T SNACK requesting" + " retransmission of R2TSN: 0x%08x to 0x%08x but already" + " acked to R2TSN: 0x%08x by TMR TASK_REASSIGN," + " protocol error.\n", cmd->init_task_tag, begrun, + (begrun + runlength), cmd->acked_data_sn); + + return iscsit_add_reject_from_cmd( + ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + if (runlength) { + if ((begrun + runlength) > cmd->r2t_sn) { + pr_err("Command ITT: 0x%08x received R2T SNACK" + " with BegRun: 0x%08x, RunLength: 0x%08x, exceeds" + " current R2TSN: 0x%08x, protocol error.\n", + cmd->init_task_tag, begrun, runlength, cmd->r2t_sn); + return iscsit_add_reject_from_cmd( + ISCSI_REASON_BOOKMARK_INVALID, 1, 0, buf, cmd); + } + last_r2tsn = (begrun + runlength); + } else + last_r2tsn = cmd->r2t_sn; + + while (begrun < last_r2tsn) { + r2t = iscsit_get_holder_for_r2tsn(cmd, begrun); + if (!r2t) + return -1; + if (iscsit_send_recovery_r2t_for_snack(cmd, r2t) < 0) + return -1; + + begrun++; + } + + return 0; +} + +/* + * Generates Offsets and NextBurstLength based on Begrun and Runlength + * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN. + * + * For DataSequenceInOrder=Yes and DataPDUInOrder=[Yes,No] only. + * + * FIXME: How is this handled for a RData SNACK? 
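 *
 * Worked example (the numbers are illustrative): with
 * MaxRecvDataSegmentLength=8192, MaxBurstLength=65536 and BegRun=10,
 * the loop below replays DataSNs 0-9, crediting 8192 bytes per PDU to
 * read_data_done and folding next_burst_len back to zero on every
 * eighth PDU, i.e. at each burst boundary.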
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_yes(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain_req *dr)
+{
+	u32 data_sn = 0, data_sn_count = 0;
+	u32 pdu_start = 0, seq_no = 0;
+	u32 begrun = dr->begrun;
+	struct iscsi_conn *conn = cmd->conn;
+
+	while (begrun > data_sn++) {
+		data_sn_count++;
+		if ((dr->next_burst_len +
+		     conn->conn_ops->MaxRecvDataSegmentLength) <
+		     conn->sess->sess_ops->MaxBurstLength) {
+			dr->read_data_done +=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+			dr->next_burst_len +=
+				conn->conn_ops->MaxRecvDataSegmentLength;
+		} else {
+			dr->read_data_done +=
+				(conn->sess->sess_ops->MaxBurstLength -
+				 dr->next_burst_len);
+			dr->next_burst_len = 0;
+			pdu_start += data_sn_count;
+			data_sn_count = 0;
+			seq_no++;
+		}
+	}
+
+	if (!conn->sess->sess_ops->DataPDUInOrder) {
+		cmd->seq_no = seq_no;
+		cmd->pdu_start = pdu_start;
+		cmd->pdu_send_order = data_sn_count;
+	}
+
+	return 0;
+}
+
+/*
+ * Generates Offsets and NextBurstLength based on Begrun and Runlength
+ * carried in a Data SNACK or ExpDataSN in TMR TASK_REASSIGN.
+ *
+ * For DataSequenceInOrder=No and DataPDUInOrder=[Yes,No] only.
+ *
+ * FIXME: How is this handled for a RData SNACK?
+ */
+int iscsit_create_recovery_datain_values_datasequenceinorder_no(
+	struct iscsi_cmd *cmd,
+	struct iscsi_datain_req *dr)
+{
+	int found_seq = 0, i;
+	u32 data_sn, read_data_done = 0, seq_send_order = 0;
+	u32 begrun = dr->begrun;
+	u32 runlength = dr->runlength;
+	struct iscsi_conn *conn = cmd->conn;
+	struct iscsi_seq *first_seq = NULL, *seq = NULL;
+
+	if (!cmd->seq_list) {
+		pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+		return -1;
+	}
+
+	/*
+	 * Calculate read_data_done for all sequences containing a
+	 * first_datasn and last_datasn less than the BegRun.
+	 *
+	 * Locate the struct iscsi_seq the BegRun lies within and calculate
+	 * NextBurstLength up to the DataSN based on MaxRecvDataSegmentLength.
+	 *
+	 * Also use struct iscsi_seq->seq_send_order to determine where to start.
+	 */
+	for (i = 0; i < cmd->seq_count; i++) {
+		seq = &cmd->seq_list[i];
+
+		if (!seq->seq_send_order)
+			first_seq = seq;
+
+		/*
+		 * No data has been transferred for this DataIN sequence, so the
+		 * seq->first_datasn and seq->last_datasn have not been set.
+		 */
+		if (!seq->sent) {
+#if 0
+			pr_err("Ignoring non-sent sequence 0x%08x ->"
+				" 0x%08x\n\n", seq->first_datasn,
+				seq->last_datasn);
+#endif
+			continue;
+		}
+
+		/*
+		 * This DataIN sequence precedes the received BegRun; add the
+		 * total xfer_len of the sequence to read_data_done and reset
+		 * seq->pdu_send_order.
+		 */
+		if ((seq->first_datasn < begrun) &&
+		    (seq->last_datasn < begrun)) {
+#if 0
+			pr_err("Pre BegRun sequence 0x%08x ->"
+				" 0x%08x\n", seq->first_datasn,
+				seq->last_datasn);
+#endif
+			read_data_done += cmd->seq_list[i].xfer_len;
+			seq->next_burst_len = seq->pdu_send_order = 0;
+			continue;
+		}
+
+		/*
+		 * The BegRun lies within this DataIN sequence.
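		 *
		 * e.g. a BegRun of 7 against a sequence spanning DataSNs 5-9
		 * lands here: the two PDUs already delivered (5 and 6) are
		 * credited to read_data_done and retransmission restarts at
		 * DataSN 7.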
+ */ + if ((seq->first_datasn <= begrun) && + (seq->last_datasn >= begrun)) { +#if 0 + pr_err("Found sequence begrun: 0x%08x in" + " 0x%08x -> 0x%08x\n", begrun, + seq->first_datasn, seq->last_datasn); +#endif + seq_send_order = seq->seq_send_order; + data_sn = seq->first_datasn; + seq->next_burst_len = seq->pdu_send_order = 0; + found_seq = 1; + + /* + * For DataPDUInOrder=Yes, while the first DataSN of + * the sequence is less than the received BegRun, add + * the MaxRecvDataSegmentLength to read_data_done and + * to the sequence's next_burst_len; + * + * For DataPDUInOrder=No, while the first DataSN of the + * sequence is less than the received BegRun, find the + * struct iscsi_pdu of the DataSN in question and add the + * MaxRecvDataSegmentLength to read_data_done and to the + * sequence's next_burst_len; + */ + if (conn->sess->sess_ops->DataPDUInOrder) { + while (data_sn < begrun) { + seq->pdu_send_order++; + read_data_done += + conn->conn_ops->MaxRecvDataSegmentLength; + seq->next_burst_len += + conn->conn_ops->MaxRecvDataSegmentLength; + data_sn++; + } + } else { + int j; + struct iscsi_pdu *pdu; + + while (data_sn < begrun) { + seq->pdu_send_order++; + + for (j = 0; j < seq->pdu_count; j++) { + pdu = &cmd->pdu_list[ + seq->pdu_start + j]; + if (pdu->data_sn == data_sn) { + read_data_done += + pdu->length; + seq->next_burst_len += + pdu->length; + } + } + data_sn++; + } + } + continue; + } + + /* + * This DataIN sequence is larger than the received BegRun, + * reset seq->pdu_send_order and continue. + */ + if ((seq->first_datasn > begrun) || + (seq->last_datasn > begrun)) { +#if 0 + pr_err("Post BegRun sequence 0x%08x -> 0x%08x\n", + seq->first_datasn, seq->last_datasn); +#endif + seq->next_burst_len = seq->pdu_send_order = 0; + continue; + } + } + + if (!found_seq) { + if (!begrun) { + if (!first_seq) { + pr_err("ITT: 0x%08x, Begrun: 0x%08x" + " but first_seq is NULL\n", + cmd->init_task_tag, begrun); + return -1; + } + seq_send_order = first_seq->seq_send_order; + seq->next_burst_len = seq->pdu_send_order = 0; + goto done; + } + + pr_err("Unable to locate struct iscsi_seq for ITT: 0x%08x," + " BegRun: 0x%08x, RunLength: 0x%08x while" + " DataSequenceInOrder=No and DataPDUInOrder=%s.\n", + cmd->init_task_tag, begrun, runlength, + (conn->sess->sess_ops->DataPDUInOrder) ? "Yes" : "No"); + return -1; + } + +done: + dr->read_data_done = read_data_done; + dr->seq_send_order = seq_send_order; + + return 0; +} + +static int iscsit_handle_recovery_datain( + struct iscsi_cmd *cmd, + unsigned char *buf, + u32 begrun, + u32 runlength) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_datain_req *dr; + struct se_cmd *se_cmd = &cmd->se_cmd; + + if (!atomic_read(&se_cmd->t_transport_complete)) { + pr_err("Ignoring ITT: 0x%08x Data SNACK\n", + cmd->init_task_tag); + return 0; + } + + /* + * Make sure the initiator is not requesting retransmission + * of DataSNs already acknowledged by a Data ACK SNACK. + */ + if ((cmd->cmd_flags & ICF_GOT_DATACK_SNACK) && + (begrun <= cmd->acked_data_sn)) { + pr_err("ITT: 0x%08x, Data SNACK requesting" + " retransmission of DataSN: 0x%08x to 0x%08x but" + " already acked to DataSN: 0x%08x by Data ACK SNACK," + " protocol error.\n", cmd->init_task_tag, begrun, + (begrun + runlength), cmd->acked_data_sn); + + return iscsit_add_reject_from_cmd(ISCSI_REASON_PROTOCOL_ERROR, + 1, 0, buf, cmd); + } + + /* + * Make sure BegRun and RunLength in the Data SNACK are sane. + * Note: (cmd->data_sn - 1) will carry the maximum DataSN sent. 
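	 *
	 * So if DataSNs 0-4 have been sent, cmd->data_sn is 5 and any SNACK
	 * with BegRun + RunLength above 4 is rejected as out of range.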
	 */
+	if ((begrun + runlength) > (cmd->data_sn - 1)) {
+		pr_err("Initiator requesting BegRun: 0x%08x, RunLength"
+			": 0x%08x greater than maximum DataSN: 0x%08x.\n",
+				begrun, runlength, (cmd->data_sn - 1));
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_INVALID,
+				1, 0, buf, cmd);
+	}
+
+	dr = iscsit_allocate_datain_req();
+	if (!dr)
+		return iscsit_add_reject_from_cmd(ISCSI_REASON_BOOKMARK_NO_RESOURCES,
+				1, 0, buf, cmd);
+
+	dr->data_sn = dr->begrun = begrun;
+	dr->runlength = runlength;
+	dr->generate_recovery_values = 1;
+	dr->recovery = DATAIN_WITHIN_COMMAND_RECOVERY;
+
+	iscsit_attach_datain_req(cmd, dr);
+
+	cmd->i_state = ISTATE_SEND_DATAIN;
+	iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+
+	return 0;
+}
+
+int iscsit_handle_recovery_datain_or_r2t(
+	struct iscsi_conn *conn,
+	unsigned char *buf,
+	u32 init_task_tag,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd;
+
+	cmd = iscsit_find_cmd_from_itt(conn, init_task_tag);
+	if (!cmd)
+		return 0;
+
+	/*
+	 * FIXME: This will not work for bidi commands.
+	 */
+	switch (cmd->data_direction) {
+	case DMA_TO_DEVICE:
+		return iscsit_handle_r2t_snack(cmd, buf, begrun, runlength);
+	case DMA_FROM_DEVICE:
+		return iscsit_handle_recovery_datain(cmd, buf, begrun,
+				runlength);
+	default:
+		pr_err("Unknown cmd->data_direction: 0x%02x\n",
+				cmd->data_direction);
+		return -1;
+	}
+
+	return 0;
+}
+
+/* #warning FIXME: Status SNACK needs to be dependent on OPCODE!!! */
+int iscsit_handle_status_snack(
+	struct iscsi_conn *conn,
+	u32 init_task_tag,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd = NULL;
+	u32 last_statsn;
+	int found_cmd;
+
+	if (conn->exp_statsn > begrun) {
+		pr_err("Got Status SNACK Begrun: 0x%08x, RunLength:"
+			" 0x%08x but already got ExpStatSN: 0x%08x on CID:"
+			" %hu.\n", begrun, runlength, conn->exp_statsn,
+			conn->cid);
+		return 0;
+	}
+
+	last_statsn = (!runlength) ? conn->stat_sn : (begrun + runlength);
+
+	while (begrun < last_statsn) {
+		found_cmd = 0;
+
+		spin_lock_bh(&conn->cmd_lock);
+		list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) {
+			if (cmd->stat_sn == begrun) {
+				found_cmd = 1;
+				break;
+			}
+		}
+		spin_unlock_bh(&conn->cmd_lock);
+
+		if (!found_cmd) {
+			pr_err("Unable to find StatSN: 0x%08x for"
+				" a Status SNACK, assuming this was a"
+				" proactive SNACK for an untransmitted"
+				" StatSN, ignoring.\n", begrun);
+			begrun++;
+			continue;
+		}
+
+		spin_lock_bh(&cmd->istate_lock);
+		if (cmd->i_state == ISTATE_SEND_DATAIN) {
+			spin_unlock_bh(&cmd->istate_lock);
+			pr_err("Ignoring Status SNACK for BegRun:"
+				" 0x%08x, RunLength: 0x%08x, assuming this was"
+				" a proactive SNACK for an untransmitted"
+				" StatSN\n", begrun, runlength);
+			begrun++;
+			continue;
+		}
+		spin_unlock_bh(&cmd->istate_lock);
+
+		cmd->i_state = ISTATE_SEND_STATUS_RECOVERY;
+		iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+		begrun++;
+	}
+
+	return 0;
+}
+
+int iscsit_handle_data_ack(
+	struct iscsi_conn *conn,
+	u32 targ_xfer_tag,
+	u32 begrun,
+	u32 runlength)
+{
+	struct iscsi_cmd *cmd = NULL;
+
+	cmd = iscsit_find_cmd_from_ttt(conn, targ_xfer_tag);
+	if (!cmd) {
+		pr_err("Data ACK SNACK for TTT: 0x%08x is"
+			" invalid.\n", targ_xfer_tag);
+		return -1;
+	}
+
+	if (begrun <= cmd->acked_data_sn) {
+		pr_err("ITT: 0x%08x Data ACK SNACK BegRUN: 0x%08x is"
+			" less than the already acked DataSN: 0x%08x.\n",
+			cmd->init_task_tag, begrun, cmd->acked_data_sn);
+		return -1;
+	}
+
+	/*
+	 * For Data ACK SNACK, BegRun is the next expected DataSN,
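	 * so acked_data_sn below becomes (BegRun - 1); a BegRun of 8, for
	 * example, acknowledges DataSNs 0 through 7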
+ * (see iSCSI v19: 10.16.6) + */ + cmd->cmd_flags |= ICF_GOT_DATACK_SNACK; + cmd->acked_data_sn = (begrun - 1); + + pr_debug("Received Data ACK SNACK for ITT: 0x%08x," + " updated acked DataSN to 0x%08x.\n", + cmd->init_task_tag, cmd->acked_data_sn); + + return 0; +} + +static int iscsit_send_recovery_r2t( + struct iscsi_cmd *cmd, + u32 offset, + u32 xfer_len) +{ + int ret; + + spin_lock_bh(&cmd->r2t_lock); + ret = iscsit_add_r2t_to_list(cmd, offset, xfer_len, 1, 0); + spin_unlock_bh(&cmd->r2t_lock); + + return ret; +} + +int iscsit_dataout_datapduinorder_no_fbit( + struct iscsi_cmd *cmd, + struct iscsi_pdu *pdu) +{ + int i, send_recovery_r2t = 0, recovery = 0; + u32 length = 0, offset = 0, pdu_count = 0, xfer_len = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *first_pdu = NULL; + + /* + * Get an struct iscsi_pdu pointer to the first PDU, and total PDU count + * of the DataOUT sequence. + */ + if (conn->sess->sess_ops->DataSequenceInOrder) { + for (i = 0; i < cmd->pdu_count; i++) { + if (cmd->pdu_list[i].seq_no == pdu->seq_no) { + if (!first_pdu) + first_pdu = &cmd->pdu_list[i]; + xfer_len += cmd->pdu_list[i].length; + pdu_count++; + } else if (pdu_count) + break; + } + } else { + struct iscsi_seq *seq = cmd->seq_ptr; + + first_pdu = &cmd->pdu_list[seq->pdu_start]; + pdu_count = seq->pdu_count; + } + + if (!first_pdu || !pdu_count) + return DATAOUT_CANNOT_RECOVER; + + /* + * Loop through the ending DataOUT Sequence checking each struct iscsi_pdu. + * The following ugly logic does batching of not received PDUs. + */ + for (i = 0; i < pdu_count; i++) { + if (first_pdu[i].status == ISCSI_PDU_RECEIVED_OK) { + if (!send_recovery_r2t) + continue; + + if (iscsit_send_recovery_r2t(cmd, offset, length) < 0) + return DATAOUT_CANNOT_RECOVER; + + send_recovery_r2t = length = offset = 0; + continue; + } + /* + * Set recovery = 1 for any missing, CRC failed, or timed + * out PDUs to let the DataOUT logic know that this sequence + * has not been completed yet. + * + * Also, only send a Recovery R2T for ISCSI_PDU_NOT_RECEIVED. + * We assume if the PDU either failed CRC or timed out + * that a Recovery R2T has already been sent. + */ + recovery = 1; + + if (first_pdu[i].status != ISCSI_PDU_NOT_RECEIVED) + continue; + + if (!offset) + offset = first_pdu[i].offset; + length += first_pdu[i].length; + + send_recovery_r2t = 1; + } + + if (send_recovery_r2t) + if (iscsit_send_recovery_r2t(cmd, offset, length) < 0) + return DATAOUT_CANNOT_RECOVER; + + return (!recovery) ? 
DATAOUT_NORMAL : DATAOUT_WITHIN_COMMAND_RECOVERY; +} + +static int iscsit_recalculate_dataout_values( + struct iscsi_cmd *cmd, + u32 pdu_offset, + u32 pdu_length, + u32 *r2t_offset, + u32 *r2t_length) +{ + int i; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + + if (conn->sess->sess_ops->DataSequenceInOrder) { + cmd->data_sn = 0; + + if (conn->sess->sess_ops->DataPDUInOrder) { + *r2t_offset = cmd->write_data_done; + *r2t_length = (cmd->seq_end_offset - + cmd->write_data_done); + return 0; + } + + *r2t_offset = cmd->seq_start_offset; + *r2t_length = (cmd->seq_end_offset - cmd->seq_start_offset); + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + if ((pdu->offset >= cmd->seq_start_offset) && + ((pdu->offset + pdu->length) <= + cmd->seq_end_offset)) { + if (!cmd->unsolicited_data) + cmd->next_burst_len -= pdu->length; + else + cmd->first_burst_len -= pdu->length; + + cmd->write_data_done -= pdu->length; + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + } else { + struct iscsi_seq *seq = NULL; + + seq = iscsit_get_seq_holder(cmd, pdu_offset, pdu_length); + if (!seq) + return -1; + + *r2t_offset = seq->orig_offset; + *r2t_length = seq->xfer_len; + + cmd->write_data_done -= (seq->offset - seq->orig_offset); + if (cmd->immediate_data) + cmd->first_burst_len = cmd->write_data_done; + + seq->data_sn = 0; + seq->offset = seq->orig_offset; + seq->next_burst_len = 0; + seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY; + + if (conn->sess->sess_ops->DataPDUInOrder) + return 0; + + for (i = 0; i < seq->pdu_count; i++) { + pdu = &cmd->pdu_list[i+seq->pdu_start]; + + if (pdu->status != ISCSI_PDU_RECEIVED_OK) + continue; + + pdu->status = ISCSI_PDU_NOT_RECEIVED; + } + } + + return 0; +} + +int iscsit_recover_dataout_sequence( + struct iscsi_cmd *cmd, + u32 pdu_offset, + u32 pdu_length) +{ + u32 r2t_length = 0, r2t_offset = 0; + + spin_lock_bh(&cmd->istate_lock); + cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY; + spin_unlock_bh(&cmd->istate_lock); + + if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length, + &r2t_offset, &r2t_length) < 0) + return DATAOUT_CANNOT_RECOVER; + + iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length); + + return DATAOUT_WITHIN_COMMAND_RECOVERY; +} + +static struct iscsi_ooo_cmdsn *iscsit_allocate_ooo_cmdsn(void) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL; + + ooo_cmdsn = kmem_cache_zalloc(lio_ooo_cache, GFP_ATOMIC); + if (!ooo_cmdsn) { + pr_err("Unable to allocate memory for" + " struct iscsi_ooo_cmdsn.\n"); + return NULL; + } + INIT_LIST_HEAD(&ooo_cmdsn->ooo_list); + + return ooo_cmdsn; +} + +/* + * Called with sess->cmdsn_mutex held. + */ +static int iscsit_attach_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_ooo_cmdsn *ooo_cmdsn) +{ + struct iscsi_ooo_cmdsn *ooo_tail, *ooo_tmp; + /* + * We attach the struct iscsi_ooo_cmdsn entry to the out of order + * list in increasing CmdSN order. + * This allows iscsi_execute_ooo_cmdsns() to detect any + * additional CmdSN holes while performing delayed execution. + */ + if (list_empty(&sess->sess_ooo_cmdsn_list)) + list_add_tail(&ooo_cmdsn->ooo_list, + &sess->sess_ooo_cmdsn_list); + else { + ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev, + typeof(*ooo_tail), ooo_list); + /* + * CmdSN is greater than the tail of the list. 
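For illustration, the ordering invariant the insertion below maintains, modeled on a plain singly linked list in userspace (the kernel code uses its doubly linked list_head machinery instead; the types and names here are invented for the sketch):

#include <stdint.h>

struct ooo_node {
	uint32_t cmdsn;
	struct ooo_node *next;
};

/* Splice @node in before the first entry with a larger CmdSN, so the
 * list stays sorted in ascending CmdSN order. */
static void ooo_insert_sorted(struct ooo_node **head, struct ooo_node *node)
{
	struct ooo_node **pp = head;

	while (*pp && (*pp)->cmdsn < node->cmdsn)
		pp = &(*pp)->next;	/* skip entries with smaller CmdSNs */

	node->next = *pp;
	*pp = node;
}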
+ */ + if (ooo_tail->cmdsn < ooo_cmdsn->cmdsn) + list_add_tail(&ooo_cmdsn->ooo_list, + &sess->sess_ooo_cmdsn_list); + else { + /* + * CmdSN is either lower than the head, or somewhere + * in the middle. + */ + list_for_each_entry(ooo_tmp, &sess->sess_ooo_cmdsn_list, + ooo_list) { + if (ooo_tmp->cmdsn < ooo_cmdsn->cmdsn) + continue; + + /* + * Insert before the first entry with a larger + * CmdSN to keep the list sorted. + */ + list_add(&ooo_cmdsn->ooo_list, + ooo_tmp->ooo_list.prev); + break; + } + } + + return 0; +} + +/* + * Removes a struct iscsi_ooo_cmdsn from a session's list, + * called with struct iscsi_session->cmdsn_mutex held. + */ +void iscsit_remove_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_ooo_cmdsn *ooo_cmdsn) +{ + list_del(&ooo_cmdsn->ooo_list); + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); +} + +void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *conn) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn; + struct iscsi_session *sess = conn->sess; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry(ooo_cmdsn, &sess->sess_ooo_cmdsn_list, ooo_list) { + if (ooo_cmdsn->cid != conn->cid) + continue; + + ooo_cmdsn->cmd = NULL; + } + mutex_unlock(&sess->cmdsn_mutex); +} + +/* + * Called with sess->cmdsn_mutex held. + */ +int iscsit_execute_ooo_cmdsns(struct iscsi_session *sess) +{ + int ooo_count = 0; + struct iscsi_cmd *cmd = NULL; + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + if (ooo_cmdsn->cmdsn != sess->exp_cmd_sn) + continue; + + if (!ooo_cmdsn->cmd) { + sess->exp_cmd_sn++; + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + continue; + } + + cmd = ooo_cmdsn->cmd; + cmd->i_state = cmd->deferred_i_state; + ooo_count++; + sess->exp_cmd_sn++; + pr_debug("Executing out of order CmdSN: 0x%08x," + " incremented ExpCmdSN to 0x%08x.\n", + cmd->cmd_sn, sess->exp_cmd_sn); + + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + + if (iscsit_execute_cmd(cmd, 1) < 0) + return -1; + + continue; + } + + return ooo_count; +} + +/* + * Called either: + * + * 1. With sess->cmdsn_mutex held from iscsi_execute_ooo_cmdsns() + * or iscsi_check_received_cmdsn(). + * 2. With no locks held directly from iscsi_handle_XXX_pdu() functions + * for immediate commands. + */ +int iscsit_execute_cmd(struct iscsi_cmd *cmd, int ooo) +{ + struct se_cmd *se_cmd = &cmd->se_cmd; + int lr = 0; + + spin_lock_bh(&cmd->istate_lock); + if (ooo) + cmd->cmd_flags &= ~ICF_OOO_CMDSN; + + switch (cmd->iscsi_opcode) { + case ISCSI_OP_SCSI_CMD: + /* + * Go ahead and send the CHECK_CONDITION status for + * any SCSI CDB exceptions that may have occurred, also + * handle the SCF_SCSI_RESERVATION_CONFLICT case here as well.
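For illustration, how delayed execution drains that sorted list once the CmdSN hole fills, again as a userspace sketch reusing struct ooo_node from the previous aside (the kernel loop additionally dispatches each entry's command via iscsit_execute_cmd() and frees the entry from a kmem_cache):

static int ooo_drain(struct ooo_node **head, uint32_t *exp_cmd_sn)
{
	int executed = 0;

	/* each queued entry matching ExpCmdSN closes the hole and
	 * exposes the next one */
	while (*head && (*head)->cmdsn == *exp_cmd_sn) {
		*head = (*head)->next;
		(*exp_cmd_sn)++;
		executed++;
	}
	return executed;
}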
+ */ + if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) { + if (se_cmd->se_cmd_flags & + SCF_SCSI_RESERVATION_CONFLICT) { + cmd->i_state = ISTATE_SEND_STATUS; + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, + cmd->i_state); + return 0; + } + spin_unlock_bh(&cmd->istate_lock); + /* + * Determine if delayed TASK_ABORTED status for WRITEs + * should be sent now if no unsolicited data out + * payloads are expected, or if the delayed status + * should be sent after unsolicited data out with + * ISCSI_FLAG_CMD_FINAL set in iscsi_handle_data_out() + */ + if (transport_check_aborted_status(se_cmd, + (cmd->unsolicited_data == 0)) != 0) + return 0; + /* + * Otherwise send CHECK_CONDITION and sense for + * exception + */ + return transport_send_check_condition_and_sense(se_cmd, + se_cmd->scsi_sense_reason, 0); + } + /* + * Special case for delayed CmdSN with Immediate + * Data and/or Unsolicited Data Out attached. + */ + if (cmd->immediate_data) { + if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) { + spin_unlock_bh(&cmd->istate_lock); + return transport_generic_handle_data( + &cmd->se_cmd); + } + spin_unlock_bh(&cmd->istate_lock); + + if (!(cmd->cmd_flags & + ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { + /* + * Send the delayed TASK_ABORTED status for + * WRITEs if no more unsolicited data is + * expected. + */ + if (transport_check_aborted_status(se_cmd, 1) + != 0) + return 0; + + iscsit_set_dataout_sequence_values(cmd); + iscsit_build_r2ts_for_cmd(cmd, cmd->conn, 0); + } + return 0; + } + /* + * The default handler. + */ + spin_unlock_bh(&cmd->istate_lock); + + if ((cmd->data_direction == DMA_TO_DEVICE) && + !(cmd->cmd_flags & ICF_NON_IMMEDIATE_UNSOLICITED_DATA)) { + /* + * Send the delayed TASK_ABORTED status for WRITEs if + * no more unsolicited data is expected.
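For illustration, the write-path dispatch above compressed into a decision table; the enum and helper names are local to this sketch (the real code tests cmd->immediate_data, ICF_GOT_LAST_DATAOUT and ICF_NON_IMMEDIATE_UNSOLICITED_DATA):

enum write_action { HANDLE_DATA, WAIT_UNSOLICITED, BUILD_R2TS };

static enum write_action write_dispatch(int immediate_data,
					int got_last_dataout,
					int more_unsolicited)
{
	if (immediate_data && got_last_dataout)
		return HANDLE_DATA;	 /* all data arrived with the command */
	if (more_unsolicited)
		return WAIT_UNSOLICITED; /* FirstBurst Data-OUT still inbound */
	return BUILD_R2TS;		 /* solicit the remainder via R2T */
}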
+ */ + if (transport_check_aborted_status(se_cmd, 1) != 0) + return 0; + + iscsit_set_dataout_sequence_values(cmd); + spin_lock_bh(&cmd->dataout_timeout_lock); + iscsit_start_dataout_timer(cmd, cmd->conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + } + return transport_handle_cdb_direct(&cmd->se_cmd); + + case ISCSI_OP_NOOP_OUT: + case ISCSI_OP_TEXT: + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, cmd->i_state); + break; + case ISCSI_OP_SCSI_TMFUNC: + if (se_cmd->se_cmd_flags & SCF_SCSI_CDB_EXCEPTION) { + spin_unlock_bh(&cmd->istate_lock); + iscsit_add_cmd_to_response_queue(cmd, cmd->conn, + cmd->i_state); + return 0; + } + spin_unlock_bh(&cmd->istate_lock); + + return transport_generic_handle_tmr(&cmd->se_cmd); + case ISCSI_OP_LOGOUT: + spin_unlock_bh(&cmd->istate_lock); + switch (cmd->logout_reason) { + case ISCSI_LOGOUT_REASON_CLOSE_SESSION: + lr = iscsit_logout_closesession(cmd, cmd->conn); + break; + case ISCSI_LOGOUT_REASON_CLOSE_CONNECTION: + lr = iscsit_logout_closeconnection(cmd, cmd->conn); + break; + case ISCSI_LOGOUT_REASON_RECOVERY: + lr = iscsit_logout_removeconnforrecovery(cmd, cmd->conn); + break; + default: + pr_err("Unknown iSCSI Logout Request Code:" + " 0x%02x\n", cmd->logout_reason); + return -1; + } + + return lr; + default: + spin_unlock_bh(&cmd->istate_lock); + pr_err("Cannot perform out of order execution for" + " unknown iSCSI Opcode: 0x%02x\n", cmd->iscsi_opcode); + return -1; + } + + return 0; +} + +void iscsit_free_all_ooo_cmdsns(struct iscsi_session *sess) +{ + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + + list_del(&ooo_cmdsn->ooo_list); + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); + } + mutex_unlock(&sess->cmdsn_mutex); +} + +int iscsit_handle_ooo_cmdsn( + struct iscsi_session *sess, + struct iscsi_cmd *cmd, + u32 cmdsn) +{ + int batch = 0; + struct iscsi_ooo_cmdsn *ooo_cmdsn = NULL, *ooo_tail = NULL; + + cmd->deferred_i_state = cmd->i_state; + cmd->i_state = ISTATE_DEFERRED_CMD; + cmd->cmd_flags |= ICF_OOO_CMDSN; + + if (list_empty(&sess->sess_ooo_cmdsn_list)) + batch = 1; + else { + ooo_tail = list_entry(sess->sess_ooo_cmdsn_list.prev, + typeof(*ooo_tail), ooo_list); + if (ooo_tail->cmdsn != (cmdsn - 1)) + batch = 1; + } + + ooo_cmdsn = iscsit_allocate_ooo_cmdsn(); + if (!ooo_cmdsn) + return CMDSN_ERROR_CANNOT_RECOVER; + + ooo_cmdsn->cmd = cmd; + ooo_cmdsn->batch_count = (batch) ? + (cmdsn - sess->exp_cmd_sn) : 1; + ooo_cmdsn->cid = cmd->conn->cid; + ooo_cmdsn->exp_cmdsn = sess->exp_cmd_sn; + ooo_cmdsn->cmdsn = cmdsn; + + if (iscsit_attach_ooo_cmdsn(sess, ooo_cmdsn) < 0) { + kmem_cache_free(lio_ooo_cache, ooo_cmdsn); + return CMDSN_ERROR_CANNOT_RECOVER; + } + + return CMDSN_HIGHER_THAN_EXP; +} + +static int iscsit_set_dataout_timeout_values( + struct iscsi_cmd *cmd, + u32 *offset, + u32 *length) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_r2t *r2t; + + if (cmd->unsolicited_data) { + *offset = 0; + *length = (conn->sess->sess_ops->FirstBurstLength > + cmd->data_length) ? 
+ cmd->data_length : + conn->sess->sess_ops->FirstBurstLength; + return 0; + } + + spin_lock_bh(&cmd->r2t_lock); + if (list_empty(&cmd->cmd_r2t_list)) { + pr_err("cmd->cmd_r2t_list is empty!\n"); + spin_unlock_bh(&cmd->r2t_lock); + return -1; + } + + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if (r2t->sent_r2t && !r2t->recovery_r2t && !r2t->seq_complete) { + *offset = r2t->offset; + *length = r2t->xfer_len; + spin_unlock_bh(&cmd->r2t_lock); + return 0; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate any incomplete DataOUT" + " sequences for ITT: 0x%08x.\n", cmd->init_task_tag); + + return -1; +} + +/* + * NOTE: Called from interrupt (timer) context. + */ +static void iscsit_handle_dataout_timeout(unsigned long data) +{ + u32 pdu_length = 0, pdu_offset = 0; + u32 r2t_length = 0, r2t_offset = 0; + struct iscsi_cmd *cmd = (struct iscsi_cmd *) data; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = NULL; + struct iscsi_node_attrib *na; + + iscsit_inc_conn_usage_count(conn); + + spin_lock_bh(&cmd->dataout_timeout_lock); + if (cmd->dataout_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_dec_conn_usage_count(conn); + return; + } + cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING; + sess = conn->sess; + na = iscsit_tpg_get_node_attrib(sess); + + if (!sess->sess_ops->ErrorRecoveryLevel) { + pr_debug("Unable to recover from DataOut timeout while" + " in ERL=0.\n"); + goto failure; + } + + if (++cmd->dataout_timeout_retries == na->dataout_timeout_retries) { + pr_debug("Command ITT: 0x%08x exceeded max retries" + " for DataOUT timeout %u, closing iSCSI connection.\n", + cmd->init_task_tag, na->dataout_timeout_retries); + goto failure; + } + + cmd->cmd_flags |= ICF_WITHIN_COMMAND_RECOVERY; + + if (conn->sess->sess_ops->DataSequenceInOrder) { + if (conn->sess->sess_ops->DataPDUInOrder) { + pdu_offset = cmd->write_data_done; + if ((pdu_offset + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len)) > cmd->data_length) + pdu_length = (cmd->data_length - + cmd->write_data_done); + else + pdu_length = (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len); + } else { + pdu_offset = cmd->seq_start_offset; + pdu_length = (cmd->seq_end_offset - + cmd->seq_start_offset); + } + } else { + if (iscsit_set_dataout_timeout_values(cmd, &pdu_offset, + &pdu_length) < 0) + goto failure; + } + + if (iscsit_recalculate_dataout_values(cmd, pdu_offset, pdu_length, + &r2t_offset, &r2t_length) < 0) + goto failure; + + pr_debug("Command ITT: 0x%08x timed out waiting for" + " completion of %sDataOUT Sequence Offset: %u, Length: %u\n", + cmd->init_task_tag, (cmd->unsolicited_data) ? 
"Unsolicited " : + "", r2t_offset, r2t_length); + + if (iscsit_send_recovery_r2t(cmd, r2t_offset, r2t_length) < 0) + goto failure; + + iscsit_start_dataout_timer(cmd, conn); + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_dec_conn_usage_count(conn); + + return; + +failure: + spin_unlock_bh(&cmd->dataout_timeout_lock); + iscsit_cause_connection_reinstatement(conn, 0); + iscsit_dec_conn_usage_count(conn); +} + +void iscsit_mod_dataout_timer(struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = na = iscsit_tpg_get_node_attrib(sess); + + spin_lock_bh(&cmd->dataout_timeout_lock); + if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + return; + } + + mod_timer(&cmd->dataout_timer, + (get_jiffies_64() + na->dataout_timeout * HZ)); + pr_debug("Updated DataOUT timer for ITT: 0x%08x", + cmd->init_task_tag); + spin_unlock_bh(&cmd->dataout_timeout_lock); +} + +/* + * Called with cmd->dataout_timeout_lock held. + */ +void iscsit_start_dataout_timer( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = na = iscsit_tpg_get_node_attrib(sess); + + if (cmd->dataout_timer_flags & ISCSI_TF_RUNNING) + return; + + pr_debug("Starting DataOUT timer for ITT: 0x%08x on" + " CID: %hu.\n", cmd->init_task_tag, conn->cid); + + init_timer(&cmd->dataout_timer); + cmd->dataout_timer.expires = (get_jiffies_64() + na->dataout_timeout * HZ); + cmd->dataout_timer.data = (unsigned long)cmd; + cmd->dataout_timer.function = iscsit_handle_dataout_timeout; + cmd->dataout_timer_flags &= ~ISCSI_TF_STOP; + cmd->dataout_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&cmd->dataout_timer); +} + +void iscsit_stop_dataout_timer(struct iscsi_cmd *cmd) +{ + spin_lock_bh(&cmd->dataout_timeout_lock); + if (!(cmd->dataout_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&cmd->dataout_timeout_lock); + return; + } + cmd->dataout_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&cmd->dataout_timeout_lock); + + del_timer_sync(&cmd->dataout_timer); + + spin_lock_bh(&cmd->dataout_timeout_lock); + cmd->dataout_timer_flags &= ~ISCSI_TF_RUNNING; + pr_debug("Stopped DataOUT Timer for ITT: 0x%08x\n", + cmd->init_task_tag); + spin_unlock_bh(&cmd->dataout_timeout_lock); +} diff --git a/drivers/target/iscsi/iscsi_target_erl1.h b/drivers/target/iscsi/iscsi_target_erl1.h new file mode 100644 index 000000000000..85e67e29de6b --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl1.h @@ -0,0 +1,26 @@ +#ifndef ISCSI_TARGET_ERL1_H +#define ISCSI_TARGET_ERL1_H + +extern int iscsit_dump_data_payload(struct iscsi_conn *, u32, int); +extern int iscsit_create_recovery_datain_values_datasequenceinorder_yes( + struct iscsi_cmd *, struct iscsi_datain_req *); +extern int iscsit_create_recovery_datain_values_datasequenceinorder_no( + struct iscsi_cmd *, struct iscsi_datain_req *); +extern int iscsit_handle_recovery_datain_or_r2t(struct iscsi_conn *, unsigned char *, + u32, u32, u32, u32); +extern int iscsit_handle_status_snack(struct iscsi_conn *, u32, u32, + u32, u32); +extern int iscsit_handle_data_ack(struct iscsi_conn *, u32, u32, u32); +extern int iscsit_dataout_datapduinorder_no_fbit(struct iscsi_cmd *, struct iscsi_pdu *); +extern int iscsit_recover_dataout_sequence(struct iscsi_cmd *, u32, u32); +extern void iscsit_clear_ooo_cmdsns_for_conn(struct iscsi_conn *); +extern void iscsit_free_all_ooo_cmdsns(struct iscsi_session *); +extern int 
iscsit_execute_ooo_cmdsns(struct iscsi_session *); +extern int iscsit_execute_cmd(struct iscsi_cmd *, int); +extern int iscsit_handle_ooo_cmdsn(struct iscsi_session *, struct iscsi_cmd *, u32); +extern void iscsit_remove_ooo_cmdsn(struct iscsi_session *, struct iscsi_ooo_cmdsn *); +extern void iscsit_mod_dataout_timer(struct iscsi_cmd *); +extern void iscsit_start_dataout_timer(struct iscsi_cmd *, struct iscsi_conn *); +extern void iscsit_stop_dataout_timer(struct iscsi_cmd *); + +#endif /* ISCSI_TARGET_ERL1_H */ diff --git a/drivers/target/iscsi/iscsi_target_erl2.c b/drivers/target/iscsi/iscsi_target_erl2.c new file mode 100644 index 000000000000..91a4d170bda4 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl2.c @@ -0,0 +1,474 @@ +/******************************************************************************* + * This file contains error recovery level two functions used by + * the iSCSI Target driver. + * + * © Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_util.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target.h" + +/* + * FIXME: Does RData SNACK apply here as well?
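For illustration, a userspace model of the DataSN replay performed by iscsit_create_conn_recovery_datain_values() just below: every DataSN under exp_data_sn accounts for one full-sized PDU until the burst boundary clamps the final one. Parameter names are invented for the sketch:

#include <stdint.h>

static uint32_t replay_read_data_done(uint32_t exp_data_sn, uint32_t mrdsl,
				      uint32_t max_burst_len)
{
	uint32_t data_sn, done = 0, burst = 0;

	for (data_sn = 0; data_sn < exp_data_sn; data_sn++) {
		if (burst + mrdsl < max_burst_len) {
			done += mrdsl;	/* full MaxRecvDataSegmentLength PDU */
			burst += mrdsl;
		} else {
			/* last PDU of the burst is clamped to MaxBurstLength */
			done += max_burst_len - burst;
			burst = 0;
		}
	}
	return done;
}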
+ */ +void iscsit_create_conn_recovery_datain_values( + struct iscsi_cmd *cmd, + u32 exp_data_sn) +{ + u32 data_sn = 0; + struct iscsi_conn *conn = cmd->conn; + + cmd->next_burst_len = 0; + cmd->read_data_done = 0; + + while (exp_data_sn > data_sn) { + if ((cmd->next_burst_len + + conn->conn_ops->MaxRecvDataSegmentLength) < + conn->sess->sess_ops->MaxBurstLength) { + cmd->read_data_done += + conn->conn_ops->MaxRecvDataSegmentLength; + cmd->next_burst_len += + conn->conn_ops->MaxRecvDataSegmentLength; + } else { + cmd->read_data_done += + (conn->sess->sess_ops->MaxBurstLength - + cmd->next_burst_len); + cmd->next_burst_len = 0; + } + data_sn++; + } +} + +void iscsit_create_conn_recovery_dataout_values( + struct iscsi_cmd *cmd) +{ + u32 write_data_done = 0; + struct iscsi_conn *conn = cmd->conn; + + cmd->data_sn = 0; + cmd->next_burst_len = 0; + + while (cmd->write_data_done > write_data_done) { + if ((write_data_done + conn->sess->sess_ops->MaxBurstLength) <= + cmd->write_data_done) + write_data_done += conn->sess->sess_ops->MaxBurstLength; + else + break; + } + + cmd->write_data_done = write_data_done; +} + +static int iscsit_attach_active_connection_recovery_entry( + struct iscsi_session *sess, + struct iscsi_conn_recovery *cr) +{ + spin_lock(&sess->cr_a_lock); + list_add_tail(&cr->cr_list, &sess->cr_active_list); + spin_unlock(&sess->cr_a_lock); + + return 0; +} + +static int iscsit_attach_inactive_connection_recovery_entry( + struct iscsi_session *sess, + struct iscsi_conn_recovery *cr) +{ + spin_lock(&sess->cr_i_lock); + list_add_tail(&cr->cr_list, &sess->cr_inactive_list); + + sess->conn_recovery_count++; + pr_debug("Incremented connection recovery count to %u for" + " SID: %u\n", sess->conn_recovery_count, sess->sid); + spin_unlock(&sess->cr_i_lock); + + return 0; +} + +struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry( + struct iscsi_session *sess, + u16 cid) +{ + struct iscsi_conn_recovery *cr; + + spin_lock(&sess->cr_i_lock); + list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) { + if (cr->cid == cid) { + spin_unlock(&sess->cr_i_lock); + return cr; + } + } + spin_unlock(&sess->cr_i_lock); + + return NULL; +} + +void iscsit_free_connection_recovery_entires(struct iscsi_session *sess) +{ + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_conn_recovery *cr, *cr_tmp; + + spin_lock(&sess->cr_a_lock); + list_for_each_entry_safe(cr, cr_tmp, &sess->cr_active_list, cr_list) { + list_del(&cr->cr_list); + spin_unlock(&sess->cr_a_lock); + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + list_del(&cmd->i_list); + cmd->conn = NULL; + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_lock(&sess->cr_a_lock); + + kfree(cr); + } + spin_unlock(&sess->cr_a_lock); + + spin_lock(&sess->cr_i_lock); + list_for_each_entry_safe(cr, cr_tmp, &sess->cr_inactive_list, cr_list) { + list_del(&cr->cr_list); + spin_unlock(&sess->cr_i_lock); + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + list_del(&cmd->i_list); + cmd->conn = NULL; + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + 
!(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + spin_lock(&sess->cr_i_lock); + + kfree(cr); + } + spin_unlock(&sess->cr_i_lock); +} + +int iscsit_remove_active_connection_recovery_entry( + struct iscsi_conn_recovery *cr, + struct iscsi_session *sess) +{ + spin_lock(&sess->cr_a_lock); + list_del(&cr->cr_list); + + sess->conn_recovery_count--; + pr_debug("Decremented connection recovery count to %u for" + " SID: %u\n", sess->conn_recovery_count, sess->sid); + spin_unlock(&sess->cr_a_lock); + + kfree(cr); + + return 0; +} + +int iscsit_remove_inactive_connection_recovery_entry( + struct iscsi_conn_recovery *cr, + struct iscsi_session *sess) +{ + spin_lock(&sess->cr_i_lock); + list_del(&cr->cr_list); + spin_unlock(&sess->cr_i_lock); + + return 0; +} + +/* + * Called with cr->conn_recovery_cmd_lock help. + */ +int iscsit_remove_cmd_from_connection_recovery( + struct iscsi_cmd *cmd, + struct iscsi_session *sess) +{ + struct iscsi_conn_recovery *cr; + + if (!cmd->cr) { + pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x" + " is NULL!\n", cmd->init_task_tag); + BUG(); + } + cr = cmd->cr; + + list_del(&cmd->i_list); + return --cr->cmd_count; +} + +void iscsit_discard_cr_cmds_by_expstatsn( + struct iscsi_conn_recovery *cr, + u32 exp_statsn) +{ + u32 dropped_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_session *sess = cr->sess; + + spin_lock(&cr->conn_recovery_cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, + &cr->conn_recovery_cmd_list, i_list) { + + if (((cmd->deferred_i_state != ISTATE_SENT_STATUS) && + (cmd->deferred_i_state != ISTATE_REMOVE)) || + (cmd->stat_sn >= exp_statsn)) { + continue; + } + + dropped_count++; + pr_debug("Dropping Acknowledged ITT: 0x%08x, StatSN:" + " 0x%08x, CID: %hu.\n", cmd->init_task_tag, + cmd->stat_sn, cr->cid); + + iscsit_remove_cmd_from_connection_recovery(cmd, sess); + + spin_unlock(&cr->conn_recovery_cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 0); + spin_lock(&cr->conn_recovery_cmd_lock); + } + spin_unlock(&cr->conn_recovery_cmd_lock); + + pr_debug("Dropped %u total acknowledged commands on" + " CID: %hu less than old ExpStatSN: 0x%08x\n", + dropped_count, cr->cid, exp_statsn); + + if (!cr->cmd_count) { + pr_debug("No commands to be reassigned for failed" + " connection CID: %hu on SID: %u\n", + cr->cid, sess->sid); + iscsit_remove_inactive_connection_recovery_entry(cr, sess); + iscsit_attach_active_connection_recovery_entry(sess, cr); + pr_debug("iSCSI connection recovery successful for CID:" + " %hu on SID: %u\n", cr->cid, sess->sid); + iscsit_remove_active_connection_recovery_entry(cr, sess); + } else { + iscsit_remove_inactive_connection_recovery_entry(cr, sess); + iscsit_attach_active_connection_recovery_entry(sess, cr); + } +} + +int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *conn) +{ + u32 dropped_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_ooo_cmdsn *ooo_cmdsn, *ooo_cmdsn_tmp; + struct iscsi_session *sess = conn->sess; + + mutex_lock(&sess->cmdsn_mutex); + list_for_each_entry_safe(ooo_cmdsn, ooo_cmdsn_tmp, + &sess->sess_ooo_cmdsn_list, ooo_list) { + + if (ooo_cmdsn->cid != conn->cid) + continue; + + dropped_count++; + pr_debug("Dropping unacknowledged 
CmdSN:" + " 0x%08x during connection recovery on CID: %hu\n", + ooo_cmdsn->cmdsn, conn->cid); + iscsit_remove_ooo_cmdsn(sess, ooo_cmdsn); + } + mutex_unlock(&sess->cmdsn_mutex); + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + if (!(cmd->cmd_flags & ICF_OOO_CMDSN)) + continue; + + list_del(&cmd->i_list); + + spin_unlock_bh(&conn->cmd_lock); + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock_bh(&conn->cmd_lock); + } + spin_unlock_bh(&conn->cmd_lock); + + pr_debug("Dropped %u total unacknowledged commands on CID:" + " %hu for ExpCmdSN: 0x%08x.\n", dropped_count, conn->cid, + sess->exp_cmd_sn); + return 0; +} + +int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *conn) +{ + u32 cmd_count = 0; + struct iscsi_cmd *cmd, *cmd_tmp; + struct iscsi_conn_recovery *cr; + + /* + * Allocate an struct iscsi_conn_recovery for this connection. + * Each struct iscsi_cmd contains an struct iscsi_conn_recovery pointer + * (struct iscsi_cmd->cr) so we need to allocate this before preparing the + * connection's command list for connection recovery. + */ + cr = kzalloc(sizeof(struct iscsi_conn_recovery), GFP_KERNEL); + if (!cr) { + pr_err("Unable to allocate memory for" + " struct iscsi_conn_recovery.\n"); + return -1; + } + INIT_LIST_HEAD(&cr->cr_list); + INIT_LIST_HEAD(&cr->conn_recovery_cmd_list); + spin_lock_init(&cr->conn_recovery_cmd_lock); + /* + * Only perform connection recovery on ISCSI_OP_SCSI_CMD or + * ISCSI_OP_NOOP_OUT opcodes. For all other opcodes call + * list_del(&cmd->i_list); to release the command to the + * session pool and remove it from the connection's list. + * + * Also stop the DataOUT timer, which will be restarted after + * sending the TMR response. + */ + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry_safe(cmd, cmd_tmp, &conn->conn_cmd_list, i_list) { + + if ((cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD) && + (cmd->iscsi_opcode != ISCSI_OP_NOOP_OUT)) { + pr_debug("Not performing realligence on" + " Opcode: 0x%02x, ITT: 0x%08x, CmdSN: 0x%08x," + " CID: %hu\n", cmd->iscsi_opcode, + cmd->init_task_tag, cmd->cmd_sn, conn->cid); + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 0); + spin_lock_bh(&conn->cmd_lock); + continue; + } + + /* + * Special case where commands greater than or equal to + * the session's ExpCmdSN are attached to the connection + * list but not to the out of order CmdSN list. The one + * obvious case is when a command with immediate data + * attached must only check the CmdSN against ExpCmdSN + * after the data is received. The special case below + * is when the connection fails before data is received, + * but also may apply to other PDUs, so it has been + * made generic here. 
+ */ + if (!(cmd->cmd_flags & ICF_OOO_CMDSN) && !cmd->immediate_cmd && + (cmd->cmd_sn >= conn->sess->exp_cmd_sn)) { + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + if (!(cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) || + !(cmd->se_cmd.transport_wait_for_tasks)) + iscsit_release_cmd(cmd); + else + cmd->se_cmd.transport_wait_for_tasks( + &cmd->se_cmd, 1, 1); + spin_lock_bh(&conn->cmd_lock); + continue; + } + + cmd_count++; + pr_debug("Preparing Opcode: 0x%02x, ITT: 0x%08x," + " CmdSN: 0x%08x, StatSN: 0x%08x, CID: %hu for" + " realligence.\n", cmd->iscsi_opcode, + cmd->init_task_tag, cmd->cmd_sn, cmd->stat_sn, + conn->cid); + + cmd->deferred_i_state = cmd->i_state; + cmd->i_state = ISTATE_IN_CONNECTION_RECOVERY; + + if (cmd->data_direction == DMA_TO_DEVICE) + iscsit_stop_dataout_timer(cmd); + + cmd->sess = conn->sess; + + list_del(&cmd->i_list); + spin_unlock_bh(&conn->cmd_lock); + + iscsit_free_all_datain_reqs(cmd); + + if ((cmd->se_cmd.se_cmd_flags & SCF_SE_LUN_CMD) && + cmd->se_cmd.transport_wait_for_tasks) + cmd->se_cmd.transport_wait_for_tasks(&cmd->se_cmd, + 0, 0); + /* + * Add the struct iscsi_cmd to the connection recovery cmd list + */ + spin_lock(&cr->conn_recovery_cmd_lock); + list_add_tail(&cmd->i_list, &cr->conn_recovery_cmd_list); + spin_unlock(&cr->conn_recovery_cmd_lock); + + spin_lock_bh(&conn->cmd_lock); + cmd->cr = cr; + cmd->conn = NULL; + } + spin_unlock_bh(&conn->cmd_lock); + /* + * Fill in the various values in the preallocated struct iscsi_conn_recovery. + */ + cr->cid = conn->cid; + cr->cmd_count = cmd_count; + cr->maxrecvdatasegmentlength = conn->conn_ops->MaxRecvDataSegmentLength; + cr->sess = conn->sess; + + iscsit_attach_inactive_connection_recovery_entry(conn->sess, cr); + + return 0; +} + +int iscsit_connection_recovery_transport_reset(struct iscsi_conn *conn) +{ + atomic_set(&conn->connection_recovery, 1); + + if (iscsit_close_connection(conn) < 0) + return -1; + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_erl2.h b/drivers/target/iscsi/iscsi_target_erl2.h new file mode 100644 index 000000000000..22f8d24780a6 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_erl2.h @@ -0,0 +1,18 @@ +#ifndef ISCSI_TARGET_ERL2_H +#define ISCSI_TARGET_ERL2_H + +extern void iscsit_create_conn_recovery_datain_values(struct iscsi_cmd *, u32); +extern void iscsit_create_conn_recovery_dataout_values(struct iscsi_cmd *); +extern struct iscsi_conn_recovery *iscsit_get_inactive_connection_recovery_entry( + struct iscsi_session *, u16); +extern void iscsit_free_connection_recovery_entires(struct iscsi_session *); +extern int iscsit_remove_active_connection_recovery_entry( + struct iscsi_conn_recovery *, struct iscsi_session *); +extern int iscsit_remove_cmd_from_connection_recovery(struct iscsi_cmd *, + struct iscsi_session *); +extern void iscsit_discard_cr_cmds_by_expstatsn(struct iscsi_conn_recovery *, u32); +extern int iscsit_discard_unacknowledged_ooo_cmdsns_for_conn(struct iscsi_conn *); +extern int iscsit_prepare_cmds_for_realligance(struct iscsi_conn *); +extern int iscsit_connection_recovery_transport_reset(struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_ERL2_H ***/ diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c new file mode 100644 index 000000000000..bcaf82f47037 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -0,0 +1,1232 @@ +/******************************************************************************* + * This file contains the login functions used by the iSCSI Target 
driver. + * + * © Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/string.h> +#include <linux/kthread.h> +#include <linux/crypto.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_tq.h" +#include "iscsi_target_device.h" +#include "iscsi_target_nego.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_login.h" +#include "iscsi_target_stat.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_parameters.h" + +extern struct idr sess_idr; +extern struct mutex auth_id_lock; +extern spinlock_t sess_idr_lock; + +static int iscsi_login_init_conn(struct iscsi_conn *conn) +{ + INIT_LIST_HEAD(&conn->conn_list); + INIT_LIST_HEAD(&conn->conn_cmd_list); + INIT_LIST_HEAD(&conn->immed_queue_list); + INIT_LIST_HEAD(&conn->response_queue_list); + init_completion(&conn->conn_post_wait_comp); + init_completion(&conn->conn_wait_comp); + init_completion(&conn->conn_wait_rcfr_comp); + init_completion(&conn->conn_waiting_on_uc_comp); + init_completion(&conn->conn_logout_comp); + init_completion(&conn->rx_half_close_comp); + init_completion(&conn->tx_half_close_comp); + spin_lock_init(&conn->cmd_lock); + spin_lock_init(&conn->conn_usage_lock); + spin_lock_init(&conn->immed_queue_lock); + spin_lock_init(&conn->nopin_timer_lock); + spin_lock_init(&conn->response_queue_lock); + spin_lock_init(&conn->state_lock); + + if (!zalloc_cpumask_var(&conn->conn_cpumask, GFP_KERNEL)) { + pr_err("Unable to allocate conn->conn_cpumask\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * Used by iscsi_target_nego.c:iscsi_target_locate_portal() to setup + * per struct iscsi_conn libcrypto contexts for crc32c and crc32-intel + */ +int iscsi_login_setup_crypto(struct iscsi_conn *conn) +{ + /* + * Setup slicing by CRC32C algorithm for RX and TX libcrypto contexts + * which will default to crc32c_intel.ko for cpu_has_xmm4_2, or fallback + * to software 1x8 byte slicing from crc32c.ko + */ + conn->conn_rx_hash.flags = 0; + conn->conn_rx_hash.tfm = crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(conn->conn_rx_hash.tfm)) { + pr_err("crypto_alloc_hash() failed for conn_rx_tfm\n"); + return -ENOMEM; + } + + conn->conn_tx_hash.flags = 0; + conn->conn_tx_hash.tfm = crypto_alloc_hash("crc32c", 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(conn->conn_tx_hash.tfm)) { + pr_err("crypto_alloc_hash() failed for conn_tx_tfm\n"); + crypto_free_hash(conn->conn_rx_hash.tfm); + return -ENOMEM; + } + + return 0; +} + +static int iscsi_login_check_initiator_version( + struct iscsi_conn *conn, + u8 version_max, + u8 version_min) +{ + if ((version_max != 0x00) || (version_min !=
0x00)) { + pr_err("Unsupported iSCSI IETF Pre-RFC Revision," + " version Min/Max 0x%02x/0x%02x, rejecting login.\n", + version_min, version_max); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_NO_VERSION); + return -1; + } + + return 0; +} + +int iscsi_check_for_session_reinstatement(struct iscsi_conn *conn) +{ + int sessiontype; + struct iscsi_param *initiatorname_param = NULL, *sessiontype_param = NULL; + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_session *sess = NULL, *sess_p = NULL; + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + + initiatorname_param = iscsi_find_param_from_key( + INITIATORNAME, conn->param_list); + if (!initiatorname_param) + return -1; + + sessiontype_param = iscsi_find_param_from_key( + SESSIONTYPE, conn->param_list); + if (!sessiontype_param) + return -1; + + sessiontype = (strncmp(sessiontype_param->value, NORMAL, 6)) ? 1 : 0; + + spin_lock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + + sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; + spin_lock(&sess_p->conn_lock); + if (atomic_read(&sess_p->session_fall_back_to_erl0) || + atomic_read(&sess_p->session_logout) || + (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) { + spin_unlock(&sess_p->conn_lock); + continue; + } + if (!memcmp((void *)sess_p->isid, (void *)conn->sess->isid, 6) && + (!strcmp((void *)sess_p->sess_ops->InitiatorName, + (void *)initiatorname_param->value) && + (sess_p->sess_ops->SessionType == sessiontype))) { + atomic_set(&sess_p->session_reinstatement, 1); + spin_unlock(&sess_p->conn_lock); + iscsit_inc_session_usage_count(sess_p); + iscsit_stop_time2retain_timer(sess_p); + sess = sess_p; + break; + } + spin_unlock(&sess_p->conn_lock); + } + spin_unlock_bh(&se_tpg->session_lock); + /* + * If the Time2Retain handler has expired, the session is already gone. + */ + if (!sess) + return 0; + + pr_debug("%s iSCSI Session SID %u is still active for %s," + " preforming session reinstatement.\n", (sessiontype) ? + "Discovery" : "Normal", sess->sid, + sess->sess_ops->InitiatorName); + + spin_lock_bh(&sess->conn_lock); + if (sess->session_state == TARG_SESS_STATE_FAILED) { + spin_unlock_bh(&sess->conn_lock); + iscsit_dec_session_usage_count(sess); + return iscsit_close_session(sess); + } + spin_unlock_bh(&sess->conn_lock); + + iscsit_stop_session(sess, 1, 1); + iscsit_dec_session_usage_count(sess); + + return iscsit_close_session(sess); +} + +static void iscsi_login_set_conn_values( + struct iscsi_session *sess, + struct iscsi_conn *conn, + u16 cid) +{ + conn->sess = sess; + conn->cid = cid; + /* + * Generate a random Status sequence number (statsn) for the new + * iSCSI connection. + */ + get_random_bytes(&conn->stat_sn, sizeof(u32)); + + mutex_lock(&auth_id_lock); + conn->auth_id = iscsit_global->auth_id++; + mutex_unlock(&auth_id_lock); +} + +/* + * This is the leading connection of a new session, + * or session reinstatement. 
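For illustration, the triple match that triggers session reinstatement in iscsi_check_for_session_reinstatement() above, isolated as a sketch (the 6-byte ISID comparison mirrors the memcmp in the code; the helper and parameter names are invented here):

#include <stdint.h>
#include <string.h>

static int reinstatement_match(const uint8_t isid_old[6],
			       const uint8_t isid_new[6],
			       const char *name_old, const char *name_new,
			       int type_old, int type_new)
{
	/* same ISID + same InitiatorName + same SessionType means the
	 * stale session is replaced by the new login */
	return !memcmp(isid_old, isid_new, 6) &&
	       !strcmp(name_old, name_new) &&
	       type_old == type_new;
}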
+ */ +static int iscsi_login_zero_tsih_s1( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_session *sess = NULL; + struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; + + sess = kzalloc(sizeof(struct iscsi_session), GFP_KERNEL); + if (!sess) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + pr_err("Could not allocate memory for session\n"); + return -1; + } + + iscsi_login_set_conn_values(sess, conn, pdu->cid); + sess->init_task_tag = pdu->itt; + memcpy((void *)&sess->isid, (void *)pdu->isid, 6); + sess->exp_cmd_sn = pdu->cmdsn; + INIT_LIST_HEAD(&sess->sess_conn_list); + INIT_LIST_HEAD(&sess->sess_ooo_cmdsn_list); + INIT_LIST_HEAD(&sess->cr_active_list); + INIT_LIST_HEAD(&sess->cr_inactive_list); + init_completion(&sess->async_msg_comp); + init_completion(&sess->reinstatement_comp); + init_completion(&sess->session_wait_comp); + init_completion(&sess->session_waiting_on_uc_comp); + mutex_init(&sess->cmdsn_mutex); + spin_lock_init(&sess->conn_lock); + spin_lock_init(&sess->cr_a_lock); + spin_lock_init(&sess->cr_i_lock); + spin_lock_init(&sess->session_usage_lock); + spin_lock_init(&sess->ttt_lock); + + if (!idr_pre_get(&sess_idr, GFP_KERNEL)) { + pr_err("idr_pre_get() for sess_idr failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + spin_lock(&sess_idr_lock); + idr_get_new(&sess_idr, NULL, &sess->session_index); + spin_unlock(&sess_idr_lock); + + sess->creation_time = get_jiffies_64(); + spin_lock_init(&sess->session_stats_lock); + /* + * The FFP CmdSN window values will be allocated from the TPG's + * Initiator Node's ACL once the login has been successfully completed. + */ + sess->max_cmd_sn = pdu->cmdsn; + + sess->sess_ops = kzalloc(sizeof(struct iscsi_sess_ops), GFP_KERNEL); + if (!sess->sess_ops) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + pr_err("Unable to allocate memory for" + " struct iscsi_sess_ops.\n"); + return -1; + } + + sess->se_sess = transport_init_session(); + if (!sess->se_sess) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + return 0; +} + +static int iscsi_login_zero_tsih_s2( + struct iscsi_conn *conn) +{ + struct iscsi_node_attrib *na; + struct iscsi_session *sess = conn->sess; + unsigned char buf[32]; + + sess->tpg = conn->tpg; + + /* + * Assign a new TPG Session Handle. Note this is protected with + * struct iscsi_portal_group->np_login_sem from iscsit_access_np(). + */ + sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + if (!sess->tsih) + sess->tsih = ++ISCSI_TPG_S(sess)->ntsih; + + /* + * Create the default params from user defined values.. + */ + if (iscsi_copy_param_list(&conn->param_list, + ISCSI_TPG_C(conn)->param_list, 1) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + iscsi_set_keys_to_negotiate(0, conn->param_list); + + if (sess->sess_ops->SessionType) + return iscsi_set_keys_irrelevant_for_discovery( + conn->param_list); + + na = iscsit_tpg_get_node_attrib(sess); + + /* + * Need to send TargetPortalGroupTag back in first login response + * on any iSCSI connection where the Initiator provides TargetName. + * See 5.3.1. Login Phase Start + * + * In our case, we have already located the struct iscsi_tiqn at this point. 
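For illustration, why the TSIH assignment in iscsi_login_zero_tsih_s2() above increments twice on wrap: RFC 3720 reserves a TSIH of 0 in a Login Request to mean "create a new session", so the per-TPG counter must skip it. A minimal sketch with an invented helper name:

#include <stdint.h>

static uint16_t next_tsih(uint16_t *ntsih)
{
	uint16_t tsih = ++(*ntsih);

	if (!tsih)		/* counter wrapped onto the reserved value 0 */
		tsih = ++(*ntsih);
	return tsih;
}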
+ */ + memset(buf, 0, 32); + sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + /* + * Workaround for Initiators that have broken connection recovery logic. + * + * "We would really like to get rid of this." Linux-iSCSI.org team + */ + memset(buf, 0, 32); + sprintf(buf, "ErrorRecoveryLevel=%d", na->default_erl); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + if (iscsi_login_disable_FIM_keys(conn->param_list, conn) < 0) + return -1; + + return 0; +} + +/* + * Remove PSTATE_NEGOTIATE for the four FIM related keys. + * The Initiator node will be able to enable FIM by proposing the keys itself. + */ +int iscsi_login_disable_FIM_keys( + struct iscsi_param_list *param_list, + struct iscsi_conn *conn) +{ + struct iscsi_param *param; + + param = iscsi_find_param_from_key("OFMarker", param_list); + if (!param) { + pr_err("iscsi_find_param_from_key() for" + " OFMarker failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + param->state &= ~PSTATE_NEGOTIATE; + + param = iscsi_find_param_from_key("OFMarkInt", param_list); + if (!param) { + pr_err("iscsi_find_param_from_key() for" + " OFMarkInt failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + param->state &= ~PSTATE_NEGOTIATE; + + param = iscsi_find_param_from_key("IFMarker", param_list); + if (!param) { + pr_err("iscsi_find_param_from_key() for" + " IFMarker failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + param->state &= ~PSTATE_NEGOTIATE; + + param = iscsi_find_param_from_key("IFMarkInt", param_list); + if (!param) { + pr_err("iscsi_find_param_from_key() for" + " IFMarkInt failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + param->state &= ~PSTATE_NEGOTIATE; + + return 0; +} + +static int iscsi_login_non_zero_tsih_s1( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; + + iscsi_login_set_conn_values(NULL, conn, pdu->cid); + return 0; +} + +/* + * Add a new connection to an existing session.
+ */ +static int iscsi_login_non_zero_tsih_s2( + struct iscsi_conn *conn, + unsigned char *buf) +{ + struct iscsi_portal_group *tpg = conn->tpg; + struct iscsi_session *sess = NULL, *sess_p = NULL; + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct se_session *se_sess, *se_sess_tmp; + struct iscsi_login_req *pdu = (struct iscsi_login_req *)buf; + + spin_lock_bh(&se_tpg->session_lock); + list_for_each_entry_safe(se_sess, se_sess_tmp, &se_tpg->tpg_sess_list, + sess_list) { + + sess_p = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (atomic_read(&sess_p->session_fall_back_to_erl0) || + atomic_read(&sess_p->session_logout) || + (sess_p->time2retain_timer_flags & ISCSI_TF_EXPIRED)) + continue; + if (!memcmp((const void *)sess_p->isid, + (const void *)pdu->isid, 6) && + (sess_p->tsih == pdu->tsih)) { + iscsit_inc_session_usage_count(sess_p); + iscsit_stop_time2retain_timer(sess_p); + sess = sess_p; + break; + } + } + spin_unlock_bh(&se_tpg->session_lock); + + /* + * If the Time2Retain handler has expired, the session is already gone. + */ + if (!sess) { + pr_err("Initiator attempting to add a connection to" + " a non-existent session, rejecting iSCSI Login.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_NO_SESSION); + return -1; + } + + /* + * Stop the Time2Retain timer if this is a failed session; we restart + * the timer if the login is not successful. + */ + spin_lock_bh(&sess->conn_lock); + if (sess->session_state == TARG_SESS_STATE_FAILED) + atomic_set(&sess->session_continuation, 1); + spin_unlock_bh(&sess->conn_lock); + + iscsi_login_set_conn_values(sess, conn, pdu->cid); + + if (iscsi_copy_param_list(&conn->param_list, + ISCSI_TPG_C(conn)->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + iscsi_set_keys_to_negotiate(0, conn->param_list); + /* + * Need to send TargetPortalGroupTag back in first login response + * on any iSCSI connection where the Initiator provides TargetName. + * See 5.3.1. Login Phase Start + * + * In our case, we have already located the struct iscsi_tiqn at this point. + */ + memset(buf, 0, 32); + sprintf(buf, "TargetPortalGroupTag=%hu", ISCSI_TPG_S(sess)->tpgt); + if (iscsi_change_param_value(buf, conn->param_list, 0) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + return -1; + } + + return iscsi_login_disable_FIM_keys(conn->param_list, conn); +} + +int iscsi_login_post_auth_non_zero_tsih( + struct iscsi_conn *conn, + u16 cid, + u32 exp_statsn) +{ + struct iscsi_conn *conn_ptr = NULL; + struct iscsi_conn_recovery *cr = NULL; + struct iscsi_session *sess = conn->sess; + + /* + * By following item 5 in the login table, if we have found + * an existing ISID and a valid/existing TSIH and an existing + * CID we do connection reinstatement. Currently we do not + * support it, so we send back a non-zero status class to the + * initiator and release the new connection. + */ + conn_ptr = iscsit_get_conn_from_cid_rcfr(sess, cid); + if ((conn_ptr)) { + pr_err("Connection exists with CID %hu for %s," + " performing connection reinstatement.\n", + conn_ptr->cid, sess->sess_ops->InitiatorName); + + iscsit_connection_reinstatement_rcfr(conn_ptr); + iscsit_dec_conn_usage_count(conn_ptr); + } + + /* + * Check for any connection recovery entries containing CID. + * We use the original ExpStatSN sent in the first login request + * to acknowledge commands for the failed connection.
+ * + * Also note that an explict logout may have already been sent, + * but the response may not be sent due to additional connection + * loss. + */ + if (sess->sess_ops->ErrorRecoveryLevel == 2) { + cr = iscsit_get_inactive_connection_recovery_entry( + sess, cid); + if ((cr)) { + pr_debug("Performing implicit logout" + " for connection recovery on CID: %hu\n", + conn->cid); + iscsit_discard_cr_cmds_by_expstatsn(cr, exp_statsn); + } + } + + /* + * Else we follow item 4 from the login table in that we have + * found an existing ISID and a valid/existing TSIH and a new + * CID we go ahead and continue to add a new connection to the + * session. + */ + pr_debug("Adding CID %hu to existing session for %s.\n", + cid, sess->sess_ops->InitiatorName); + + if ((atomic_read(&sess->nconn) + 1) > sess->sess_ops->MaxConnections) { + pr_err("Adding additional connection to this session" + " would exceed MaxConnections %d, login failed.\n", + sess->sess_ops->MaxConnections); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_ISID_ERROR); + return -1; + } + + return 0; +} + +static void iscsi_post_login_start_timers(struct iscsi_conn *conn) +{ + struct iscsi_session *sess = conn->sess; + + if (!sess->sess_ops->SessionType) + iscsit_start_nopin_timer(conn); +} + +static int iscsi_post_login_handler( + struct iscsi_np *np, + struct iscsi_conn *conn, + u8 zero_tsih) +{ + int stop_timer = 0; + struct iscsi_session *sess = conn->sess; + struct se_session *se_sess = sess->se_sess; + struct iscsi_portal_group *tpg = ISCSI_TPG_S(sess); + struct se_portal_group *se_tpg = &tpg->tpg_se_tpg; + struct iscsi_thread_set *ts; + + iscsit_inc_conn_usage_count(conn); + + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_SUCCESS, + ISCSI_LOGIN_STATUS_ACCEPT); + + pr_debug("Moving to TARG_CONN_STATE_LOGGED_IN.\n"); + conn->conn_state = TARG_CONN_STATE_LOGGED_IN; + + iscsi_set_connection_parameters(conn->conn_ops, conn->param_list); + iscsit_set_sync_and_steering_values(conn); + /* + * SCSI Initiator -> SCSI Target Port Mapping + */ + ts = iscsi_get_thread_set(); + if (!zero_tsih) { + iscsi_set_session_parameters(sess->sess_ops, + conn->param_list, 0); + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + + spin_lock_bh(&sess->conn_lock); + atomic_set(&sess->session_continuation, 0); + if (sess->session_state == TARG_SESS_STATE_FAILED) { + pr_debug("Moving to" + " TARG_SESS_STATE_LOGGED_IN.\n"); + sess->session_state = TARG_SESS_STATE_LOGGED_IN; + stop_timer = 1; + } + + pr_debug("iSCSI Login successful on CID: %hu from %s to" + " %s:%hu,%hu\n", conn->cid, conn->login_ip, np->np_ip, + np->np_port, tpg->tpgt); + + list_add_tail(&conn->conn_list, &sess->sess_conn_list); + atomic_inc(&sess->nconn); + pr_debug("Incremented iSCSI Connection count to %hu" + " from node: %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + spin_unlock_bh(&sess->conn_lock); + + iscsi_post_login_start_timers(conn); + iscsi_activate_thread_set(conn, ts); + /* + * Determine CPU mask to ensure connection's RX and TX kthreads + * are scheduled on the same CPU. 
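For illustration, one toy way to realize the goal stated in the comment above, namely that a connection's RX and TX kthreads end up on the same CPU: derive a single CPU id per connection and apply it to both threads. The actual selection logic lives in iscsit_thread_get_cpumask(), which is outside this hunk; this helper is purely an invented sketch:

static unsigned int conn_cpu(unsigned int conn_id, unsigned int nr_cpus)
{
	/* both the RX and the TX thread of one connection use this id */
	return conn_id % nr_cpus;
}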
+ */ + iscsit_thread_get_cpumask(conn); + conn->conn_rx_reset_cpumask = 1; + conn->conn_tx_reset_cpumask = 1; + + iscsit_dec_conn_usage_count(conn); + if (stop_timer) { + spin_lock_bh(&se_tpg->session_lock); + iscsit_stop_time2retain_timer(sess); + spin_unlock_bh(&se_tpg->session_lock); + } + iscsit_dec_session_usage_count(sess); + return 0; + } + + iscsi_set_session_parameters(sess->sess_ops, conn->param_list, 1); + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + + iscsit_determine_maxcmdsn(sess); + + spin_lock_bh(&se_tpg->session_lock); + __transport_register_session(&sess->tpg->tpg_se_tpg, + se_sess->se_node_acl, se_sess, (void *)sess); + pr_debug("Moving to TARG_SESS_STATE_LOGGED_IN.\n"); + sess->session_state = TARG_SESS_STATE_LOGGED_IN; + + pr_debug("iSCSI Login successful on CID: %hu from %s to %s:%hu,%hu\n", + conn->cid, conn->login_ip, np->np_ip, np->np_port, tpg->tpgt); + + spin_lock_bh(&sess->conn_lock); + list_add_tail(&conn->conn_list, &sess->sess_conn_list); + atomic_inc(&sess->nconn); + pr_debug("Incremented iSCSI Connection count to %hu from node:" + " %s\n", atomic_read(&sess->nconn), + sess->sess_ops->InitiatorName); + spin_unlock_bh(&sess->conn_lock); + + sess->sid = tpg->sid++; + if (!sess->sid) + sess->sid = tpg->sid++; + pr_debug("Established iSCSI session from node: %s\n", + sess->sess_ops->InitiatorName); + + tpg->nsessions++; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_nsessions++; + + pr_debug("Incremented number of active iSCSI sessions to %u on" + " iSCSI Target Portal Group: %hu\n", tpg->nsessions, tpg->tpgt); + spin_unlock_bh(&se_tpg->session_lock); + + iscsi_post_login_start_timers(conn); + iscsi_activate_thread_set(conn, ts); + /* + * Determine CPU mask to ensure connection's RX and TX kthreads + * are scheduled on the same CPU. 
+ */ + iscsit_thread_get_cpumask(conn); + conn->conn_rx_reset_cpumask = 1; + conn->conn_tx_reset_cpumask = 1; + + iscsit_dec_conn_usage_count(conn); + + return 0; +} + +static void iscsi_handle_login_thread_timeout(unsigned long data) +{ + struct iscsi_np *np = (struct iscsi_np *) data; + + spin_lock_bh(&np->np_thread_lock); + pr_err("iSCSI Login timeout on Network Portal %s:%hu\n", + np->np_ip, np->np_port); + + if (np->np_login_timer_flags & ISCSI_TF_STOP) { + spin_unlock_bh(&np->np_thread_lock); + return; + } + + if (np->np_thread) + send_sig(SIGINT, np->np_thread, 1); + + np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&np->np_thread_lock); +} + +static void iscsi_start_login_thread_timer(struct iscsi_np *np) +{ + /* + * This used the TA_LOGIN_TIMEOUT constant because at this + * point we do not have access to ISCSI_TPG_ATTRIB(tpg)->login_timeout + */ + spin_lock_bh(&np->np_thread_lock); + init_timer(&np->np_login_timer); + np->np_login_timer.expires = (get_jiffies_64() + TA_LOGIN_TIMEOUT * HZ); + np->np_login_timer.data = (unsigned long)np; + np->np_login_timer.function = iscsi_handle_login_thread_timeout; + np->np_login_timer_flags &= ~ISCSI_TF_STOP; + np->np_login_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&np->np_login_timer); + + pr_debug("Added timeout timer to iSCSI login request for" + " %u seconds.\n", TA_LOGIN_TIMEOUT); + spin_unlock_bh(&np->np_thread_lock); +} + +static void iscsi_stop_login_thread_timer(struct iscsi_np *np) +{ + spin_lock_bh(&np->np_thread_lock); + if (!(np->np_login_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&np->np_thread_lock); + return; + } + np->np_login_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&np->np_thread_lock); + + del_timer_sync(&np->np_login_timer); + + spin_lock_bh(&np->np_thread_lock); + np->np_login_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&np->np_thread_lock); +} + +int iscsi_target_setup_login_socket( + struct iscsi_np *np, + struct __kernel_sockaddr_storage *sockaddr) +{ + struct socket *sock; + int backlog = 5, ret, opt = 0, len; + + switch (np->np_network_transport) { + case ISCSI_TCP: + np->np_ip_proto = IPPROTO_TCP; + np->np_sock_type = SOCK_STREAM; + break; + case ISCSI_SCTP_TCP: + np->np_ip_proto = IPPROTO_SCTP; + np->np_sock_type = SOCK_STREAM; + break; + case ISCSI_SCTP_UDP: + np->np_ip_proto = IPPROTO_SCTP; + np->np_sock_type = SOCK_SEQPACKET; + break; + case ISCSI_IWARP_TCP: + case ISCSI_IWARP_SCTP: + case ISCSI_INFINIBAND: + default: + pr_err("Unsupported network_transport: %d\n", + np->np_network_transport); + return -EINVAL; + } + + ret = sock_create(sockaddr->ss_family, np->np_sock_type, + np->np_ip_proto, &sock); + if (ret < 0) { + pr_err("sock_create() failed.\n"); + return ret; + } + np->np_socket = sock; + /* + * The SCTP stack needs struct socket->file. + */ + if ((np->np_network_transport == ISCSI_SCTP_TCP) || + (np->np_network_transport == ISCSI_SCTP_UDP)) { + if (!sock->file) { + sock->file = kzalloc(sizeof(struct file), GFP_KERNEL); + if (!sock->file) { + pr_err("Unable to allocate struct" + " file for SCTP\n"); + ret = -ENOMEM; + goto fail; + } + np->np_flags |= NPF_SCTP_STRUCT_FILE; + } + } + /* + * Setup the np->np_sockaddr from the passed sockaddr setup + * in iscsi_target_configfs.c code.. 
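For illustration, the same listen-socket bring-up sequence used below (create, disable Nagle with TCP_NODELAY, set SO_REUSEADDR, bind, listen with a backlog of 5) as a self-contained userspace sketch over the BSD socket API. The kernel code uses sock_create()/kernel_setsockopt()/kernel_bind()/kernel_listen() instead, and the port value here is only the well-known iSCSI default:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

static int setup_login_socket(uint16_t port)
{
	struct sockaddr_in sin;
	int fd, opt = 1;

	fd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (fd < 0)
		return -1;

	/* mirror the kernel path: no Nagle delay, fast rebinding */
	setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &opt, sizeof(opt));
	setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));

	memset(&sin, 0, sizeof(sin));
	sin.sin_family = AF_INET;
	sin.sin_port = htons(port);		/* 3260 is the iSCSI default */
	sin.sin_addr.s_addr = htonl(INADDR_ANY);

	if (bind(fd, (struct sockaddr *)&sin, sizeof(sin)) < 0 ||
	    listen(fd, 5) < 0) {
		close(fd);
		return -1;
	}
	return fd;
}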
+ */ + memcpy((void *)&np->np_sockaddr, (void *)sockaddr, + sizeof(struct __kernel_sockaddr_storage)); + + if (sockaddr->ss_family == AF_INET6) + len = sizeof(struct sockaddr_in6); + else + len = sizeof(struct sockaddr_in); + /* + * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY. + */ + opt = 1; + if (np->np_network_transport == ISCSI_TCP) { + ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + pr_err("kernel_setsockopt() for TCP_NODELAY" + " failed: %d\n", ret); + goto fail; + } + } + + ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, + (char *)&opt, sizeof(opt)); + if (ret < 0) { + pr_err("kernel_setsockopt() for SO_REUSEADDR" + " failed\n"); + goto fail; + } + + ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len); + if (ret < 0) { + pr_err("kernel_bind() failed: %d\n", ret); + goto fail; + } + + ret = kernel_listen(sock, backlog); + if (ret != 0) { + pr_err("kernel_listen() failed: %d\n", ret); + goto fail; + } + + return 0; + +fail: + np->np_socket = NULL; + if (sock) { + if (np->np_flags & NPF_SCTP_STRUCT_FILE) { + kfree(sock->file); + sock->file = NULL; + } + + sock_release(sock); + } + return ret; +} + +static int __iscsi_target_login_thread(struct iscsi_np *np) +{ + u8 buffer[ISCSI_HDR_LEN], iscsi_opcode, zero_tsih = 0; + int err, ret = 0, ip_proto, sock_type, set_sctp_conn_flag, stop; + struct iscsi_conn *conn = NULL; + struct iscsi_login *login; + struct iscsi_portal_group *tpg = NULL; + struct socket *new_sock, *sock; + struct kvec iov; + struct iscsi_login_req *pdu; + struct sockaddr_in sock_in; + struct sockaddr_in6 sock_in6; + + flush_signals(current); + set_sctp_conn_flag = 0; + sock = np->np_socket; + ip_proto = np->np_ip_proto; + sock_type = np->np_sock_type; + + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + complete(&np->np_restart_comp); + } else { + np->np_thread_state = ISCSI_NP_THREAD_ACTIVE; + } + spin_unlock_bh(&np->np_thread_lock); + + if (kernel_accept(sock, &new_sock, 0) < 0) { + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state == ISCSI_NP_THREAD_RESET) { + spin_unlock_bh(&np->np_thread_lock); + complete(&np->np_restart_comp); + /* Get another socket */ + return 1; + } + spin_unlock_bh(&np->np_thread_lock); + goto out; + } + /* + * The SCTP stack needs struct socket->file. 
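+	 * Only a zeroed dummy struct file is attached below;
+	 * set_sctp_conn_flag records that it was allocated here, so the
+	 * teardown paths kfree() it again before sock_release().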
+ */ + if ((np->np_network_transport == ISCSI_SCTP_TCP) || + (np->np_network_transport == ISCSI_SCTP_UDP)) { + if (!new_sock->file) { + new_sock->file = kzalloc( + sizeof(struct file), GFP_KERNEL); + if (!new_sock->file) { + pr_err("Unable to allocate struct" + " file for SCTP\n"); + sock_release(new_sock); + /* Get another socket */ + return 1; + } + set_sctp_conn_flag = 1; + } + } + + iscsi_start_login_thread_timer(np); + + conn = kzalloc(sizeof(struct iscsi_conn), GFP_KERNEL); + if (!conn) { + pr_err("Could not allocate memory for" + " new connection\n"); + if (set_sctp_conn_flag) { + kfree(new_sock->file); + new_sock->file = NULL; + } + sock_release(new_sock); + /* Get another socket */ + return 1; + } + + pr_debug("Moving to TARG_CONN_STATE_FREE.\n"); + conn->conn_state = TARG_CONN_STATE_FREE; + conn->sock = new_sock; + + if (set_sctp_conn_flag) + conn->conn_flags |= CONNFLAG_SCTP_STRUCT_FILE; + + pr_debug("Moving to TARG_CONN_STATE_XPT_UP.\n"); + conn->conn_state = TARG_CONN_STATE_XPT_UP; + + /* + * Allocate conn->conn_ops early as a failure calling + * iscsit_tx_login_rsp() below will call tx_data(). + */ + conn->conn_ops = kzalloc(sizeof(struct iscsi_conn_ops), GFP_KERNEL); + if (!conn->conn_ops) { + pr_err("Unable to allocate memory for" + " struct iscsi_conn_ops.\n"); + goto new_sess_out; + } + /* + * Perform the remaining iSCSI connection initialization items.. + */ + if (iscsi_login_init_conn(conn) < 0) + goto new_sess_out; + + memset(buffer, 0, ISCSI_HDR_LEN); + memset(&iov, 0, sizeof(struct kvec)); + iov.iov_base = buffer; + iov.iov_len = ISCSI_HDR_LEN; + + if (rx_data(conn, &iov, 1, ISCSI_HDR_LEN) <= 0) { + pr_err("rx_data() returned an error.\n"); + goto new_sess_out; + } + + iscsi_opcode = (buffer[0] & ISCSI_OPCODE_MASK); + if (!(iscsi_opcode & ISCSI_OP_LOGIN)) { + pr_err("First opcode is not login request," + " failing login request.\n"); + goto new_sess_out; + } + + pdu = (struct iscsi_login_req *) buffer; + pdu->cid = be16_to_cpu(pdu->cid); + pdu->tsih = be16_to_cpu(pdu->tsih); + pdu->itt = be32_to_cpu(pdu->itt); + pdu->cmdsn = be32_to_cpu(pdu->cmdsn); + pdu->exp_statsn = be32_to_cpu(pdu->exp_statsn); + /* + * Used by iscsit_tx_login_rsp() for Login Resonses PDUs + * when Status-Class != 0. 
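+	 * RFC-3720 requires even a failed Login Response to echo the
+	 * initiator's ITT, so it is saved before any reject can be sent.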
+ */ + conn->login_itt = pdu->itt; + + spin_lock_bh(&np->np_thread_lock); + if (np->np_thread_state != ISCSI_NP_THREAD_ACTIVE) { + spin_unlock_bh(&np->np_thread_lock); + pr_err("iSCSI Network Portal on %s:%hu currently not" + " active.\n", np->np_ip, np->np_port); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + goto new_sess_out; + } + spin_unlock_bh(&np->np_thread_lock); + + if (np->np_sockaddr.ss_family == AF_INET6) { + memset(&sock_in6, 0, sizeof(struct sockaddr_in6)); + + if (conn->sock->ops->getname(conn->sock, + (struct sockaddr *)&sock_in6, &err, 1) < 0) { + pr_err("sock_ops->getname() failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } +#if 0 + if (!iscsi_ntop6((const unsigned char *) + &sock_in6.sin6_addr.in6_u, + (char *)&conn->ipv6_login_ip[0], + IPV6_ADDRESS_SPACE)) { + pr_err("iscsi_ntop6() failed\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } +#else + pr_debug("Skipping iscsi_ntop6()\n"); +#endif + } else { + memset(&sock_in, 0, sizeof(struct sockaddr_in)); + + if (conn->sock->ops->getname(conn->sock, + (struct sockaddr *)&sock_in, &err, 1) < 0) { + pr_err("sock_ops->getname() failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + goto new_sess_out; + } + sprintf(conn->login_ip, "%pI4", &sock_in.sin_addr.s_addr); + conn->login_port = ntohs(sock_in.sin_port); + } + + conn->network_transport = np->np_network_transport; + + pr_debug("Received iSCSI login request from %s on %s Network" + " Portal %s:%hu\n", conn->login_ip, + (conn->network_transport == ISCSI_TCP) ? "TCP" : "SCTP", + np->np_ip, np->np_port); + + pr_debug("Moving to TARG_CONN_STATE_IN_LOGIN.\n"); + conn->conn_state = TARG_CONN_STATE_IN_LOGIN; + + if (iscsi_login_check_initiator_version(conn, pdu->max_version, + pdu->min_version) < 0) + goto new_sess_out; + + zero_tsih = (pdu->tsih == 0x0000); + if ((zero_tsih)) { + /* + * This is the leading connection of a new session. + * We wait until after authentication to check for + * session reinstatement. + */ + if (iscsi_login_zero_tsih_s1(conn, buffer) < 0) + goto new_sess_out; + } else { + /* + * Add a new connection to an existing session. + * We check for a non-existant session in + * iscsi_login_non_zero_tsih_s2() below based + * on ISID/TSIH, but wait until after authentication + * to check for connection reinstatement, etc. + */ + if (iscsi_login_non_zero_tsih_s1(conn, buffer) < 0) + goto new_sess_out; + } + + /* + * This will process the first login request, and call + * iscsi_target_locate_portal(), and return a valid struct iscsi_login. 
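+	 *
+	 * The returned login carries the received Login Request PDU and its
+	 * key=value payload in login->req / login->req_buf; the response
+	 * buffers are allocated later by iscsi_target_start_negotiation(),
+	 * and iscsi_target_nego_release() frees all of them.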
+ */ + login = iscsi_target_init_negotiation(np, conn, buffer); + if (!login) { + tpg = conn->tpg; + goto new_sess_out; + } + + tpg = conn->tpg; + if (!tpg) { + pr_err("Unable to locate struct iscsi_conn->tpg\n"); + goto new_sess_out; + } + + if (zero_tsih) { + if (iscsi_login_zero_tsih_s2(conn) < 0) { + iscsi_target_nego_release(login, conn); + goto new_sess_out; + } + } else { + if (iscsi_login_non_zero_tsih_s2(conn, buffer) < 0) { + iscsi_target_nego_release(login, conn); + goto old_sess_out; + } + } + + if (iscsi_target_start_negotiation(login, conn) < 0) + goto new_sess_out; + + if (!conn->sess) { + pr_err("struct iscsi_conn session pointer is NULL!\n"); + goto new_sess_out; + } + + iscsi_stop_login_thread_timer(np); + + if (signal_pending(current)) + goto new_sess_out; + + ret = iscsi_post_login_handler(np, conn, zero_tsih); + + if (ret < 0) + goto new_sess_out; + + iscsit_deaccess_np(np, tpg); + tpg = NULL; + /* Get another socket */ + return 1; + +new_sess_out: + pr_err("iSCSI Login negotiation failed.\n"); + iscsit_collect_login_stats(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + if (!zero_tsih || !conn->sess) + goto old_sess_out; + if (conn->sess->se_sess) + transport_free_session(conn->sess->se_sess); + if (conn->sess->session_index != 0) { + spin_lock_bh(&sess_idr_lock); + idr_remove(&sess_idr, conn->sess->session_index); + spin_unlock_bh(&sess_idr_lock); + } + if (conn->sess->sess_ops) + kfree(conn->sess->sess_ops); + if (conn->sess) + kfree(conn->sess); +old_sess_out: + iscsi_stop_login_thread_timer(np); + /* + * If login negotiation fails check if the Time2Retain timer + * needs to be restarted. + */ + if (!zero_tsih && conn->sess) { + spin_lock_bh(&conn->sess->conn_lock); + if (conn->sess->session_state == TARG_SESS_STATE_FAILED) { + struct se_portal_group *se_tpg = + &ISCSI_TPG_C(conn)->tpg_se_tpg; + + atomic_set(&conn->sess->session_continuation, 0); + spin_unlock_bh(&conn->sess->conn_lock); + spin_lock_bh(&se_tpg->session_lock); + iscsit_start_time2retain_handler(conn->sess); + spin_unlock_bh(&se_tpg->session_lock); + } else + spin_unlock_bh(&conn->sess->conn_lock); + iscsit_dec_session_usage_count(conn->sess); + } + + if (!IS_ERR(conn->conn_rx_hash.tfm)) + crypto_free_hash(conn->conn_rx_hash.tfm); + if (!IS_ERR(conn->conn_tx_hash.tfm)) + crypto_free_hash(conn->conn_tx_hash.tfm); + + if (conn->conn_cpumask) + free_cpumask_var(conn->conn_cpumask); + + kfree(conn->conn_ops); + + if (conn->param_list) { + iscsi_release_param_list(conn->param_list); + conn->param_list = NULL; + } + if (conn->sock) { + if (conn->conn_flags & CONNFLAG_SCTP_STRUCT_FILE) { + kfree(conn->sock->file); + conn->sock->file = NULL; + } + sock_release(conn->sock); + } + kfree(conn); + + if (tpg) { + iscsit_deaccess_np(np, tpg); + tpg = NULL; + } + +out: + stop = kthread_should_stop(); + if (!stop && signal_pending(current)) { + spin_lock_bh(&np->np_thread_lock); + stop = (np->np_thread_state == ISCSI_NP_THREAD_SHUTDOWN); + spin_unlock_bh(&np->np_thread_lock); + } + /* Wait for another socket.. 
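unless a kthread stop was requested, or a signal arrived while the
+	 * np was marked ISCSI_NP_THREAD_SHUTDOWN; in that case fall through,
+	 * flag the thread ISCSI_NP_THREAD_EXIT and return 0 so the caller's
+	 * accept loop terminates.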
 */
+	if (!stop)
+		return 1;
+
+	iscsi_stop_login_thread_timer(np);
+	spin_lock_bh(&np->np_thread_lock);
+	np->np_thread_state = ISCSI_NP_THREAD_EXIT;
+	spin_unlock_bh(&np->np_thread_lock);
+	return 0;
+}
+
+int iscsi_target_login_thread(void *arg)
+{
+	struct iscsi_np *np = (struct iscsi_np *)arg;
+	int ret;
+
+	allow_signal(SIGINT);
+
+	while (!kthread_should_stop()) {
+		ret = __iscsi_target_login_thread(np);
+		/*
+		 * We break and exit here unless another sock_accept() call
+		 * is expected.
+		 */
+		if (ret != 1)
+			break;
+	}
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_login.h b/drivers/target/iscsi/iscsi_target_login.h
new file mode 100644
index 000000000000..091dcae2532b
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_login.h
@@ -0,0 +1,12 @@
+#ifndef ISCSI_TARGET_LOGIN_H
+#define ISCSI_TARGET_LOGIN_H
+
+extern int iscsi_login_setup_crypto(struct iscsi_conn *);
+extern int iscsi_check_for_session_reinstatement(struct iscsi_conn *);
+extern int iscsi_login_post_auth_non_zero_tsih(struct iscsi_conn *, u16, u32);
+extern int iscsi_target_setup_login_socket(struct iscsi_np *,
+			struct __kernel_sockaddr_storage *);
+extern int iscsi_target_login_thread(void *);
+extern int iscsi_login_disable_FIM_keys(struct iscsi_param_list *, struct iscsi_conn *);
+
+#endif /*** ISCSI_TARGET_LOGIN_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c
new file mode 100644
index 000000000000..713a4d23557a
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nego.c
@@ -0,0 +1,1067 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/ + +#include <linux/ctype.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_tpg.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_login.h" +#include "iscsi_target_nego.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_auth.h" + +#define MAX_LOGIN_PDUS 7 +#define TEXT_LEN 4096 + +void convert_null_to_semi(char *buf, int len) +{ + int i; + + for (i = 0; i < len; i++) + if (buf[i] == '\0') + buf[i] = ';'; +} + +int strlen_semi(char *buf) +{ + int i = 0; + + while (buf[i] != '\0') { + if (buf[i] == ';') + return i; + i++; + } + + return -1; +} + +int extract_param( + const char *in_buf, + const char *pattern, + unsigned int max_length, + char *out_buf, + unsigned char *type) +{ + char *ptr; + int len; + + if (!in_buf || !pattern || !out_buf || !type) + return -1; + + ptr = strstr(in_buf, pattern); + if (!ptr) + return -1; + + ptr = strstr(ptr, "="); + if (!ptr) + return -1; + + ptr += 1; + if (*ptr == '0' && (*(ptr+1) == 'x' || *(ptr+1) == 'X')) { + ptr += 2; /* skip 0x */ + *type = HEX; + } else + *type = DECIMAL; + + len = strlen_semi(ptr); + if (len < 0) + return -1; + + if (len > max_length) { + pr_err("Length of input: %d exeeds max_length:" + " %d\n", len, max_length); + return -1; + } + memcpy(out_buf, ptr, len); + out_buf[len] = '\0'; + + return 0; +} + +static u32 iscsi_handle_authentication( + struct iscsi_conn *conn, + char *in_buf, + char *out_buf, + int in_length, + int *out_length, + unsigned char *authtype) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_auth *auth; + struct iscsi_node_acl *iscsi_nacl; + struct se_node_acl *se_nacl; + + if (!sess->sess_ops->SessionType) { + /* + * For SessionType=Normal + */ + se_nacl = conn->sess->se_sess->se_node_acl; + if (!se_nacl) { + pr_err("Unable to locate struct se_node_acl for" + " CHAP auth\n"); + return -1; + } + iscsi_nacl = container_of(se_nacl, struct iscsi_node_acl, + se_node_acl); + if (!iscsi_nacl) { + pr_err("Unable to locate struct iscsi_node_acl for" + " CHAP auth\n"); + return -1; + } + + auth = ISCSI_NODE_AUTH(iscsi_nacl); + } else { + /* + * For SessionType=Discovery + */ + auth = &iscsit_global->discovery_acl.node_auth; + } + + if (strstr("CHAP", authtype)) + strcpy(conn->sess->auth_type, "CHAP"); + else + strcpy(conn->sess->auth_type, NONE); + + if (strstr("None", authtype)) + return 1; +#ifdef CANSRP + else if (strstr("SRP", authtype)) + return srp_main_loop(conn, auth, in_buf, out_buf, + &in_length, out_length); +#endif + else if (strstr("CHAP", authtype)) + return chap_main_loop(conn, auth, in_buf, out_buf, + &in_length, out_length); + else if (strstr("SPKM1", authtype)) + return 2; + else if (strstr("SPKM2", authtype)) + return 2; + else if (strstr("KRB5", authtype)) + return 2; + else + return 2; +} + +static void iscsi_remove_failed_auth_entry(struct iscsi_conn *conn) +{ + kfree(conn->auth_protocol); +} + +static int iscsi_target_check_login_request( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + int req_csg, req_nsg, rsp_csg, rsp_nsg; + u32 payload_length; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + payload_length = ntoh24(login_req->dlength); + + switch (login_req->opcode & ISCSI_OPCODE_MASK) { + 
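/*
+	 * Only a Login Request is valid as the first opcode on a new
+	 * connection (also enforced in __iscsi_target_login_thread());
+	 * anything else falls to the default case and fails the login
+	 * with an initiator-error status.
+	 */
+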
case ISCSI_OP_LOGIN: + break; + default: + pr_err("Received unknown opcode 0x%02x.\n", + login_req->opcode & ISCSI_OPCODE_MASK); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if ((login_req->flags & ISCSI_FLAG_LOGIN_CONTINUE) && + (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { + pr_err("Login request has both ISCSI_FLAG_LOGIN_CONTINUE" + " and ISCSI_FLAG_LOGIN_TRANSIT set, protocol error.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + req_csg = (login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2; + rsp_csg = (login_rsp->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2; + req_nsg = (login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK); + rsp_nsg = (login_rsp->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK); + + if (req_csg != login->current_stage) { + pr_err("Initiator unexpectedly changed login stage" + " from %d to %d, login failed.\n", login->current_stage, + req_csg); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if ((req_nsg == 2) || (req_csg >= 2) || + ((login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT) && + (req_nsg <= req_csg))) { + pr_err("Illegal login_req->flags Combination, CSG: %d," + " NSG: %d, ISCSI_FLAG_LOGIN_TRANSIT: %d.\n", req_csg, + req_nsg, (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if ((login_req->max_version != login->version_max) || + (login_req->min_version != login->version_min)) { + pr_err("Login request changed Version Max/Nin" + " unexpectedly to 0x%02x/0x%02x, protocol error\n", + login_req->max_version, login_req->min_version); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if (memcmp(login_req->isid, login->isid, 6) != 0) { + pr_err("Login request changed ISID unexpectedly," + " protocol error.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if (login_req->itt != login->init_task_tag) { + pr_err("Login request changed ITT unexpectedly to" + " 0x%08x, protocol error.\n", login_req->itt); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + if (payload_length > MAX_KEY_VALUE_PAIRS) { + pr_err("Login request payload exceeds default" + " MaxRecvDataSegmentLength: %u, protocol error.\n", + MAX_KEY_VALUE_PAIRS); + return -1; + } + + return 0; +} + +static int iscsi_target_check_first_request( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + struct iscsi_param *param = NULL; + struct se_node_acl *se_nacl; + + login->first_request = 0; + + list_for_each_entry(param, &conn->param_list->param_list, p_list) { + if (!strncmp(param->name, SESSIONTYPE, 11)) { + if (!IS_PSTATE_ACCEPTOR(param)) { + pr_err("SessionType key not received" + " in first login request.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_MISSING_FIELDS); + return -1; + } + if (!strncmp(param->value, DISCOVERY, 9)) + return 0; + } + + if (!strncmp(param->name, INITIATORNAME, 13)) { + if (!IS_PSTATE_ACCEPTOR(param)) { + if (!login->leading_connection) + continue; + + pr_err("InitiatorName key not received" + " in first login request.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_MISSING_FIELDS); + return 
-1; + } + + /* + * For non-leading connections, double check that the + * received InitiatorName matches the existing session's + * struct iscsi_node_acl. + */ + if (!login->leading_connection) { + se_nacl = conn->sess->se_sess->se_node_acl; + if (!se_nacl) { + pr_err("Unable to locate" + " struct se_node_acl\n"); + iscsit_tx_login_rsp(conn, + ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_TGT_NOT_FOUND); + return -1; + } + + if (strcmp(param->value, + se_nacl->initiatorname)) { + pr_err("Incorrect" + " InitiatorName: %s for this" + " iSCSI Initiator Node.\n", + param->value); + iscsit_tx_login_rsp(conn, + ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_TGT_NOT_FOUND); + return -1; + } + } + } + } + + return 0; +} + +static int iscsi_target_do_tx_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +{ + u32 padding = 0; + struct iscsi_session *sess = conn->sess; + struct iscsi_login_rsp *login_rsp; + + login_rsp = (struct iscsi_login_rsp *) login->rsp; + + login_rsp->opcode = ISCSI_OP_LOGIN_RSP; + hton24(login_rsp->dlength, login->rsp_length); + memcpy(login_rsp->isid, login->isid, 6); + login_rsp->tsih = cpu_to_be16(login->tsih); + login_rsp->itt = cpu_to_be32(login->init_task_tag); + login_rsp->statsn = cpu_to_be32(conn->stat_sn++); + login_rsp->exp_cmdsn = cpu_to_be32(conn->sess->exp_cmd_sn); + login_rsp->max_cmdsn = cpu_to_be32(conn->sess->max_cmd_sn); + + pr_debug("Sending Login Response, Flags: 0x%02x, ITT: 0x%08x," + " ExpCmdSN; 0x%08x, MaxCmdSN: 0x%08x, StatSN: 0x%08x, Length:" + " %u\n", login_rsp->flags, ntohl(login_rsp->itt), + ntohl(login_rsp->exp_cmdsn), ntohl(login_rsp->max_cmdsn), + ntohl(login_rsp->statsn), login->rsp_length); + + padding = ((-login->rsp_length) & 3); + + if (iscsi_login_tx_data( + conn, + login->rsp, + login->rsp_buf, + login->rsp_length + padding) < 0) + return -1; + + login->rsp_length = 0; + login_rsp->tsih = be16_to_cpu(login_rsp->tsih); + login_rsp->itt = be32_to_cpu(login_rsp->itt); + login_rsp->statsn = be32_to_cpu(login_rsp->statsn); + mutex_lock(&sess->cmdsn_mutex); + login_rsp->exp_cmdsn = be32_to_cpu(sess->exp_cmd_sn); + login_rsp->max_cmdsn = be32_to_cpu(sess->max_cmd_sn); + mutex_unlock(&sess->cmdsn_mutex); + + return 0; +} + +static int iscsi_target_do_rx_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +{ + u32 padding = 0, payload_length; + struct iscsi_login_req *login_req; + + if (iscsi_login_rx_data(conn, login->req, ISCSI_HDR_LEN) < 0) + return -1; + + login_req = (struct iscsi_login_req *) login->req; + payload_length = ntoh24(login_req->dlength); + login_req->tsih = be16_to_cpu(login_req->tsih); + login_req->itt = be32_to_cpu(login_req->itt); + login_req->cid = be16_to_cpu(login_req->cid); + login_req->cmdsn = be32_to_cpu(login_req->cmdsn); + login_req->exp_statsn = be32_to_cpu(login_req->exp_statsn); + + pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x," + " CmdSN: 0x%08x, ExpStatSN: 0x%08x, CID: %hu, Length: %u\n", + login_req->flags, login_req->itt, login_req->cmdsn, + login_req->exp_statsn, login_req->cid, payload_length); + + if (iscsi_target_check_login_request(conn, login) < 0) + return -1; + + padding = ((-payload_length) & 3); + memset(login->req_buf, 0, MAX_KEY_VALUE_PAIRS); + + if (iscsi_login_rx_data( + conn, + login->req_buf, + payload_length + padding) < 0) + return -1; + + return 0; +} + +static int iscsi_target_do_login_io(struct iscsi_conn *conn, struct iscsi_login *login) +{ + if (iscsi_target_do_tx_login_io(conn, login) < 0) + return -1; + + if (iscsi_target_do_rx_login_io(conn, 
login) < 0) + return -1; + + return 0; +} + +static int iscsi_target_get_initial_payload( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + u32 padding = 0, payload_length; + struct iscsi_login_req *login_req; + + login_req = (struct iscsi_login_req *) login->req; + payload_length = ntoh24(login_req->dlength); + + pr_debug("Got Login Command, Flags 0x%02x, ITT: 0x%08x," + " CmdSN: 0x%08x, ExpStatSN: 0x%08x, Length: %u\n", + login_req->flags, login_req->itt, login_req->cmdsn, + login_req->exp_statsn, payload_length); + + if (iscsi_target_check_login_request(conn, login) < 0) + return -1; + + padding = ((-payload_length) & 3); + + if (iscsi_login_rx_data( + conn, + login->req_buf, + payload_length + padding) < 0) + return -1; + + return 0; +} + +/* + * NOTE: We check for existing sessions or connections AFTER the initiator + * has been successfully authenticated in order to protect against faked + * ISID/TSIH combinations. + */ +static int iscsi_target_check_for_existing_instances( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + if (login->checked_for_existing) + return 0; + + login->checked_for_existing = 1; + + if (!login->tsih) + return iscsi_check_for_session_reinstatement(conn); + else + return iscsi_login_post_auth_non_zero_tsih(conn, login->cid, + login->initial_exp_statsn); +} + +static int iscsi_target_do_authentication( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + int authret; + u32 payload_length; + struct iscsi_param *param; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + payload_length = ntoh24(login_req->dlength); + + param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list); + if (!param) + return -1; + + authret = iscsi_handle_authentication( + conn, + login->req_buf, + login->rsp_buf, + payload_length, + &login->rsp_length, + param->value); + switch (authret) { + case 0: + pr_debug("Received OK response" + " from LIO Authentication, continuing.\n"); + break; + case 1: + pr_debug("iSCSI security negotiation" + " completed sucessfully.\n"); + login->auth_complete = 1; + if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) && + (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { + login_rsp->flags |= (ISCSI_FLAG_LOGIN_NEXT_STAGE1 | + ISCSI_FLAG_LOGIN_TRANSIT); + login->current_stage = 1; + } + return iscsi_target_check_for_existing_instances( + conn, login); + case 2: + pr_err("Security negotiation" + " failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_AUTH_FAILED); + return -1; + default: + pr_err("Received unknown error %d from LIO" + " Authentication\n", authret); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + return -1; + } + + return 0; +} + +static int iscsi_target_handle_csg_zero( + struct iscsi_conn *conn, + struct iscsi_login *login) +{ + int ret; + u32 payload_length; + struct iscsi_param *param; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + payload_length = ntoh24(login_req->dlength); + + param = iscsi_find_param_from_key(AUTHMETHOD, conn->param_list); + if (!param) + return -1; + + ret = iscsi_decode_text_input( + PHASE_SECURITY|PHASE_DECLARATIVE, + SENDER_INITIATOR|SENDER_RECEIVER, + login->req_buf, + payload_length, + conn->param_list); + if (ret < 0) + return 
-1; + + if (ret > 0) { + if (login->auth_complete) { + pr_err("Initiator has already been" + " successfully authenticated, but is still" + " sending %s keys.\n", param->value); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_INIT_ERR); + return -1; + } + + goto do_auth; + } + + if (login->first_request) + if (iscsi_target_check_first_request(conn, login) < 0) + return -1; + + ret = iscsi_encode_text_output( + PHASE_SECURITY|PHASE_DECLARATIVE, + SENDER_TARGET, + login->rsp_buf, + &login->rsp_length, + conn->param_list); + if (ret < 0) + return -1; + + if (!iscsi_check_negotiated_keys(conn->param_list)) { + if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + !strncmp(param->value, NONE, 4)) { + pr_err("Initiator sent AuthMethod=None but" + " Target is enforcing iSCSI Authentication," + " login failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_AUTH_FAILED); + return -1; + } + + if (ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication && + !login->auth_complete) + return 0; + + if (strncmp(param->value, NONE, 4) && !login->auth_complete) + return 0; + + if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE1) && + (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) { + login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE1 | + ISCSI_FLAG_LOGIN_TRANSIT; + login->current_stage = 1; + } + } + + return 0; +do_auth: + return iscsi_target_do_authentication(conn, login); +} + +static int iscsi_target_handle_csg_one(struct iscsi_conn *conn, struct iscsi_login *login) +{ + int ret; + u32 payload_length; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + payload_length = ntoh24(login_req->dlength); + + ret = iscsi_decode_text_input( + PHASE_OPERATIONAL|PHASE_DECLARATIVE, + SENDER_INITIATOR|SENDER_RECEIVER, + login->req_buf, + payload_length, + conn->param_list); + if (ret < 0) + return -1; + + if (login->first_request) + if (iscsi_target_check_first_request(conn, login) < 0) + return -1; + + if (iscsi_target_check_for_existing_instances(conn, login) < 0) + return -1; + + ret = iscsi_encode_text_output( + PHASE_OPERATIONAL|PHASE_DECLARATIVE, + SENDER_TARGET, + login->rsp_buf, + &login->rsp_length, + conn->param_list); + if (ret < 0) + return -1; + + if (!login->auth_complete && + ISCSI_TPG_ATTRIB(ISCSI_TPG_C(conn))->authentication) { + pr_err("Initiator is requesting CSG: 1, has not been" + " successfully authenticated, and the Target is" + " enforcing iSCSI Authentication, login failed.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_AUTH_FAILED); + return -1; + } + + if (!iscsi_check_negotiated_keys(conn->param_list)) + if ((login_req->flags & ISCSI_FLAG_LOGIN_NEXT_STAGE3) && + (login_req->flags & ISCSI_FLAG_LOGIN_TRANSIT)) + login_rsp->flags |= ISCSI_FLAG_LOGIN_NEXT_STAGE3 | + ISCSI_FLAG_LOGIN_TRANSIT; + + return 0; +} + +static int iscsi_target_do_login(struct iscsi_conn *conn, struct iscsi_login *login) +{ + int pdu_count = 0; + struct iscsi_login_req *login_req; + struct iscsi_login_rsp *login_rsp; + + login_req = (struct iscsi_login_req *) login->req; + login_rsp = (struct iscsi_login_rsp *) login->rsp; + + while (1) { + if (++pdu_count > MAX_LOGIN_PDUS) { + pr_err("MAX_LOGIN_PDUS count reached.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_TARGET_ERROR); + return -1; + } + + switch ((login_req->flags & 
ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) {
+		case 0:
+			login_rsp->flags |= (0 & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK);
+			if (iscsi_target_handle_csg_zero(conn, login) < 0)
+				return -1;
+			break;
+		case 1:
+			login_rsp->flags |= ISCSI_FLAG_LOGIN_CURRENT_STAGE1;
+			if (iscsi_target_handle_csg_one(conn, login) < 0)
+				return -1;
+			if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+				login->tsih = conn->sess->tsih;
+				if (iscsi_target_do_tx_login_io(conn,
+						login) < 0)
+					return -1;
+				return 0;
+			}
+			break;
+		default:
+			pr_err("Illegal CSG: %d received from"
+				" Initiator, protocol error.\n",
+				(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK)
+				>> 2);
+			break;
+		}
+
+		if (iscsi_target_do_login_io(conn, login) < 0)
+			return -1;
+
+		if (login_rsp->flags & ISCSI_FLAG_LOGIN_TRANSIT) {
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_TRANSIT;
+			login_rsp->flags &= ~ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK;
+		}
+	}
+
+	return 0;
+}
+
+static void iscsi_initiatorname_tolower(
+	char *param_buf)
+{
+	char *c;
+	u32 iqn_size = strlen(param_buf), i;
+
+	for (i = 0; i < iqn_size; i++) {
+		c = (char *)&param_buf[i];
+		if (!isupper(*c))
+			continue;
+
+		*c = tolower(*c);
+	}
+}
+
+/*
+ * Processes the first Login Request..
+ */
+static int iscsi_target_locate_portal(
+	struct iscsi_np *np,
+	struct iscsi_conn *conn,
+	struct iscsi_login *login)
+{
+	char *i_buf = NULL, *s_buf = NULL, *t_buf = NULL;
+	char *tmpbuf, *start = NULL, *end = NULL, *key, *value;
+	struct iscsi_session *sess = conn->sess;
+	struct iscsi_tiqn *tiqn;
+	struct iscsi_login_req *login_req;
+	struct iscsi_targ_login_rsp *login_rsp;
+	u32 payload_length;
+	int sessiontype = 0, ret = 0;
+
+	login_req = (struct iscsi_login_req *) login->req;
+	login_rsp = (struct iscsi_targ_login_rsp *) login->rsp;
+	payload_length = ntoh24(login_req->dlength);
+
+	login->first_request = 1;
+	login->leading_connection = (!login_req->tsih) ? 1 : 0;
+	login->current_stage =
+		(login_req->flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2;
+	login->version_min = login_req->min_version;
+	login->version_max = login_req->max_version;
+	memcpy(login->isid, login_req->isid, 6);
+	login->cmd_sn = login_req->cmdsn;
+	login->init_task_tag = login_req->itt;
+	login->initial_exp_statsn = login_req->exp_statsn;
+	login->cid = login_req->cid;
+	login->tsih = login_req->tsih;
+
+	if (iscsi_target_get_initial_payload(conn, login) < 0)
+		return -1;
+
+	tmpbuf = kzalloc(payload_length + 1, GFP_KERNEL);
+	if (!tmpbuf) {
+		pr_err("Unable to allocate memory for tmpbuf.\n");
+		return -1;
+	}
+
+	memcpy(tmpbuf, login->req_buf, payload_length);
+	tmpbuf[payload_length] = '\0';
+	start = tmpbuf;
+	end = (start + payload_length);
+
+	/*
+	 * Locate the initial keys expected from the Initiator node in
+	 * the first login request in order to progress with the login phase.
+	 */
+	while (start < end) {
+		if (iscsi_extract_key_value(start, &key, &value) < 0) {
+			ret = -1;
+			goto out;
+		}
+
+		if (!strncmp(key, "InitiatorName", 13))
+			i_buf = value;
+		else if (!strncmp(key, "SessionType", 11))
+			s_buf = value;
+		else if (!strncmp(key, "TargetName", 10))
+			t_buf = value;
+
+		start += strlen(key) + strlen(value) + 2;
+	}
+
+	/*
+	 * See 5.3. Login Phase.
+	 */
+	if (!i_buf) {
+		pr_err("InitiatorName key not received"
+			" in first login request.\n");
+		iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR,
+			ISCSI_LOGIN_STATUS_MISSING_FIELDS);
+		ret = -1;
+		goto out;
+	}
+	/*
+	 * Convert the incoming InitiatorName to lowercase following
+	 * RFC-3720 3.2.6.1.
section c) that says that iSCSI IQNs + * are NOT case sensitive. + */ + iscsi_initiatorname_tolower(i_buf); + + if (!s_buf) { + if (!login->leading_connection) + goto get_target; + + pr_err("SessionType key not received" + " in first login request.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_MISSING_FIELDS); + ret = -1; + goto out; + } + + /* + * Use default portal group for discovery sessions. + */ + sessiontype = strncmp(s_buf, DISCOVERY, 9); + if (!sessiontype) { + conn->tpg = iscsit_global->discovery_tpg; + if (!login->leading_connection) + goto get_target; + + sess->sess_ops->SessionType = 1; + /* + * Setup crc32c modules from libcrypto + */ + if (iscsi_login_setup_crypto(conn) < 0) { + pr_err("iscsi_login_setup_crypto() failed\n"); + ret = -1; + goto out; + } + /* + * Serialize access across the discovery struct iscsi_portal_group to + * process login attempt. + */ + if (iscsit_access_np(np, conn->tpg) < 0) { + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + ret = -1; + goto out; + } + ret = 0; + goto out; + } + +get_target: + if (!t_buf) { + pr_err("TargetName key not received" + " in first login request while" + " SessionType=Normal.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_MISSING_FIELDS); + ret = -1; + goto out; + } + + /* + * Locate Target IQN from Storage Node. + */ + tiqn = iscsit_get_tiqn_for_login(t_buf); + if (!tiqn) { + pr_err("Unable to locate Target IQN: %s in" + " Storage Node\n", t_buf); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + ret = -1; + goto out; + } + pr_debug("Located Storage Object: %s\n", tiqn->tiqn); + + /* + * Locate Target Portal Group from Storage Node. + */ + conn->tpg = iscsit_get_tpg_from_np(tiqn, np); + if (!conn->tpg) { + pr_err("Unable to locate Target Portal Group" + " on %s\n", tiqn->tiqn); + iscsit_put_tiqn_for_login(tiqn); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + ret = -1; + goto out; + } + pr_debug("Located Portal Group Object: %hu\n", conn->tpg->tpgt); + /* + * Setup crc32c modules from libcrypto + */ + if (iscsi_login_setup_crypto(conn) < 0) { + pr_err("iscsi_login_setup_crypto() failed\n"); + ret = -1; + goto out; + } + /* + * Serialize access across the struct iscsi_portal_group to + * process login attempt. + */ + if (iscsit_access_np(np, conn->tpg) < 0) { + iscsit_put_tiqn_for_login(tiqn); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_SVC_UNAVAILABLE); + ret = -1; + conn->tpg = NULL; + goto out; + } + + /* + * conn->sess->node_acl will be set when the referenced + * struct iscsi_session is located from received ISID+TSIH in + * iscsi_login_non_zero_tsih_s2(). + */ + if (!login->leading_connection) { + ret = 0; + goto out; + } + + /* + * This value is required in iscsi_login_zero_tsih_s2() + */ + sess->sess_ops->SessionType = 0; + + /* + * Locate incoming Initiator IQN reference from Storage Node. 
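+	 *
+	 * core_tpg_check_initiator_node_acl() hands back the matching
+	 * explicit ACL (or a dynamically generated one when the TPG
+	 * permits it), and NULL when the initiator is not authorized;
+	 * NULL is mapped to ISCSI_LOGIN_STATUS_TGT_FORBIDDEN below.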
+ */ + sess->se_sess->se_node_acl = core_tpg_check_initiator_node_acl( + &conn->tpg->tpg_se_tpg, i_buf); + if (!sess->se_sess->se_node_acl) { + pr_err("iSCSI Initiator Node: %s is not authorized to" + " access iSCSI target portal group: %hu.\n", + i_buf, conn->tpg->tpgt); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_INITIATOR_ERR, + ISCSI_LOGIN_STATUS_TGT_FORBIDDEN); + ret = -1; + goto out; + } + + ret = 0; +out: + kfree(tmpbuf); + return ret; +} + +struct iscsi_login *iscsi_target_init_negotiation( + struct iscsi_np *np, + struct iscsi_conn *conn, + char *login_pdu) +{ + struct iscsi_login *login; + + login = kzalloc(sizeof(struct iscsi_login), GFP_KERNEL); + if (!login) { + pr_err("Unable to allocate memory for struct iscsi_login.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + goto out; + } + + login->req = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!login->req) { + pr_err("Unable to allocate memory for Login Request.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + goto out; + } + memcpy(login->req, login_pdu, ISCSI_HDR_LEN); + + login->req_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL); + if (!login->req_buf) { + pr_err("Unable to allocate memory for response buffer.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + goto out; + } + /* + * SessionType: Discovery + * + * Locates Default Portal + * + * SessionType: Normal + * + * Locates Target Portal from NP -> Target IQN + */ + if (iscsi_target_locate_portal(np, conn, login) < 0) { + pr_err("iSCSI Login negotiation failed.\n"); + goto out; + } + + return login; +out: + kfree(login->req); + kfree(login->req_buf); + kfree(login); + + return NULL; +} + +int iscsi_target_start_negotiation( + struct iscsi_login *login, + struct iscsi_conn *conn) +{ + int ret = -1; + + login->rsp = kzalloc(ISCSI_HDR_LEN, GFP_KERNEL); + if (!login->rsp) { + pr_err("Unable to allocate memory for" + " Login Response.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + ret = -1; + goto out; + } + + login->rsp_buf = kzalloc(MAX_KEY_VALUE_PAIRS, GFP_KERNEL); + if (!login->rsp_buf) { + pr_err("Unable to allocate memory for" + " request buffer.\n"); + iscsit_tx_login_rsp(conn, ISCSI_STATUS_CLS_TARGET_ERR, + ISCSI_LOGIN_STATUS_NO_RESOURCES); + ret = -1; + goto out; + } + + ret = iscsi_target_do_login(conn, login); +out: + if (ret != 0) + iscsi_remove_failed_auth_entry(conn); + + iscsi_target_nego_release(login, conn); + return ret; +} + +void iscsi_target_nego_release( + struct iscsi_login *login, + struct iscsi_conn *conn) +{ + kfree(login->req); + kfree(login->rsp); + kfree(login->req_buf); + kfree(login->rsp_buf); + kfree(login); +} diff --git a/drivers/target/iscsi/iscsi_target_nego.h b/drivers/target/iscsi/iscsi_target_nego.h new file mode 100644 index 000000000000..92e133a5158f --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_nego.h @@ -0,0 +1,17 @@ +#ifndef ISCSI_TARGET_NEGO_H +#define ISCSI_TARGET_NEGO_H + +#define DECIMAL 0 +#define HEX 1 + +extern void convert_null_to_semi(char *, int); +extern int extract_param(const char *, const char *, unsigned int, char *, + unsigned char *); +extern struct iscsi_login *iscsi_target_init_negotiation( + struct iscsi_np *, struct iscsi_conn *, char *); +extern int iscsi_target_start_negotiation( + struct iscsi_login *, struct iscsi_conn *); +extern void iscsi_target_nego_release( + struct iscsi_login *, struct 
iscsi_conn *);
+
+#endif /* ISCSI_TARGET_NEGO_H */
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.c b/drivers/target/iscsi/iscsi_target_nodeattrib.c
new file mode 100644
index 000000000000..aeafbe0cd7d1
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.c
@@ -0,0 +1,263 @@
+/*******************************************************************************
+ * This file contains the main functions related to Initiator Node Attributes.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_device.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target_nodeattrib.h"
+
+static inline char *iscsit_na_get_initiatorname(
+	struct iscsi_node_acl *nacl)
+{
+	struct se_node_acl *se_nacl = &nacl->se_node_acl;
+
+	return &se_nacl->initiatorname[0];
+}
+
+void iscsit_set_default_node_attribues(
+	struct iscsi_node_acl *acl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	a->dataout_timeout = NA_DATAOUT_TIMEOUT;
+	a->dataout_timeout_retries = NA_DATAOUT_TIMEOUT_RETRIES;
+	a->nopin_timeout = NA_NOPIN_TIMEOUT;
+	a->nopin_response_timeout = NA_NOPIN_RESPONSE_TIMEOUT;
+	a->random_datain_pdu_offsets = NA_RANDOM_DATAIN_PDU_OFFSETS;
+	a->random_datain_seq_offsets = NA_RANDOM_DATAIN_SEQ_OFFSETS;
+	a->random_r2t_offsets = NA_RANDOM_R2T_OFFSETS;
+	a->default_erl = NA_DEFAULT_ERL;
+}
+
+extern int iscsit_na_dataout_timeout(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout > NA_DATAOUT_TIMEOUT_MAX) {
+		pr_err("Requested DataOut Timeout %u larger than"
+			" maximum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout < NA_DATAOUT_TIMEOUT_MIX) {
+		pr_err("Requested DataOut Timeout %u smaller than"
+			" minimum %u\n", dataout_timeout,
+			NA_DATAOUT_TIMEOUT_MIX);
+		return -EINVAL;
+	}
+
+	a->dataout_timeout = dataout_timeout;
+	pr_debug("Set DataOut Timeout to %u for Initiator Node"
+		" %s\n", a->dataout_timeout, iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+extern int iscsit_na_dataout_timeout_retries(
+	struct iscsi_node_acl *acl,
+	u32 dataout_timeout_retries)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (dataout_timeout_retries > NA_DATAOUT_TIMEOUT_RETRIES_MAX) {
+		pr_err("Requested DataOut Timeout Retries %u larger"
+			" than maximum %u", dataout_timeout_retries,
+			NA_DATAOUT_TIMEOUT_RETRIES_MAX);
+		return -EINVAL;
+	} else if (dataout_timeout_retries < NA_DATAOUT_TIMEOUT_RETRIES_MIN) {
+		pr_err("Requested DataOut Timeout Retries %u smaller"
+			" than minimum %u", dataout_timeout_retries,
+			NA_DATAOUT_TIMEOUT_RETRIES_MIN);
+
return -EINVAL; + } + + a->dataout_timeout_retries = dataout_timeout_retries; + pr_debug("Set DataOut Timeout Retries to %u for" + " Initiator Node %s\n", a->dataout_timeout_retries, + iscsit_na_get_initiatorname(acl)); + + return 0; +} + +extern int iscsit_na_nopin_timeout( + struct iscsi_node_acl *acl, + u32 nopin_timeout) +{ + struct iscsi_node_attrib *a = &acl->node_attrib; + struct iscsi_session *sess; + struct iscsi_conn *conn; + struct se_node_acl *se_nacl = &a->nacl->se_node_acl; + struct se_session *se_sess; + u32 orig_nopin_timeout = a->nopin_timeout; + + if (nopin_timeout > NA_NOPIN_TIMEOUT_MAX) { + pr_err("Requested NopIn Timeout %u larger than maximum" + " %u\n", nopin_timeout, NA_NOPIN_TIMEOUT_MAX); + return -EINVAL; + } else if ((nopin_timeout < NA_NOPIN_TIMEOUT_MIN) && + (nopin_timeout != 0)) { + pr_err("Requested NopIn Timeout %u smaller than" + " minimum %u and not 0\n", nopin_timeout, + NA_NOPIN_TIMEOUT_MIN); + return -EINVAL; + } + + a->nopin_timeout = nopin_timeout; + pr_debug("Set NopIn Timeout to %u for Initiator" + " Node %s\n", a->nopin_timeout, + iscsit_na_get_initiatorname(acl)); + /* + * Reenable disabled nopin_timeout timer for all iSCSI connections. + */ + if (!orig_nopin_timeout) { + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + + spin_lock(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, + conn_list) { + if (conn->conn_state != + TARG_CONN_STATE_LOGGED_IN) + continue; + + spin_lock(&conn->nopin_timer_lock); + __iscsit_start_nopin_timer(conn); + spin_unlock(&conn->nopin_timer_lock); + } + spin_unlock(&sess->conn_lock); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + } + + return 0; +} + +extern int iscsit_na_nopin_response_timeout( + struct iscsi_node_acl *acl, + u32 nopin_response_timeout) +{ + struct iscsi_node_attrib *a = &acl->node_attrib; + + if (nopin_response_timeout > NA_NOPIN_RESPONSE_TIMEOUT_MAX) { + pr_err("Requested NopIn Response Timeout %u larger" + " than maximum %u\n", nopin_response_timeout, + NA_NOPIN_RESPONSE_TIMEOUT_MAX); + return -EINVAL; + } else if (nopin_response_timeout < NA_NOPIN_RESPONSE_TIMEOUT_MIN) { + pr_err("Requested NopIn Response Timeout %u smaller" + " than minimum %u\n", nopin_response_timeout, + NA_NOPIN_RESPONSE_TIMEOUT_MIN); + return -EINVAL; + } + + a->nopin_response_timeout = nopin_response_timeout; + pr_debug("Set NopIn Response Timeout to %u for" + " Initiator Node %s\n", a->nopin_timeout, + iscsit_na_get_initiatorname(acl)); + + return 0; +} + +extern int iscsit_na_random_datain_pdu_offsets( + struct iscsi_node_acl *acl, + u32 random_datain_pdu_offsets) +{ + struct iscsi_node_attrib *a = &acl->node_attrib; + + if (random_datain_pdu_offsets != 0 && random_datain_pdu_offsets != 1) { + pr_err("Requested Random DataIN PDU Offsets: %u not" + " 0 or 1\n", random_datain_pdu_offsets); + return -EINVAL; + } + + a->random_datain_pdu_offsets = random_datain_pdu_offsets; + pr_debug("Set Random DataIN PDU Offsets to %u for" + " Initiator Node %s\n", a->random_datain_pdu_offsets, + iscsit_na_get_initiatorname(acl)); + + return 0; +} + +extern int iscsit_na_random_datain_seq_offsets( + struct iscsi_node_acl *acl, + u32 random_datain_seq_offsets) +{ + struct iscsi_node_attrib *a = &acl->node_attrib; + + if (random_datain_seq_offsets != 0 && random_datain_seq_offsets != 1) { + pr_err("Requested Random DataIN Sequence Offsets: %u" + " not 0 or 1\n", random_datain_seq_offsets); + return -EINVAL; + } + + 
a->random_datain_seq_offsets = random_datain_seq_offsets;
+	pr_debug("Set Random DataIN Sequence Offsets to %u for"
+		" Initiator Node %s\n", a->random_datain_seq_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+extern int iscsit_na_random_r2t_offsets(
+	struct iscsi_node_acl *acl,
+	u32 random_r2t_offsets)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (random_r2t_offsets != 0 && random_r2t_offsets != 1) {
+		pr_err("Requested Random R2T Offsets: %u not"
+			" 0 or 1\n", random_r2t_offsets);
+		return -EINVAL;
+	}
+
+	a->random_r2t_offsets = random_r2t_offsets;
+	pr_debug("Set Random R2T Offsets to %u for"
+		" Initiator Node %s\n", a->random_r2t_offsets,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
+
+extern int iscsit_na_default_erl(
+	struct iscsi_node_acl *acl,
+	u32 default_erl)
+{
+	struct iscsi_node_attrib *a = &acl->node_attrib;
+
+	if (default_erl != 0 && default_erl != 1 && default_erl != 2) {
+		pr_err("Requested default ERL: %u not 0, 1, or 2\n",
+			default_erl);
+		return -EINVAL;
+	}
+
+	a->default_erl = default_erl;
+	pr_debug("Set use ERL0 flag to %u for Initiator"
+		" Node %s\n", a->default_erl,
+		iscsit_na_get_initiatorname(acl));
+
+	return 0;
+}
diff --git a/drivers/target/iscsi/iscsi_target_nodeattrib.h b/drivers/target/iscsi/iscsi_target_nodeattrib.h
new file mode 100644
index 000000000000..c970b326ef23
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_nodeattrib.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_NODEATTRIB_H
+#define ISCSI_TARGET_NODEATTRIB_H
+
+extern void iscsit_set_default_node_attribues(struct iscsi_node_acl *);
+extern int iscsit_na_dataout_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_dataout_timeout_retries(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_nopin_response_timeout(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_pdu_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_datain_seq_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_random_r2t_offsets(struct iscsi_node_acl *, u32);
+extern int iscsit_na_default_erl(struct iscsi_node_acl *, u32);
+
+#endif /* ISCSI_TARGET_NODEATTRIB_H */
diff --git a/drivers/target/iscsi/iscsi_target_parameters.c b/drivers/target/iscsi/iscsi_target_parameters.c
new file mode 100644
index 000000000000..252e246cf51e
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_parameters.c
@@ -0,0 +1,1905 @@
+/*******************************************************************************
+ * This file contains main functions related to iSCSI Parameter negotiation.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/ + +#include <linux/slab.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_util.h" +#include "iscsi_target_parameters.h" + +int iscsi_login_rx_data( + struct iscsi_conn *conn, + char *buf, + int length) +{ + int rx_got; + struct kvec iov; + + memset(&iov, 0, sizeof(struct kvec)); + iov.iov_len = length; + iov.iov_base = buf; + + /* + * Initial Marker-less Interval. + * Add the values regardless of IFMarker/OFMarker, considering + * it may not be negoitated yet. + */ + conn->of_marker += length; + + rx_got = rx_data(conn, &iov, 1, length); + if (rx_got != length) { + pr_err("rx_data returned %d, expecting %d.\n", + rx_got, length); + return -1; + } + + return 0 ; +} + +int iscsi_login_tx_data( + struct iscsi_conn *conn, + char *pdu_buf, + char *text_buf, + int text_length) +{ + int length, tx_sent; + struct kvec iov[2]; + + length = (ISCSI_HDR_LEN + text_length); + + memset(&iov[0], 0, 2 * sizeof(struct kvec)); + iov[0].iov_len = ISCSI_HDR_LEN; + iov[0].iov_base = pdu_buf; + iov[1].iov_len = text_length; + iov[1].iov_base = text_buf; + + /* + * Initial Marker-less Interval. + * Add the values regardless of IFMarker/OFMarker, considering + * it may not be negoitated yet. + */ + conn->if_marker += length; + + tx_sent = tx_data(conn, &iov[0], 2, length); + if (tx_sent != length) { + pr_err("tx_data returned %d, expecting %d.\n", + tx_sent, length); + return -1; + } + + return 0; +} + +void iscsi_dump_conn_ops(struct iscsi_conn_ops *conn_ops) +{ + pr_debug("HeaderDigest: %s\n", (conn_ops->HeaderDigest) ? + "CRC32C" : "None"); + pr_debug("DataDigest: %s\n", (conn_ops->DataDigest) ? + "CRC32C" : "None"); + pr_debug("MaxRecvDataSegmentLength: %u\n", + conn_ops->MaxRecvDataSegmentLength); + pr_debug("OFMarker: %s\n", (conn_ops->OFMarker) ? "Yes" : "No"); + pr_debug("IFMarker: %s\n", (conn_ops->IFMarker) ? "Yes" : "No"); + if (conn_ops->OFMarker) + pr_debug("OFMarkInt: %u\n", conn_ops->OFMarkInt); + if (conn_ops->IFMarker) + pr_debug("IFMarkInt: %u\n", conn_ops->IFMarkInt); +} + +void iscsi_dump_sess_ops(struct iscsi_sess_ops *sess_ops) +{ + pr_debug("InitiatorName: %s\n", sess_ops->InitiatorName); + pr_debug("InitiatorAlias: %s\n", sess_ops->InitiatorAlias); + pr_debug("TargetName: %s\n", sess_ops->TargetName); + pr_debug("TargetAlias: %s\n", sess_ops->TargetAlias); + pr_debug("TargetPortalGroupTag: %hu\n", + sess_ops->TargetPortalGroupTag); + pr_debug("MaxConnections: %hu\n", sess_ops->MaxConnections); + pr_debug("InitialR2T: %s\n", + (sess_ops->InitialR2T) ? "Yes" : "No"); + pr_debug("ImmediateData: %s\n", (sess_ops->ImmediateData) ? + "Yes" : "No"); + pr_debug("MaxBurstLength: %u\n", sess_ops->MaxBurstLength); + pr_debug("FirstBurstLength: %u\n", sess_ops->FirstBurstLength); + pr_debug("DefaultTime2Wait: %hu\n", sess_ops->DefaultTime2Wait); + pr_debug("DefaultTime2Retain: %hu\n", + sess_ops->DefaultTime2Retain); + pr_debug("MaxOutstandingR2T: %hu\n", + sess_ops->MaxOutstandingR2T); + pr_debug("DataPDUInOrder: %s\n", + (sess_ops->DataPDUInOrder) ? "Yes" : "No"); + pr_debug("DataSequenceInOrder: %s\n", + (sess_ops->DataSequenceInOrder) ? "Yes" : "No"); + pr_debug("ErrorRecoveryLevel: %hu\n", + sess_ops->ErrorRecoveryLevel); + pr_debug("SessionType: %s\n", (sess_ops->SessionType) ? 
+ "Discovery" : "Normal"); +} + +void iscsi_print_params(struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, ¶m_list->param_list, p_list) + pr_debug("%s: %s\n", param->name, param->value); +} + +static struct iscsi_param *iscsi_set_default_param(struct iscsi_param_list *param_list, + char *name, char *value, u8 phase, u8 scope, u8 sender, + u16 type_range, u8 use) +{ + struct iscsi_param *param = NULL; + + param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); + if (!param) { + pr_err("Unable to allocate memory for parameter.\n"); + goto out; + } + INIT_LIST_HEAD(¶m->p_list); + + param->name = kzalloc(strlen(name) + 1, GFP_KERNEL); + if (!param->name) { + pr_err("Unable to allocate memory for parameter name.\n"); + goto out; + } + + param->value = kzalloc(strlen(value) + 1, GFP_KERNEL); + if (!param->value) { + pr_err("Unable to allocate memory for parameter value.\n"); + goto out; + } + + memcpy(param->name, name, strlen(name)); + param->name[strlen(name)] = '\0'; + memcpy(param->value, value, strlen(value)); + param->value[strlen(value)] = '\0'; + param->phase = phase; + param->scope = scope; + param->sender = sender; + param->use = use; + param->type_range = type_range; + + switch (param->type_range) { + case TYPERANGE_BOOL_AND: + param->type = TYPE_BOOL_AND; + break; + case TYPERANGE_BOOL_OR: + param->type = TYPE_BOOL_OR; + break; + case TYPERANGE_0_TO_2: + case TYPERANGE_0_TO_3600: + case TYPERANGE_0_TO_32767: + case TYPERANGE_0_TO_65535: + case TYPERANGE_1_TO_65535: + case TYPERANGE_2_TO_3600: + case TYPERANGE_512_TO_16777215: + param->type = TYPE_NUMBER; + break; + case TYPERANGE_AUTH: + case TYPERANGE_DIGEST: + param->type = TYPE_VALUE_LIST | TYPE_STRING; + break; + case TYPERANGE_MARKINT: + param->type = TYPE_NUMBER_RANGE; + param->type_range |= TYPERANGE_1_TO_65535; + break; + case TYPERANGE_ISCSINAME: + case TYPERANGE_SESSIONTYPE: + case TYPERANGE_TARGETADDRESS: + case TYPERANGE_UTF8: + param->type = TYPE_STRING; + break; + default: + pr_err("Unknown type_range 0x%02x\n", + param->type_range); + goto out; + } + list_add_tail(¶m->p_list, ¶m_list->param_list); + + return param; +out: + if (param) { + kfree(param->value); + kfree(param->name); + kfree(param); + } + + return NULL; +} + +/* #warning Add extension keys */ +int iscsi_create_default_params(struct iscsi_param_list **param_list_ptr) +{ + struct iscsi_param *param = NULL; + struct iscsi_param_list *pl; + + pl = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); + if (!pl) { + pr_err("Unable to allocate memory for" + " struct iscsi_param_list.\n"); + return -1 ; + } + INIT_LIST_HEAD(&pl->param_list); + INIT_LIST_HEAD(&pl->extra_response_list); + + /* + * The format for setting the initial parameter definitions are: + * + * Parameter name: + * Initial value: + * Allowable phase: + * Scope: + * Allowable senders: + * Typerange: + * Use: + */ + param = iscsi_set_default_param(pl, AUTHMETHOD, INITIAL_AUTHMETHOD, + PHASE_SECURITY, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_AUTH, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, HEADERDIGEST, INITIAL_HEADERDIGEST, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_DIGEST, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATADIGEST, INITIAL_DATADIGEST, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_DIGEST, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXCONNECTIONS, + 
INITIAL_MAXCONNECTIONS, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_1_TO_65535, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, SENDTARGETS, INITIAL_SENDTARGETS, + PHASE_FFP0, SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_UTF8, 0); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETNAME, INITIAL_TARGETNAME, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_ISCSINAME, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIATORNAME, + INITIAL_INITIATORNAME, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_ISCSINAME, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETALIAS, INITIAL_TARGETALIAS, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_UTF8, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIATORALIAS, + INITIAL_INITIATORALIAS, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_INITIATOR, TYPERANGE_UTF8, + USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETADDRESS, + INITIAL_TARGETADDRESS, PHASE_DECLARATIVE, + SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_TARGETADDRESS, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, TARGETPORTALGROUPTAG, + INITIAL_TARGETPORTALGROUPTAG, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_TARGET, + TYPERANGE_0_TO_65535, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, INITIALR2T, INITIAL_INITIALR2T, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_BOOL_OR, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IMMEDIATEDATA, + INITIAL_IMMEDIATEDATA, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_AND, + USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXRECVDATASEGMENTLENGTH, + INITIAL_MAXRECVDATASEGMENTLENGTH, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_ALL); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXBURSTLENGTH, + INITIAL_MAXBURSTLENGTH, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, FIRSTBURSTLENGTH, + INITIAL_FIRSTBURSTLENGTH, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_512_TO_16777215, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DEFAULTTIME2WAIT, + INITIAL_DEFAULTTIME2WAIT, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_3600, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DEFAULTTIME2RETAIN, + INITIAL_DEFAULTTIME2RETAIN, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_3600, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, MAXOUTSTANDINGR2T, + INITIAL_MAXOUTSTANDINGR2T, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_1_TO_65535, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATAPDUINORDER, + INITIAL_DATAPDUINORDER, PHASE_OPERATIONAL, + SCOPE_SESSION_WIDE, SENDER_BOTH, TYPERANGE_BOOL_OR, + USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, DATASEQUENCEINORDER, + INITIAL_DATASEQUENCEINORDER, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_BOOL_OR, 
USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, ERRORRECOVERYLEVEL, + INITIAL_ERRORRECOVERYLEVEL, + PHASE_OPERATIONAL, SCOPE_SESSION_WIDE, SENDER_BOTH, + TYPERANGE_0_TO_2, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, SESSIONTYPE, INITIAL_SESSIONTYPE, + PHASE_DECLARATIVE, SCOPE_SESSION_WIDE, SENDER_INITIATOR, + TYPERANGE_SESSIONTYPE, USE_LEADING_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IFMARKER, INITIAL_IFMARKER, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_BOOL_AND, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, OFMARKER, INITIAL_OFMARKER, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_BOOL_AND, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, IFMARKINT, INITIAL_IFMARKINT, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_MARKINT, USE_INITIAL_ONLY); + if (!param) + goto out; + + param = iscsi_set_default_param(pl, OFMARKINT, INITIAL_OFMARKINT, + PHASE_OPERATIONAL, SCOPE_CONNECTION_ONLY, SENDER_BOTH, + TYPERANGE_MARKINT, USE_INITIAL_ONLY); + if (!param) + goto out; + + *param_list_ptr = pl; + return 0; +out: + iscsi_release_param_list(pl); + return -1; +} + +int iscsi_set_keys_to_negotiate( + int sessiontype, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, &param_list->param_list, p_list) { + param->state = 0; + if (!strcmp(param->name, AUTHMETHOD)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, HEADERDIGEST)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATADIGEST)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXCONNECTIONS)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, TARGETNAME)) { + continue; + } else if (!strcmp(param->name, INITIATORNAME)) { + continue; + } else if (!strcmp(param->name, TARGETALIAS)) { + if (param->value) + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, INITIATORALIAS)) { + continue; + } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, INITIALR2T)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IMMEDIATEDATA)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXBURSTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATAPDUINORDER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, DATASEQUENCEINORDER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, SESSIONTYPE)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IFMARKER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, OFMARKER)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, IFMARKINT)) { + SET_PSTATE_NEGOTIATE(param); + } else if (!strcmp(param->name, OFMARKINT)) { + SET_PSTATE_NEGOTIATE(param); + } + } + + return 0; +}
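iscsi_set_keys_to_negotiate() simply flags every standard key; the function that follows clears that flag again on the keys RFC 3720 makes irrelevant for discovery sessions. A minimal usage sketch (error handling elided; treating a nonzero sessiontype as discovery is an assumption here, since the caller is not part of this hunk):

	struct iscsi_param_list *pl;

	if (iscsi_create_default_params(&pl) < 0)
		return -1;
	iscsi_set_keys_to_negotiate(sessiontype, pl);
	if (sessiontype)	/* assumed: nonzero means discovery */
		iscsi_set_keys_irrelevant_for_discovery(pl);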
+ +int iscsi_set_keys_irrelevant_for_discovery( + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + list_for_each_entry(param, &param_list->param_list, p_list) { + if (!strcmp(param->name, MAXCONNECTIONS)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, INITIALR2T)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IMMEDIATEDATA)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, MAXBURSTLENGTH)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, FIRSTBURSTLENGTH)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DATAPDUINORDER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DATASEQUENCEINORDER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DEFAULTTIME2WAIT)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IFMARKER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, OFMARKER)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, IFMARKINT)) + param->state &= ~PSTATE_NEGOTIATE; + else if (!strcmp(param->name, OFMARKINT)) + param->state &= ~PSTATE_NEGOTIATE; + } + + return 0; +} + +int iscsi_copy_param_list( + struct iscsi_param_list **dst_param_list, + struct iscsi_param_list *src_param_list, + int leading) +{ + struct iscsi_param *new_param = NULL, *param = NULL; + struct iscsi_param_list *param_list = NULL; + + param_list = kzalloc(sizeof(struct iscsi_param_list), GFP_KERNEL); + if (!param_list) { + pr_err("Unable to allocate memory for" + " struct iscsi_param_list.\n"); + goto err_out; + } + INIT_LIST_HEAD(&param_list->param_list); + INIT_LIST_HEAD(&param_list->extra_response_list); + + list_for_each_entry(param, &src_param_list->param_list, p_list) { + if (!leading && (param->scope & SCOPE_SESSION_WIDE)) { + if ((strcmp(param->name, "TargetName") != 0) && + (strcmp(param->name, "InitiatorName") != 0) && + (strcmp(param->name, "TargetPortalGroupTag") != 0)) + continue; + } + + new_param = kzalloc(sizeof(struct iscsi_param), GFP_KERNEL); + if (!new_param) { + pr_err("Unable to allocate memory for" + " struct iscsi_param.\n"); + goto err_out; + } + + new_param->set_param = param->set_param; + new_param->phase = param->phase; + new_param->scope = param->scope; + new_param->sender = param->sender; + new_param->type = param->type; + new_param->use = param->use; + new_param->type_range = param->type_range; + + new_param->name = kzalloc(strlen(param->name) + 1, GFP_KERNEL); + if (!new_param->name) { + pr_err("Unable to allocate memory for" + " parameter name.\n"); + goto err_out; + } + + new_param->value = kzalloc(strlen(param->value) + 1, + GFP_KERNEL); + if (!new_param->value) { + pr_err("Unable to allocate memory for" + " parameter value.\n"); + goto err_out; + } + + memcpy(new_param->name, param->name, strlen(param->name)); + new_param->name[strlen(param->name)] = '\0'; + memcpy(new_param->value, param->value, strlen(param->value)); + new_param->value[strlen(param->value)] = '\0'; + + list_add_tail(&new_param->p_list, &param_list->param_list); + } + + if (!list_empty(&param_list->param_list)) + *dst_param_list = param_list; + else { + pr_err("No parameters allocated.\n"); + goto err_out; + } + + return 0; + +err_out: + iscsi_release_param_list(param_list); + return -1; +}
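iscsi_copy_param_list() gives each connection its own writable copy of a template list; with leading == 0, only TargetName, InitiatorName and TargetPortalGroupTag survive from the session-wide keys. A hedged sketch of the two call shapes one would expect (tpg_param_list and conn->param_list are assumed names, not shown in this hunk):

	/* Leading connection of a new session: copy every key. */
	if (iscsi_copy_param_list(&conn->param_list, tpg_param_list, 1) < 0)
		return -1;

	/* Additional connection into an existing session. */
	if (iscsi_copy_param_list(&conn->param_list, tpg_param_list, 0) < 0)
		return -1;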
+ +static void iscsi_release_extra_responses(struct iscsi_param_list *param_list) +{ + struct iscsi_extra_response *er, *er_tmp; + + list_for_each_entry_safe(er, er_tmp, &param_list->extra_response_list, + er_list) { + list_del(&er->er_list); + kfree(er); + } +} + +void iscsi_release_param_list(struct iscsi_param_list *param_list) +{ + struct iscsi_param *param, *param_tmp; + + list_for_each_entry_safe(param, param_tmp, &param_list->param_list, + p_list) { + list_del(&param->p_list); + + kfree(param->name); + param->name = NULL; + kfree(param->value); + param->value = NULL; + kfree(param); + param = NULL; + } + + iscsi_release_extra_responses(param_list); + + kfree(param_list); +} + +struct iscsi_param *iscsi_find_param_from_key( + char *key, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + if (!key || !param_list) { + pr_err("Key or parameter list pointer is NULL.\n"); + return NULL; + } + + list_for_each_entry(param, &param_list->param_list, p_list) { + if (!strcmp(key, param->name)) + return param; + } + + pr_err("Unable to locate key \"%s\".\n", key); + return NULL; +} + +int iscsi_extract_key_value(char *textbuf, char **key, char **value) +{ + *value = strchr(textbuf, '='); + if (!*value) { + pr_err("Unable to locate \"=\" separator for key," + " ignoring request.\n"); + return -1; + } + + *key = textbuf; + **value = '\0'; + *value = *value + 1; + + return 0; +} + +int iscsi_update_param_value(struct iscsi_param *param, char *value) +{ + kfree(param->value); + + param->value = kzalloc(strlen(value) + 1, GFP_KERNEL); + if (!param->value) { + pr_err("Unable to allocate memory for value.\n"); + return -1; + } + + memcpy(param->value, value, strlen(value)); + param->value[strlen(value)] = '\0'; + + pr_debug("iSCSI Parameter updated to %s=%s\n", + param->name, param->value); + return 0; +} + +static int iscsi_add_notunderstood_response( + char *key, + char *value, + struct iscsi_param_list *param_list) +{ + struct iscsi_extra_response *extra_response; + + if (strlen(value) > VALUE_MAXLEN) { + pr_err("Value for notunderstood key \"%s\" exceeds %d," + " protocol error.\n", key, VALUE_MAXLEN); + return -1; + } + + extra_response = kzalloc(sizeof(struct iscsi_extra_response), GFP_KERNEL); + if (!extra_response) { + pr_err("Unable to allocate memory for" + " struct iscsi_extra_response.\n"); + return -1; + } + INIT_LIST_HEAD(&extra_response->er_list); + + strncpy(extra_response->key, key, strlen(key) + 1); + strncpy(extra_response->value, NOTUNDERSTOOD, + strlen(NOTUNDERSTOOD) + 1); + + list_add_tail(&extra_response->er_list, + &param_list->extra_response_list); + return 0; +} + +static int iscsi_check_for_auth_key(char *key) +{ + /* + * RFC 1994 + */ + if (!strcmp(key, "CHAP_A") || !strcmp(key, "CHAP_I") || + !strcmp(key, "CHAP_C") || !strcmp(key, "CHAP_N") || + !strcmp(key, "CHAP_R")) + return 1; + + /* + * RFC 2945 + */ + if (!strcmp(key, "SRP_U") || !strcmp(key, "SRP_N") || + !strcmp(key, "SRP_g") || !strcmp(key, "SRP_s") || + !strcmp(key, "SRP_A") || !strcmp(key, "SRP_B") || + !strcmp(key, "SRP_M") || !strcmp(key, "SRP_HM")) + return 1; + + return 0; +}
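iscsi_extract_key_value() above splits a single "key=value" string in place by overwriting the '=' with a NUL, which is why its callers always work on a kmalloc'd copy of the received text. A minimal sketch:

	char buf[] = "HeaderDigest=CRC32C,None";	/* must be writable */
	char *key, *value;

	if (iscsi_extract_key_value(buf, &key, &value) == 0) {
		/* key now points at "HeaderDigest", value at "CRC32C,None" */
	}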
+ +static void iscsi_check_proposer_for_optional_reply(struct iscsi_param *param) +{ + if (IS_TYPE_BOOL_AND(param)) { + if (!strcmp(param->value, NO)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_BOOL_OR(param)) { + if (!strcmp(param->value, YES)) + SET_PSTATE_REPLY_OPTIONAL(param); + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, IMMEDIATEDATA)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_NUMBER(param)) { + if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + /* + * The GlobalSAN iSCSI Initiator for MacOSX does + * not respond to MaxBurstLength, FirstBurstLength, + * DefaultTime2Wait or DefaultTime2Retain parameter keys. + * So, we set them to 'reply optional' here, and assume the + * defaults from iscsi_parameters.h if the initiator + * is not RFC compliant and the keys are not negotiated. + */ + if (!strcmp(param->name, MAXBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, FIRSTBURSTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, DEFAULTTIME2WAIT)) + SET_PSTATE_REPLY_OPTIONAL(param); + if (!strcmp(param->name, DEFAULTTIME2RETAIN)) + SET_PSTATE_REPLY_OPTIONAL(param); + /* + * Required for gPXE iSCSI boot client + */ + if (!strcmp(param->name, MAXCONNECTIONS)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_PHASE_DECLARATIVE(param)) + SET_PSTATE_REPLY_OPTIONAL(param); +} + +static int iscsi_check_boolean_value(struct iscsi_param *param, char *value) +{ + if (strcmp(value, YES) && strcmp(value, NO)) { + pr_err("Illegal value for \"%s\", must be either" + " \"%s\" or \"%s\".\n", param->name, YES, NO); + return -1; + } + + return 0; +} + +static int iscsi_check_numerical_value(struct iscsi_param *param, char *value_ptr) +{ + char *tmpptr; + int value = 0; + + value = simple_strtoul(value_ptr, &tmpptr, 0); + +/* #warning FIXME: Fix this */ +#if 0 + if (strspn(tmpptr, WHITE_SPACE) != strlen(tmpptr)) { + pr_err("Illegal value \"%s\" for \"%s\".\n", + value_ptr, param->name); + return -1; + } +#endif + if (IS_TYPERANGE_0_TO_2(param)) { + if ((value < 0) || (value > 2)) { + pr_err("Illegal value for \"%s\", must be" + " between 0 and 2.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_0_TO_3600(param)) { + if ((value < 0) || (value > 3600)) { + pr_err("Illegal value for \"%s\", must be" + " between 0 and 3600.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_0_TO_32767(param)) { + if ((value < 0) || (value > 32767)) { + pr_err("Illegal value for \"%s\", must be" + " between 0 and 32767.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_0_TO_65535(param)) { + if ((value < 0) || (value > 65535)) { + pr_err("Illegal value for \"%s\", must be" + " between 0 and 65535.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_1_TO_65535(param)) { + if ((value < 1) || (value > 65535)) { + pr_err("Illegal value for \"%s\", must be" + " between 1 and 65535.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_2_TO_3600(param)) { + if ((value < 2) || (value > 3600)) { + pr_err("Illegal value for \"%s\", must be" + " between 2 and 3600.\n", param->name); + return -1; + } + return 0; + } + if (IS_TYPERANGE_512_TO_16777215(param)) { + if ((value < 512) || (value > 16777215)) { + pr_err("Illegal value for \"%s\", must be" + " between 512 and 16777215.\n", param->name); + return -1; + } + return 0; + } + + return 0; +} + +static int iscsi_check_numerical_range_value(struct iscsi_param *param, char *value) +{ + char *left_val_ptr = NULL, *right_val_ptr = NULL; + char *tilde_ptr = NULL, *tmp_ptr = NULL; + u32 left_val, right_val, local_left_val, local_right_val; + + if (strcmp(param->name, IFMARKINT) && + strcmp(param->name, OFMARKINT)) { + pr_err("Only parameters \"%s\" or \"%s\" may contain a" + " numerical
range value.\n", IFMARKINT, OFMARKINT); + return -1; + } + + if (IS_PSTATE_PROPOSER(param)) + return 0; + + tilde_ptr = strchr(value, '~'); + if (!tilde_ptr) { + pr_err("Unable to locate numerical range indicator" + " \"~\" for \"%s\".\n", param->name); + return -1; + } + *tilde_ptr = '\0'; + + left_val_ptr = value; + right_val_ptr = value + strlen(left_val_ptr) + 1; + + if (iscsi_check_numerical_value(param, left_val_ptr) < 0) + return -1; + if (iscsi_check_numerical_value(param, right_val_ptr) < 0) + return -1; + + left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0); + right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0); + *tilde_ptr = '~'; + + if (right_val < left_val) { + pr_err("Numerical range for parameter \"%s\" contains" + " a right value which is less than the left.\n", + param->name); + return -1; + } + + /* + * For now, enforce reasonable defaults for [I,O]FMarkInt. + */ + tilde_ptr = strchr(param->value, '~'); + if (!tilde_ptr) { + pr_err("Unable to locate numerical range indicator" + " \"~\" for \"%s\".\n", param->name); + return -1; + } + *tilde_ptr = '\0'; + + left_val_ptr = param->value; + right_val_ptr = param->value + strlen(left_val_ptr) + 1; + + local_left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0); + local_right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0); + *tilde_ptr = '~'; + + if (param->set_param) { + if ((left_val < local_left_val) || + (right_val < local_left_val)) { + pr_err("Passed value range \"%u~%u\" is below" + " minimum left value \"%u\" for key \"%s\"," + " rejecting.\n", left_val, right_val, + local_left_val, param->name); + return -1; + } + } else { + if ((left_val < local_left_val) && + (right_val < local_left_val)) { + pr_err("Received value range \"%u~%u\" is" + " below minimum left value \"%u\" for key" + " \"%s\", rejecting.\n", left_val, right_val, + local_left_val, param->name); + SET_PSTATE_REJECT(param); + if (iscsi_update_param_value(param, REJECT) < 0) + return -1; + } + } + + return 0; +} + +static int iscsi_check_string_or_list_value(struct iscsi_param *param, char *value) +{ + if (IS_PSTATE_PROPOSER(param)) + return 0; + + if (IS_TYPERANGE_AUTH_PARAM(param)) { + if (strcmp(value, KRB5) && strcmp(value, SPKM1) && + strcmp(value, SPKM2) && strcmp(value, SRP) && + strcmp(value, CHAP) && strcmp(value, NONE)) { + pr_err("Illegal value for \"%s\", must be" + " \"%s\", \"%s\", \"%s\", \"%s\", \"%s\"" + " or \"%s\".\n", param->name, KRB5, + SPKM1, SPKM2, SRP, CHAP, NONE); + return -1; + } + } + if (IS_TYPERANGE_DIGEST_PARAM(param)) { + if (strcmp(value, CRC32C) && strcmp(value, NONE)) { + pr_err("Illegal value for \"%s\", must be" + " \"%s\" or \"%s\".\n", param->name, + CRC32C, NONE); + return -1; + } + } + if (IS_TYPERANGE_SESSIONTYPE(param)) { + if (strcmp(value, DISCOVERY) && strcmp(value, NORMAL)) { + pr_err("Illegal value for \"%s\", must be" + " \"%s\" or \"%s\".\n", param->name, + DISCOVERY, NORMAL); + return -1; + } + } + + return 0; +} + +/* + * This function is used to pick a value range number, currently just + * returns the lesser of both right values. 
+ */ +static char *iscsi_get_value_from_number_range( + struct iscsi_param *param, + char *value) +{ + char *end_ptr, *tilde_ptr1 = NULL, *tilde_ptr2 = NULL; + u32 acceptor_right_value, proposer_right_value; + + tilde_ptr1 = strchr(value, '~'); + if (!tilde_ptr1) + return NULL; + *tilde_ptr1++ = '\0'; + proposer_right_value = simple_strtoul(tilde_ptr1, &end_ptr, 0); + + tilde_ptr2 = strchr(param->value, '~'); + if (!tilde_ptr2) + return NULL; + *tilde_ptr2++ = '\0'; + acceptor_right_value = simple_strtoul(tilde_ptr2, &end_ptr, 0); + + return (acceptor_right_value >= proposer_right_value) ? + tilde_ptr1 : tilde_ptr2; +} + +static char *iscsi_check_valuelist_for_support( + struct iscsi_param *param, + char *value) +{ + char *tmp1 = NULL, *tmp2 = NULL; + char *acceptor_values = NULL, *proposer_values = NULL; + + acceptor_values = param->value; + proposer_values = value; + + do { + if (!proposer_values) + return NULL; + tmp1 = strchr(proposer_values, ','); + if (tmp1) + *tmp1 = '\0'; + acceptor_values = param->value; + do { + if (!acceptor_values) { + if (tmp1) + *tmp1 = ','; + return NULL; + } + tmp2 = strchr(acceptor_values, ','); + if (tmp2) + *tmp2 = '\0'; + if (!acceptor_values || !proposer_values) { + if (tmp1) + *tmp1 = ','; + if (tmp2) + *tmp2 = ','; + return NULL; + } + if (!strcmp(acceptor_values, proposer_values)) { + if (tmp2) + *tmp2 = ','; + goto out; + } + if (tmp2) + *tmp2++ = ','; + + acceptor_values = tmp2; + if (!acceptor_values) + break; + } while (acceptor_values); + if (tmp1) + *tmp1++ = ','; + proposer_values = tmp1; + } while (proposer_values); + +out: + return proposer_values; +} + +static int iscsi_check_acceptor_state(struct iscsi_param *param, char *value) +{ + u8 acceptor_boolean_value = 0, proposer_boolean_value = 0; + char *negotiated_value = NULL; + + if (IS_PSTATE_ACCEPTOR(param)) { + pr_err("Received key \"%s\" twice, protocol error.\n", + param->name); + return -1; + } + + if (IS_PSTATE_REJECT(param)) + return 0; + + if (IS_TYPE_BOOL_AND(param)) { + if (!strcmp(value, YES)) + proposer_boolean_value = 1; + if (!strcmp(param->value, YES)) + acceptor_boolean_value = 1; + if (acceptor_boolean_value && proposer_boolean_value) + do {} while (0); + else { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + if (!proposer_boolean_value) + SET_PSTATE_REPLY_OPTIONAL(param); + } + } else if (IS_TYPE_BOOL_OR(param)) { + if (!strcmp(value, YES)) + proposer_boolean_value = 1; + if (!strcmp(param->value, YES)) + acceptor_boolean_value = 1; + if (acceptor_boolean_value || proposer_boolean_value) { + if (iscsi_update_param_value(param, YES) < 0) + return -1; + if (proposer_boolean_value) + SET_PSTATE_REPLY_OPTIONAL(param); + } + } else if (IS_TYPE_NUMBER(param)) { + char *tmpptr, buf[10]; + u32 acceptor_value = simple_strtoul(param->value, &tmpptr, 0); + u32 proposer_value = simple_strtoul(value, &tmpptr, 0); + + memset(buf, 0, 10); + + if (!strcmp(param->name, MAXCONNECTIONS) || + !strcmp(param->name, MAXBURSTLENGTH) || + !strcmp(param->name, FIRSTBURSTLENGTH) || + !strcmp(param->name, MAXOUTSTANDINGR2T) || + !strcmp(param->name, DEFAULTTIME2RETAIN) || + !strcmp(param->name, ERRORRECOVERYLEVEL)) { + if (proposer_value > acceptor_value) { + sprintf(buf, "%u", acceptor_value); + if (iscsi_update_param_value(param, + &buf[0]) < 0) + return -1; + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + if (acceptor_value > proposer_value) { + sprintf(buf, "%u", acceptor_value); + if
(iscsi_update_param_value(param, + &buf[0]) < 0) + return -1; + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + } else { + if (iscsi_update_param_value(param, value) < 0) + return -1; + } + + if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) + SET_PSTATE_REPLY_OPTIONAL(param); + } else if (IS_TYPE_NUMBER_RANGE(param)) { + negotiated_value = iscsi_get_value_from_number_range( + param, value); + if (!negotiated_value) + return -1; + if (iscsi_update_param_value(param, negotiated_value) < 0) + return -1; + } else if (IS_TYPE_VALUE_LIST(param)) { + negotiated_value = iscsi_check_valuelist_for_support( + param, value); + if (!negotiated_value) { + pr_err("Proposer's value list \"%s\" contains" + " no valid values from Acceptor's value list" + " \"%s\".\n", value, param->value); + return -1; + } + if (iscsi_update_param_value(param, negotiated_value) < 0) + return -1; + } else if (IS_PHASE_DECLARATIVE(param)) { + if (iscsi_update_param_value(param, value) < 0) + return -1; + SET_PSTATE_REPLY_OPTIONAL(param); + } + + return 0; +} + +static int iscsi_check_proposer_state(struct iscsi_param *param, char *value) +{ + if (IS_PSTATE_RESPONSE_GOT(param)) { + pr_err("Received key \"%s\" twice, protocol error.\n", + param->name); + return -1; + } + + if (IS_TYPE_NUMBER_RANGE(param)) { + u32 left_val = 0, right_val = 0, received_value = 0; + char *left_val_ptr = NULL, *right_val_ptr = NULL; + char *tilde_ptr = NULL, *tmp_ptr = NULL; + + if (!strcmp(value, IRRELEVANT) || !strcmp(value, REJECT)) { + if (iscsi_update_param_value(param, value) < 0) + return -1; + return 0; + } + + tilde_ptr = strchr(value, '~'); + if (tilde_ptr) { + pr_err("Illegal \"~\" in response for \"%s\".\n", + param->name); + return -1; + } + tilde_ptr = strchr(param->value, '~'); + if (!tilde_ptr) { + pr_err("Unable to locate numerical range" + " indicator \"~\" for \"%s\".\n", param->name); + return -1; + } + *tilde_ptr = '\0'; + + left_val_ptr = param->value; + right_val_ptr = param->value + strlen(left_val_ptr) + 1; + left_val = simple_strtoul(left_val_ptr, &tmp_ptr, 0); + right_val = simple_strtoul(right_val_ptr, &tmp_ptr, 0); + received_value = simple_strtoul(value, &tmp_ptr, 0); + + *tilde_ptr = '~'; + + if ((received_value < left_val) || + (received_value > right_val)) { + pr_err("Illegal response \"%s=%u\", value must" + " be between %u and %u.\n", param->name, + received_value, left_val, right_val); + return -1; + } + } else if (IS_TYPE_VALUE_LIST(param)) { + char *comma_ptr = NULL, *tmp_ptr = NULL; + + comma_ptr = strchr(value, ','); + if (comma_ptr) { + pr_err("Illegal \",\" in response for \"%s\".\n", + param->name); + return -1; + } + + tmp_ptr = iscsi_check_valuelist_for_support(param, value); + if (!tmp_ptr) + return -1; + } + + if (iscsi_update_param_value(param, value) < 0) + return -1; + + return 0; +}
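For a TYPE_VALUE_LIST key the acceptor path above answers with the first proposer value that also appears in its own list (iscsi_check_valuelist_for_support()), so the order of the proposer's list expresses preference. A worked example, assuming the target still carries the default HeaderDigest value from iscsi_target_parameters.h:

	acceptor (target) value:   CRC32C,None
	proposer (initiator) key:  HeaderDigest=None,CRC32C
	negotiated result:         HeaderDigest=None

"None" wins because it is the first proposer entry found anywhere in the acceptor's list.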
+ +static int iscsi_check_value(struct iscsi_param *param, char *value) +{ + char *comma_ptr = NULL; + + if (!strcmp(value, REJECT)) { + if (!strcmp(param->name, IFMARKINT) || + !strcmp(param->name, OFMARKINT)) { + /* + * Reject is not fatal for [I,O]FMarkInt, and causes + * [I,O]FMarker to be reset to No. (See iSCSI v20 A.3.2) + */ + SET_PSTATE_REJECT(param); + return 0; + } + pr_err("Received %s=%s\n", param->name, value); + return -1; + } + if (!strcmp(value, IRRELEVANT)) { + pr_debug("Received %s=%s\n", param->name, value); + SET_PSTATE_IRRELEVANT(param); + return 0; + } + if (!strcmp(value, NOTUNDERSTOOD)) { + if (!IS_PSTATE_PROPOSER(param)) { + pr_err("Received illegal offer %s=%s\n", + param->name, value); + return -1; + } + +/* #warning FIXME: Add check for X-ExtensionKey here */ + pr_err("Standard iSCSI key \"%s\" cannot be answered" + " with \"%s\", protocol error.\n", param->name, value); + return -1; + } + + do { + comma_ptr = NULL; + comma_ptr = strchr(value, ','); + + if (comma_ptr && !IS_TYPE_VALUE_LIST(param)) { + pr_err("Detected value separator \",\", but" + " key \"%s\" does not allow a value list," + " protocol error.\n", param->name); + return -1; + } + if (comma_ptr) + *comma_ptr = '\0'; + + if (strlen(value) > VALUE_MAXLEN) { + pr_err("Value for key \"%s\" exceeds %d," + " protocol error.\n", param->name, + VALUE_MAXLEN); + return -1; + } + + if (IS_TYPE_BOOL_AND(param) || IS_TYPE_BOOL_OR(param)) { + if (iscsi_check_boolean_value(param, value) < 0) + return -1; + } else if (IS_TYPE_NUMBER(param)) { + if (iscsi_check_numerical_value(param, value) < 0) + return -1; + } else if (IS_TYPE_NUMBER_RANGE(param)) { + if (iscsi_check_numerical_range_value(param, value) < 0) + return -1; + } else if (IS_TYPE_STRING(param) || IS_TYPE_VALUE_LIST(param)) { + if (iscsi_check_string_or_list_value(param, value) < 0) + return -1; + } else { + pr_err("Huh? 0x%02x\n", param->type); + return -1; + } + + if (comma_ptr) + *comma_ptr++ = ','; + + value = comma_ptr; + } while (value); + + return 0; +} + +static struct iscsi_param *__iscsi_check_key( + char *key, + int sender, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + + if (strlen(key) > KEY_MAXLEN) { + pr_err("Length of key name \"%s\" exceeds %d.\n", + key, KEY_MAXLEN); + return NULL; + } + + param = iscsi_find_param_from_key(key, param_list); + if (!param) + return NULL; + + if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "target" : "initiator"); + return NULL; + } + + if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "initiator" : "target"); + return NULL; + } + + return param; +} + +static struct iscsi_param *iscsi_check_key( + char *key, + int phase, + int sender, + struct iscsi_param_list *param_list) +{ + struct iscsi_param *param; + /* + * Key name length must not exceed 63 bytes. (See iSCSI v20 5.1) + */ + if (strlen(key) > KEY_MAXLEN) { + pr_err("Length of key name \"%s\" exceeds %d.\n", + key, KEY_MAXLEN); + return NULL; + } + + param = iscsi_find_param_from_key(key, param_list); + if (!param) + return NULL; + + if ((sender & SENDER_INITIATOR) && !IS_SENDER_INITIATOR(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ? "target" : "initiator"); + return NULL; + } + if ((sender & SENDER_TARGET) && !IS_SENDER_TARGET(param)) { + pr_err("Key \"%s\" may not be sent to %s," + " protocol error.\n", param->name, + (sender & SENDER_RECEIVER) ?
"initiator" : "target"); + return NULL; + } + + if (IS_PSTATE_ACCEPTOR(param)) { + pr_err("Key \"%s\" received twice, protocol error.\n", + key); + return NULL; + } + + if (!phase) + return param; + + if (!(param->phase & phase)) { + pr_err("Key \"%s\" may not be negotiated during ", + param->name); + switch (phase) { + case PHASE_SECURITY: + pr_debug("Security phase.\n"); + break; + case PHASE_OPERATIONAL: + pr_debug("Operational phase.\n"); + default: + pr_debug("Unknown phase.\n"); + } + return NULL; + } + + return param; +} + +static int iscsi_enforce_integrity_rules( + u8 phase, + struct iscsi_param_list *param_list) +{ + char *tmpptr; + u8 DataSequenceInOrder = 0; + u8 ErrorRecoveryLevel = 0, SessionType = 0; + u8 IFMarker = 0, OFMarker = 0; + u8 IFMarkInt_Reject = 0, OFMarkInt_Reject = 0; + u32 FirstBurstLength = 0, MaxBurstLength = 0; + struct iscsi_param *param = NULL; + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!(param->phase & phase)) + continue; + if (!strcmp(param->name, SESSIONTYPE)) + if (!strcmp(param->value, NORMAL)) + SessionType = 1; + if (!strcmp(param->name, ERRORRECOVERYLEVEL)) + ErrorRecoveryLevel = simple_strtoul(param->value, + &tmpptr, 0); + if (!strcmp(param->name, DATASEQUENCEINORDER)) + if (!strcmp(param->value, YES)) + DataSequenceInOrder = 1; + if (!strcmp(param->name, MAXBURSTLENGTH)) + MaxBurstLength = simple_strtoul(param->value, + &tmpptr, 0); + if (!strcmp(param->name, IFMARKER)) + if (!strcmp(param->value, YES)) + IFMarker = 1; + if (!strcmp(param->name, OFMARKER)) + if (!strcmp(param->value, YES)) + OFMarker = 1; + if (!strcmp(param->name, IFMARKINT)) + if (!strcmp(param->value, REJECT)) + IFMarkInt_Reject = 1; + if (!strcmp(param->name, OFMARKINT)) + if (!strcmp(param->value, REJECT)) + OFMarkInt_Reject = 1; + } + + list_for_each_entry(param, ¶m_list->param_list, p_list) { + if (!(param->phase & phase)) + continue; + if (!SessionType && (!IS_PSTATE_ACCEPTOR(param) && + (strcmp(param->name, IFMARKER) && + strcmp(param->name, OFMARKER) && + strcmp(param->name, IFMARKINT) && + strcmp(param->name, OFMARKINT)))) + continue; + if (!strcmp(param->name, MAXOUTSTANDINGR2T) && + DataSequenceInOrder && (ErrorRecoveryLevel > 0)) { + if (strcmp(param->value, "1")) { + if (iscsi_update_param_value(param, "1") < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, MAXCONNECTIONS) && !SessionType) { + if (strcmp(param->value, "1")) { + if (iscsi_update_param_value(param, "1") < 0) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + FirstBurstLength = simple_strtoul(param->value, + &tmpptr, 0); + if (FirstBurstLength > MaxBurstLength) { + char tmpbuf[10]; + memset(tmpbuf, 0, 10); + sprintf(tmpbuf, "%u", MaxBurstLength); + if (iscsi_update_param_value(param, tmpbuf)) + return -1; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + } + if (!strcmp(param->name, IFMARKER) && IFMarkInt_Reject) { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + IFMarker = 0; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + if (!strcmp(param->name, OFMARKER) && OFMarkInt_Reject) { + if (iscsi_update_param_value(param, NO) < 0) + return -1; + OFMarker = 0; + pr_debug("Reset \"%s\" to \"%s\".\n", + param->name, param->value); + } + if (!strcmp(param->name, IFMARKINT) && !IFMarker) { + if (!strcmp(param->value, REJECT)) + continue; + param->state &= 
+ +int iscsi_decode_text_input( + u8 phase, + u8 sender, + char *textbuf, + u32 length, + struct iscsi_param_list *param_list) +{ + char *tmpbuf, *start = NULL, *end = NULL; + + tmpbuf = kzalloc(length + 1, GFP_KERNEL); + if (!tmpbuf) { + pr_err("Unable to allocate memory for tmpbuf.\n"); + return -1; + } + + memcpy(tmpbuf, textbuf, length); + tmpbuf[length] = '\0'; + start = tmpbuf; + end = (start + length); + + while (start < end) { + char *key, *value; + struct iscsi_param *param; + + if (iscsi_extract_key_value(start, &key, &value) < 0) { + kfree(tmpbuf); + return -1; + } + + pr_debug("Got key: %s=%s\n", key, value); + + if (phase & PHASE_SECURITY) { + if (iscsi_check_for_auth_key(key) > 0) { + char *tmpptr = key + strlen(key); + *tmpptr = '='; + kfree(tmpbuf); + return 1; + } + } + + param = iscsi_check_key(key, phase, sender, param_list); + if (!param) { + if (iscsi_add_notunderstood_response(key, + value, param_list) < 0) { + kfree(tmpbuf); + return -1; + } + start += strlen(key) + strlen(value) + 2; + continue; + } + if (iscsi_check_value(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + + start += strlen(key) + strlen(value) + 2; + + if (IS_PSTATE_PROPOSER(param)) { + if (iscsi_check_proposer_state(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + SET_PSTATE_RESPONSE_GOT(param); + } else { + if (iscsi_check_acceptor_state(param, value) < 0) { + kfree(tmpbuf); + return -1; + } + SET_PSTATE_ACCEPTOR(param); + } + } + + kfree(tmpbuf); + return 0; +} + +int iscsi_encode_text_output( + u8 phase, + u8 sender, + char *textbuf, + u32 *length, + struct iscsi_param_list *param_list) +{ + char *output_buf = NULL; + struct iscsi_extra_response *er; + struct iscsi_param *param; + + output_buf = textbuf + *length; + + if (iscsi_enforce_integrity_rules(phase, param_list) < 0) + return -1; + + list_for_each_entry(param, &param_list->param_list, p_list) { + if (!(param->sender & sender)) + continue; + if (IS_PSTATE_ACCEPTOR(param) && + !IS_PSTATE_RESPONSE_SENT(param) && + !IS_PSTATE_REPLY_OPTIONAL(param) && + (param->phase & phase)) { + *length += sprintf(output_buf, "%s=%s", + param->name, param->value); + *length += 1; + output_buf = textbuf + *length; + SET_PSTATE_RESPONSE_SENT(param); + pr_debug("Sending key: %s=%s\n", + param->name, param->value); + continue; + } + if (IS_PSTATE_NEGOTIATE(param) && + !IS_PSTATE_ACCEPTOR(param) && + !IS_PSTATE_PROPOSER(param) && + (param->phase & phase)) { + *length += sprintf(output_buf, "%s=%s", + param->name, param->value); + *length += 1; + output_buf = textbuf + *length; + SET_PSTATE_PROPOSER(param); + iscsi_check_proposer_for_optional_reply(param); + pr_debug("Sending key: %s=%s\n", + param->name, param->value); + } + } + + list_for_each_entry(er, &param_list->extra_response_list, er_list) { + *length += sprintf(output_buf, "%s=%s", er->key, er->value); + *length += 1; + output_buf = textbuf + *length; + pr_debug("Sending key: %s=%s\n", er->key, er->value); + } + iscsi_release_extra_responses(param_list); + + return 0; +} + +int iscsi_check_negotiated_keys(struct iscsi_param_list *param_list)
+{ + int ret = 0; + struct iscsi_param *param; + + list_for_each_entry(param, &param_list->param_list, p_list) { + if (IS_PSTATE_NEGOTIATE(param) && + IS_PSTATE_PROPOSER(param) && + !IS_PSTATE_RESPONSE_GOT(param) && + !IS_PSTATE_REPLY_OPTIONAL(param) && + !IS_PHASE_DECLARATIVE(param)) { + pr_err("No response for proposed key \"%s\".\n", + param->name); + ret = -1; + } + } + + return ret; +} + +int iscsi_change_param_value( + char *keyvalue, + struct iscsi_param_list *param_list, + int check_key) +{ + char *key = NULL, *value = NULL; + struct iscsi_param *param; + int sender = 0; + + if (iscsi_extract_key_value(keyvalue, &key, &value) < 0) + return -1; + + if (!check_key) { + param = __iscsi_check_key(keyvalue, sender, param_list); + if (!param) + return -1; + } else { + param = iscsi_check_key(keyvalue, 0, sender, param_list); + if (!param) + return -1; + + param->set_param = 1; + if (iscsi_check_value(param, value) < 0) { + param->set_param = 0; + return -1; + } + param->set_param = 0; + } + + if (iscsi_update_param_value(param, value) < 0) + return -1; + + return 0; +} + +void iscsi_set_connection_parameters( + struct iscsi_conn_ops *ops, + struct iscsi_param_list *param_list) +{ + char *tmpptr; + struct iscsi_param *param; + + pr_debug("---------------------------------------------------" + "---------------\n"); + list_for_each_entry(param, &param_list->param_list, p_list) { + if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param)) + continue; + if (!strcmp(param->name, AUTHMETHOD)) { + pr_debug("AuthMethod: %s\n", + param->value); + } else if (!strcmp(param->name, HEADERDIGEST)) { + ops->HeaderDigest = !strcmp(param->value, CRC32C); + pr_debug("HeaderDigest: %s\n", + param->value); + } else if (!strcmp(param->name, DATADIGEST)) { + ops->DataDigest = !strcmp(param->value, CRC32C); + pr_debug("DataDigest: %s\n", + param->value); + } else if (!strcmp(param->name, MAXRECVDATASEGMENTLENGTH)) { + ops->MaxRecvDataSegmentLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxRecvDataSegmentLength: %s\n", + param->value); + } else if (!strcmp(param->name, OFMARKER)) { + ops->OFMarker = !strcmp(param->value, YES); + pr_debug("OFMarker: %s\n", + param->value); + } else if (!strcmp(param->name, IFMARKER)) { + ops->IFMarker = !strcmp(param->value, YES); + pr_debug("IFMarker: %s\n", + param->value); + } else if (!strcmp(param->name, OFMARKINT)) { + ops->OFMarkInt = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("OFMarkInt: %s\n", + param->value); + } else if (!strcmp(param->name, IFMARKINT)) { + ops->IFMarkInt = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("IFMarkInt: %s\n", + param->value); + } + } + pr_debug("----------------------------------------------------" + "--------------\n"); +} + +void iscsi_set_session_parameters( + struct iscsi_sess_ops *ops, + struct iscsi_param_list *param_list, + int leading) +{ + char *tmpptr; + struct iscsi_param *param; + + pr_debug("----------------------------------------------------" + "--------------\n"); + list_for_each_entry(param, &param_list->param_list, p_list) { + if (!IS_PSTATE_ACCEPTOR(param) && !IS_PSTATE_PROPOSER(param)) + continue; + if (!strcmp(param->name, INITIATORNAME)) { + if (!param->value) + continue; + if (leading) + snprintf(ops->InitiatorName, + sizeof(ops->InitiatorName), + "%s", param->value); + pr_debug("InitiatorName: %s\n", + param->value); + } else if (!strcmp(param->name, INITIATORALIAS)) { + if (!param->value) + continue; + snprintf(ops->InitiatorAlias, + sizeof(ops->InitiatorAlias), + "%s", param->value);
+ pr_debug("InitiatorAlias: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETNAME)) { + if (!param->value) + continue; + if (leading) + snprintf(ops->TargetName, + sizeof(ops->TargetName), + "%s", param->value); + pr_debug("TargetName: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETALIAS)) { + if (!param->value) + continue; + snprintf(ops->TargetAlias, sizeof(ops->TargetAlias), + "%s", param->value); + pr_debug("TargetAlias: %s\n", + param->value); + } else if (!strcmp(param->name, TARGETPORTALGROUPTAG)) { + ops->TargetPortalGroupTag = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("TargetPortalGroupTag: %s\n", + param->value); + } else if (!strcmp(param->name, MAXCONNECTIONS)) { + ops->MaxConnections = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxConnections: %s\n", + param->value); + } else if (!strcmp(param->name, INITIALR2T)) { + ops->InitialR2T = !strcmp(param->value, YES); + pr_debug("InitialR2T: %s\n", + param->value); + } else if (!strcmp(param->name, IMMEDIATEDATA)) { + ops->ImmediateData = !strcmp(param->value, YES); + pr_debug("ImmediateData: %s\n", + param->value); + } else if (!strcmp(param->name, MAXBURSTLENGTH)) { + ops->MaxBurstLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxBurstLength: %s\n", + param->value); + } else if (!strcmp(param->name, FIRSTBURSTLENGTH)) { + ops->FirstBurstLength = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("FirstBurstLength: %s\n", + param->value); + } else if (!strcmp(param->name, DEFAULTTIME2WAIT)) { + ops->DefaultTime2Wait = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("DefaultTime2Wait: %s\n", + param->value); + } else if (!strcmp(param->name, DEFAULTTIME2RETAIN)) { + ops->DefaultTime2Retain = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("DefaultTime2Retain: %s\n", + param->value); + } else if (!strcmp(param->name, MAXOUTSTANDINGR2T)) { + ops->MaxOutstandingR2T = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("MaxOutstandingR2T: %s\n", + param->value); + } else if (!strcmp(param->name, DATAPDUINORDER)) { + ops->DataPDUInOrder = !strcmp(param->value, YES); + pr_debug("DataPDUInOrder: %s\n", + param->value); + } else if (!strcmp(param->name, DATASEQUENCEINORDER)) { + ops->DataSequenceInOrder = !strcmp(param->value, YES); + pr_debug("DataSequenceInOrder: %s\n", + param->value); + } else if (!strcmp(param->name, ERRORRECOVERYLEVEL)) { + ops->ErrorRecoveryLevel = + simple_strtoul(param->value, &tmpptr, 0); + pr_debug("ErrorRecoveryLevel: %s\n", + param->value); + } else if (!strcmp(param->name, SESSIONTYPE)) { + ops->SessionType = !strcmp(param->value, DISCOVERY); + pr_debug("SessionType: %s\n", + param->value); + } + } + pr_debug("----------------------------------------------------" + "--------------\n"); + +} diff --git a/drivers/target/iscsi/iscsi_target_parameters.h b/drivers/target/iscsi/iscsi_target_parameters.h new file mode 100644 index 000000000000..6a37fd6f1285 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_parameters.h @@ -0,0 +1,269 @@ +#ifndef ISCSI_PARAMETERS_H +#define ISCSI_PARAMETERS_H + +struct iscsi_extra_response { + char key[64]; + char value[32]; + struct list_head er_list; +} ____cacheline_aligned; + +struct iscsi_param { + char *name; + char *value; + u8 set_param; + u8 phase; + u8 scope; + u8 sender; + u8 type; + u8 use; + u16 type_range; + u32 state; + struct list_head p_list; +} ____cacheline_aligned; + +extern int iscsi_login_rx_data(struct iscsi_conn *, char *, int); +extern int 
iscsi_login_tx_data(struct iscsi_conn *, char *, char *, int); +extern void iscsi_dump_conn_ops(struct iscsi_conn_ops *); +extern void iscsi_dump_sess_ops(struct iscsi_sess_ops *); +extern void iscsi_print_params(struct iscsi_param_list *); +extern int iscsi_create_default_params(struct iscsi_param_list **); +extern int iscsi_set_keys_to_negotiate(int, struct iscsi_param_list *); +extern int iscsi_set_keys_irrelevant_for_discovery(struct iscsi_param_list *); +extern int iscsi_copy_param_list(struct iscsi_param_list **, + struct iscsi_param_list *, int); +extern int iscsi_change_param_value(char *, struct iscsi_param_list *, int); +extern void iscsi_release_param_list(struct iscsi_param_list *); +extern struct iscsi_param *iscsi_find_param_from_key(char *, struct iscsi_param_list *); +extern int iscsi_extract_key_value(char *, char **, char **); +extern int iscsi_update_param_value(struct iscsi_param *, char *); +extern int iscsi_decode_text_input(u8, u8, char *, u32, struct iscsi_param_list *); +extern int iscsi_encode_text_output(u8, u8, char *, u32 *, + struct iscsi_param_list *); +extern int iscsi_check_negotiated_keys(struct iscsi_param_list *); +extern void iscsi_set_connection_parameters(struct iscsi_conn_ops *, + struct iscsi_param_list *); +extern void iscsi_set_session_parameters(struct iscsi_sess_ops *, + struct iscsi_param_list *, int); + +#define YES "Yes" +#define NO "No" +#define ALL "All" +#define IRRELEVANT "Irrelevant" +#define NONE "None" +#define NOTUNDERSTOOD "NotUnderstood" +#define REJECT "Reject" + +/* + * The Parameter Names. + */ +#define AUTHMETHOD "AuthMethod" +#define HEADERDIGEST "HeaderDigest" +#define DATADIGEST "DataDigest" +#define MAXCONNECTIONS "MaxConnections" +#define SENDTARGETS "SendTargets" +#define TARGETNAME "TargetName" +#define INITIATORNAME "InitiatorName" +#define TARGETALIAS "TargetAlias" +#define INITIATORALIAS "InitiatorAlias" +#define TARGETADDRESS "TargetAddress" +#define TARGETPORTALGROUPTAG "TargetPortalGroupTag" +#define INITIALR2T "InitialR2T" +#define IMMEDIATEDATA "ImmediateData" +#define MAXRECVDATASEGMENTLENGTH "MaxRecvDataSegmentLength" +#define MAXBURSTLENGTH "MaxBurstLength" +#define FIRSTBURSTLENGTH "FirstBurstLength" +#define DEFAULTTIME2WAIT "DefaultTime2Wait" +#define DEFAULTTIME2RETAIN "DefaultTime2Retain" +#define MAXOUTSTANDINGR2T "MaxOutstandingR2T" +#define DATAPDUINORDER "DataPDUInOrder" +#define DATASEQUENCEINORDER "DataSequenceInOrder" +#define ERRORRECOVERYLEVEL "ErrorRecoveryLevel" +#define SESSIONTYPE "SessionType" +#define IFMARKER "IFMarker" +#define OFMARKER "OFMarker" +#define IFMARKINT "IFMarkInt" +#define OFMARKINT "OFMarkInt" +#define X_EXTENSIONKEY "X-com.sbei.version" +#define X_EXTENSIONKEY_CISCO_NEW "X-com.cisco.protocol" +#define X_EXTENSIONKEY_CISCO_OLD "X-com.cisco.iscsi.draft" + +/* + * For AuthMethod. + */ +#define KRB5 "KRB5" +#define SPKM1 "SPKM1" +#define SPKM2 "SPKM2" +#define SRP "SRP" +#define CHAP "CHAP" + +/* + * Initial values for Parameter Negotiation. 
+ */ +#define INITIAL_AUTHMETHOD CHAP +#define INITIAL_HEADERDIGEST "CRC32C,None" +#define INITIAL_DATADIGEST "CRC32C,None" +#define INITIAL_MAXCONNECTIONS "1" +#define INITIAL_SENDTARGETS ALL +#define INITIAL_TARGETNAME "LIO.Target" +#define INITIAL_INITIATORNAME "LIO.Initiator" +#define INITIAL_TARGETALIAS "LIO Target" +#define INITIAL_INITIATORALIAS "LIO Initiator" +#define INITIAL_TARGETADDRESS "0.0.0.0:0000,0" +#define INITIAL_TARGETPORTALGROUPTAG "1" +#define INITIAL_INITIALR2T YES +#define INITIAL_IMMEDIATEDATA YES +#define INITIAL_MAXRECVDATASEGMENTLENGTH "8192" +#define INITIAL_MAXBURSTLENGTH "262144" +#define INITIAL_FIRSTBURSTLENGTH "65536" +#define INITIAL_DEFAULTTIME2WAIT "2" +#define INITIAL_DEFAULTTIME2RETAIN "20" +#define INITIAL_MAXOUTSTANDINGR2T "1" +#define INITIAL_DATAPDUINORDER YES +#define INITIAL_DATASEQUENCEINORDER YES +#define INITIAL_ERRORRECOVERYLEVEL "0" +#define INITIAL_SESSIONTYPE NORMAL +#define INITIAL_IFMARKER NO +#define INITIAL_OFMARKER NO +#define INITIAL_IFMARKINT "2048~65535" +#define INITIAL_OFMARKINT "2048~65535" + +/* + * For [Header,Data]Digests. + */ +#define CRC32C "CRC32C" + +/* + * For SessionType. + */ +#define DISCOVERY "Discovery" +#define NORMAL "Normal" + +/* + * struct iscsi_param->use + */ +#define USE_LEADING_ONLY 0x01 +#define USE_INITIAL_ONLY 0x02 +#define USE_ALL 0x04 + +#define IS_USE_LEADING_ONLY(p) ((p)->use & USE_LEADING_ONLY) +#define IS_USE_INITIAL_ONLY(p) ((p)->use & USE_INITIAL_ONLY) +#define IS_USE_ALL(p) ((p)->use & USE_ALL) + +#define SET_USE_INITIAL_ONLY(p) ((p)->use |= USE_INITIAL_ONLY) + +/* + * struct iscsi_param->sender + */ +#define SENDER_INITIATOR 0x01 +#define SENDER_TARGET 0x02 +#define SENDER_BOTH 0x03 +/* Used in iscsi_check_key() */ +#define SENDER_RECEIVER 0x04 + +#define IS_SENDER_INITIATOR(p) ((p)->sender & SENDER_INITIATOR) +#define IS_SENDER_TARGET(p) ((p)->sender & SENDER_TARGET) +#define IS_SENDER_BOTH(p) ((p)->sender & SENDER_BOTH) + +/* + * struct iscsi_param->scope + */ +#define SCOPE_CONNECTION_ONLY 0x01 +#define SCOPE_SESSION_WIDE 0x02 + +#define IS_SCOPE_CONNECTION_ONLY(p) ((p)->scope & SCOPE_CONNECTION_ONLY) +#define IS_SCOPE_SESSION_WIDE(p) ((p)->scope & SCOPE_SESSION_WIDE) + +/* + * struct iscsi_param->phase + */ +#define PHASE_SECURITY 0x01 +#define PHASE_OPERATIONAL 0x02 +#define PHASE_DECLARATIVE 0x04 +#define PHASE_FFP0 0x08 + +#define IS_PHASE_SECURITY(p) ((p)->phase & PHASE_SECURITY) +#define IS_PHASE_OPERATIONAL(p) ((p)->phase & PHASE_OPERATIONAL) +#define IS_PHASE_DECLARATIVE(p) ((p)->phase & PHASE_DECLARATIVE) +#define IS_PHASE_FFP0(p) ((p)->phase & PHASE_FFP0) + +/* + * struct iscsi_param->type + */ +#define TYPE_BOOL_AND 0x01 +#define TYPE_BOOL_OR 0x02 +#define TYPE_NUMBER 0x04 +#define TYPE_NUMBER_RANGE 0x08 +#define TYPE_STRING 0x10 +#define TYPE_VALUE_LIST 0x20 + +#define IS_TYPE_BOOL_AND(p) ((p)->type & TYPE_BOOL_AND) +#define IS_TYPE_BOOL_OR(p) ((p)->type & TYPE_BOOL_OR) +#define IS_TYPE_NUMBER(p) ((p)->type & TYPE_NUMBER) +#define IS_TYPE_NUMBER_RANGE(p) ((p)->type & TYPE_NUMBER_RANGE) +#define IS_TYPE_STRING(p) ((p)->type & TYPE_STRING) +#define IS_TYPE_VALUE_LIST(p) ((p)->type & TYPE_VALUE_LIST) + +/* + * struct iscsi_param->type_range + */ +#define TYPERANGE_BOOL_AND 0x0001 +#define TYPERANGE_BOOL_OR 0x0002 +#define TYPERANGE_0_TO_2 0x0004 +#define TYPERANGE_0_TO_3600 0x0008 +#define TYPERANGE_0_TO_32767 0x0010 +#define TYPERANGE_0_TO_65535 0x0020 +#define TYPERANGE_1_TO_65535 0x0040 +#define TYPERANGE_2_TO_3600 0x0080 +#define TYPERANGE_512_TO_16777215 0x0100 +#define 
TYPERANGE_AUTH 0x0200 +#define TYPERANGE_DIGEST 0x0400 +#define TYPERANGE_ISCSINAME 0x0800 +#define TYPERANGE_MARKINT 0x1000 +#define TYPERANGE_SESSIONTYPE 0x2000 +#define TYPERANGE_TARGETADDRESS 0x4000 +#define TYPERANGE_UTF8 0x8000 + +#define IS_TYPERANGE_0_TO_2(p) ((p)->type_range & TYPERANGE_0_TO_2) +#define IS_TYPERANGE_0_TO_3600(p) ((p)->type_range & TYPERANGE_0_TO_3600) +#define IS_TYPERANGE_0_TO_32767(p) ((p)->type_range & TYPERANGE_0_TO_32767) +#define IS_TYPERANGE_0_TO_65535(p) ((p)->type_range & TYPERANGE_0_TO_65535) +#define IS_TYPERANGE_1_TO_65535(p) ((p)->type_range & TYPERANGE_1_TO_65535) +#define IS_TYPERANGE_2_TO_3600(p) ((p)->type_range & TYPERANGE_2_TO_3600) +#define IS_TYPERANGE_512_TO_16777215(p) ((p)->type_range & \ + TYPERANGE_512_TO_16777215) +#define IS_TYPERANGE_AUTH_PARAM(p) ((p)->type_range & TYPERANGE_AUTH) +#define IS_TYPERANGE_DIGEST_PARAM(p) ((p)->type_range & TYPERANGE_DIGEST) +#define IS_TYPERANGE_SESSIONTYPE(p) ((p)->type_range & \ + TYPERANGE_SESSIONTYPE) + +/* + * struct iscsi_param->state + */ +#define PSTATE_ACCEPTOR 0x01 +#define PSTATE_NEGOTIATE 0x02 +#define PSTATE_PROPOSER 0x04 +#define PSTATE_IRRELEVANT 0x08 +#define PSTATE_REJECT 0x10 +#define PSTATE_REPLY_OPTIONAL 0x20 +#define PSTATE_RESPONSE_GOT 0x40 +#define PSTATE_RESPONSE_SENT 0x80 + +#define IS_PSTATE_ACCEPTOR(p) ((p)->state & PSTATE_ACCEPTOR) +#define IS_PSTATE_NEGOTIATE(p) ((p)->state & PSTATE_NEGOTIATE) +#define IS_PSTATE_PROPOSER(p) ((p)->state & PSTATE_PROPOSER) +#define IS_PSTATE_IRRELEVANT(p) ((p)->state & PSTATE_IRRELEVANT) +#define IS_PSTATE_REJECT(p) ((p)->state & PSTATE_REJECT) +#define IS_PSTATE_REPLY_OPTIONAL(p) ((p)->state & PSTATE_REPLY_OPTIONAL) +#define IS_PSTATE_RESPONSE_GOT(p) ((p)->state & PSTATE_RESPONSE_GOT) +#define IS_PSTATE_RESPONSE_SENT(p) ((p)->state & PSTATE_RESPONSE_SENT) + +#define SET_PSTATE_ACCEPTOR(p) ((p)->state |= PSTATE_ACCEPTOR) +#define SET_PSTATE_NEGOTIATE(p) ((p)->state |= PSTATE_NEGOTIATE) +#define SET_PSTATE_PROPOSER(p) ((p)->state |= PSTATE_PROPOSER) +#define SET_PSTATE_IRRELEVANT(p) ((p)->state |= PSTATE_IRRELEVANT) +#define SET_PSTATE_REJECT(p) ((p)->state |= PSTATE_REJECT) +#define SET_PSTATE_REPLY_OPTIONAL(p) ((p)->state |= PSTATE_REPLY_OPTIONAL) +#define SET_PSTATE_RESPONSE_GOT(p) ((p)->state |= PSTATE_RESPONSE_GOT) +#define SET_PSTATE_RESPONSE_SENT(p) ((p)->state |= PSTATE_RESPONSE_SENT) + +#endif /* ISCSI_PARAMETERS_H */ diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.c b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c new file mode 100644 index 000000000000..fc694082bfc0 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.c @@ -0,0 +1,664 @@ +/******************************************************************************* + * This file contains main functions related to iSCSI DataSequenceInOrder=No + * and DataPDUInOrder=No. + * + * © Copyright 2007-2011 RisingTide Systems LLC. + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/slab.h> +#include <linux/random.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_util.h" +#include "iscsi_target_seq_pdu_list.h" + +#define OFFLOAD_BUF_SIZE 32768 + +void iscsit_dump_seq_list(struct iscsi_cmd *cmd) +{ + int i; + struct iscsi_seq *seq; + + pr_debug("Dumping Sequence List for ITT: 0x%08x:\n", + cmd->init_task_tag); + + for (i = 0; i < cmd->seq_count; i++) { + seq = &cmd->seq_list[i]; + pr_debug("i: %d, pdu_start: %d, pdu_count: %d," + " offset: %d, xfer_len: %d, seq_send_order: %d," + " seq_no: %d\n", i, seq->pdu_start, seq->pdu_count, + seq->offset, seq->xfer_len, seq->seq_send_order, + seq->seq_no); + } +} + +void iscsit_dump_pdu_list(struct iscsi_cmd *cmd) +{ + int i; + struct iscsi_pdu *pdu; + + pr_debug("Dumping PDU List for ITT: 0x%08x:\n", + cmd->init_task_tag); + + for (i = 0; i < cmd->pdu_count; i++) { + pdu = &cmd->pdu_list[i]; + pr_debug("i: %d, offset: %d, length: %d," + " pdu_send_order: %d, seq_no: %d\n", i, pdu->offset, + pdu->length, pdu->pdu_send_order, pdu->seq_no); + } +} + +static void iscsit_ordered_seq_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + u32 i, seq_count = 0; + + for (i = 0; i < cmd->seq_count; i++) { + if (cmd->seq_list[i].type != SEQTYPE_NORMAL) + continue; + cmd->seq_list[i].seq_send_order = seq_count++; + } +} + +static void iscsit_ordered_pdu_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + u32 i, pdu_send_order = 0, seq_no = 0; + + for (i = 0; i < cmd->pdu_count; i++) { +redo: + if (cmd->pdu_list[i].seq_no == seq_no) { + cmd->pdu_list[i].pdu_send_order = pdu_send_order++; + continue; + } + seq_no++; + pdu_send_order = 0; + goto redo; + } +} + +/* + * Generate count random values into array. + * Use 0x80000000 to mark generated values in array[].
+ */ +static void iscsit_create_random_array(u32 *array, u32 count) +{ + int i, j, k; + + if (count == 1) { + array[0] = 0; + return; + } + + for (i = 0; i < count; i++) { +redo: + get_random_bytes(&j, sizeof(u32)); + j = (1 + (int) (9999 + 1) - j) % count; + for (k = 0; k < i + 1; k++) { + j |= 0x80000000; + if ((array[k] & 0x80000000) && (array[k] == j)) + goto redo; + } + array[i] = j; + } + + for (i = 0; i < count; i++) + array[i] &= ~0x80000000; +} + +static int iscsit_randomize_pdu_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + int i = 0; + u32 *array, pdu_count, seq_count = 0, seq_no = 0, seq_offset = 0; + + for (pdu_count = 0; pdu_count < cmd->pdu_count; pdu_count++) { +redo: + if (cmd->pdu_list[pdu_count].seq_no == seq_no) { + seq_count++; + continue; + } + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory" + " for random array.\n"); + return -1; + } + iscsit_create_random_array(array, seq_count); + + for (i = 0; i < seq_count; i++) + cmd->pdu_list[seq_offset+i].pdu_send_order = array[i]; + + kfree(array); + + seq_offset += seq_count; + seq_count = 0; + seq_no++; + goto redo; + } + + if (seq_count) { + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory for" + " random array.\n"); + return -1; + } + iscsit_create_random_array(array, seq_count); + + for (i = 0; i < seq_count; i++) + cmd->pdu_list[seq_offset+i].pdu_send_order = array[i]; + + kfree(array); + } + + return 0; +} + +static int iscsit_randomize_seq_lists( + struct iscsi_cmd *cmd, + u8 type) +{ + int i, j = 0; + u32 *array, seq_count = cmd->seq_count; + + if ((type == PDULIST_IMMEDIATE) || (type == PDULIST_UNSOLICITED)) + seq_count--; + else if (type == PDULIST_IMMEDIATE_AND_UNSOLICITED) + seq_count -= 2; + + if (!seq_count) + return 0; + + array = kzalloc(seq_count * sizeof(u32), GFP_KERNEL); + if (!array) { + pr_err("Unable to allocate memory for random array.\n"); + return -1; + } + iscsit_create_random_array(array, seq_count); + + for (i = 0; i < cmd->seq_count; i++) { + if (cmd->seq_list[i].type != SEQTYPE_NORMAL) + continue; + cmd->seq_list[i].seq_send_order = array[j++]; + } + + kfree(array); + return 0; +} + +static void iscsit_determine_counts_for_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl, + u32 *seq_count, + u32 *pdu_count) +{ + int check_immediate = 0; + u32 burstlength = 0, offset = 0; + u32 unsolicited_data_length = 0; + struct iscsi_conn *conn = cmd->conn; + + if ((bl->type == PDULIST_IMMEDIATE) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + check_immediate = 1; + + if ((bl->type == PDULIST_UNSOLICITED) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + unsolicited_data_length = (cmd->data_length > + conn->sess->sess_ops->FirstBurstLength) ? 
+ conn->sess->sess_ops->FirstBurstLength : cmd->data_length; + + while (offset < cmd->data_length) { + *pdu_count += 1; + + if (check_immediate) { + check_immediate = 0; + offset += bl->immediate_data_length; + *seq_count += 1; + if (unsolicited_data_length) + unsolicited_data_length -= + bl->immediate_data_length; + continue; + } + if (unsolicited_data_length > 0) { + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) + >= cmd->data_length) { + unsolicited_data_length -= + (cmd->data_length - offset); + offset += (cmd->data_length - offset); + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) + >= conn->sess->sess_ops->FirstBurstLength) { + unsolicited_data_length -= + (conn->sess->sess_ops->FirstBurstLength - + offset); + offset += (conn->sess->sess_ops->FirstBurstLength - + offset); + burstlength = 0; + *seq_count += 1; + continue; + } + + offset += conn->conn_ops->MaxRecvDataSegmentLength; + unsolicited_data_length -= + conn->conn_ops->MaxRecvDataSegmentLength; + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + offset += (cmd->data_length - offset); + continue; + } + if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->MaxBurstLength) { + offset += (conn->sess->sess_ops->MaxBurstLength - + burstlength); + burstlength = 0; + *seq_count += 1; + continue; + } + + burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + } +} + + +/* + * Builds PDU and/or Sequence list, called while DataSequenceInOrder=No + * and DataPDUInOrder=No. + */ +static int iscsit_build_pdu_and_seq_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl) +{ + int check_immediate = 0, datapduinorder, datasequenceinorder; + u32 burstlength = 0, offset = 0, i = 0; + u32 pdu_count = 0, seq_no = 0, unsolicited_data_length = 0; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = cmd->pdu_list; + struct iscsi_seq *seq = cmd->seq_list; + + datapduinorder = conn->sess->sess_ops->DataPDUInOrder; + datasequenceinorder = conn->sess->sess_ops->DataSequenceInOrder; + + if ((bl->type == PDULIST_IMMEDIATE) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + check_immediate = 1; + + if ((bl->type == PDULIST_UNSOLICITED) || + (bl->type == PDULIST_IMMEDIATE_AND_UNSOLICITED)) + unsolicited_data_length = (cmd->data_length > + conn->sess->sess_ops->FirstBurstLength) ? 
+ conn->sess->sess_ops->FirstBurstLength : cmd->data_length; + + while (offset < cmd->data_length) { + pdu_count++; + if (!datapduinorder) { + pdu[i].offset = offset; + pdu[i].seq_no = seq_no; + } + if (!datasequenceinorder && (pdu_count == 1)) { + seq[seq_no].pdu_start = i; + seq[seq_no].seq_no = seq_no; + seq[seq_no].offset = offset; + seq[seq_no].orig_offset = offset; + } + + if (check_immediate) { + check_immediate = 0; + if (!datapduinorder) { + pdu[i].type = PDUTYPE_IMMEDIATE; + pdu[i++].length = bl->immediate_data_length; + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_IMMEDIATE; + seq[seq_no].pdu_count = 1; + seq[seq_no].xfer_len = + bl->immediate_data_length; + } + offset += bl->immediate_data_length; + pdu_count = 0; + seq_no++; + if (unsolicited_data_length) + unsolicited_data_length -= + bl->immediate_data_length; + continue; + } + if (unsolicited_data_length > 0) { + if ((offset + + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i].length = + (cmd->data_length - offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_UNSOLICITED; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (cmd->data_length - offset)); + } + unsolicited_data_length -= + (cmd->data_length - offset); + offset += (cmd->data_length - offset); + continue; + } + if ((offset + + conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->FirstBurstLength) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i++].length = + (conn->sess->sess_ops->FirstBurstLength - + offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_UNSOLICITED; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (conn->sess->sess_ops->FirstBurstLength - + offset)); + } + unsolicited_data_length -= + (conn->sess->sess_ops->FirstBurstLength - + offset); + offset += (conn->sess->sess_ops->FirstBurstLength - + offset); + burstlength = 0; + pdu_count = 0; + seq_no++; + continue; + } + + if (!datapduinorder) { + pdu[i].type = PDUTYPE_UNSOLICITED; + pdu[i++].length = + conn->conn_ops->MaxRecvDataSegmentLength; + } + burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + unsolicited_data_length -= + conn->conn_ops->MaxRecvDataSegmentLength; + continue; + } + if ((offset + conn->conn_ops->MaxRecvDataSegmentLength) >= + cmd->data_length) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i].length = (cmd->data_length - offset); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_NORMAL; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (cmd->data_length - offset)); + } + offset += (cmd->data_length - offset); + continue; + } + if ((burstlength + conn->conn_ops->MaxRecvDataSegmentLength) >= + conn->sess->sess_ops->MaxBurstLength) { + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i++].length = + (conn->sess->sess_ops->MaxBurstLength - + burstlength); + } + if (!datasequenceinorder) { + seq[seq_no].type = SEQTYPE_NORMAL; + seq[seq_no].pdu_count = pdu_count; + seq[seq_no].xfer_len = (burstlength + + (conn->sess->sess_ops->MaxBurstLength - + burstlength)); + } + offset += (conn->sess->sess_ops->MaxBurstLength - + burstlength); + burstlength = 0; + pdu_count = 0; + seq_no++; + continue; + } + + if (!datapduinorder) { + pdu[i].type = PDUTYPE_NORMAL; + pdu[i++].length = + conn->conn_ops->MaxRecvDataSegmentLength; + } + 
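/*
+ * Neither the end of the command nor the end of the current burst
+ * was reached: account for one full MaxRecvDataSegmentLength PDU
+ * and stay within the current sequence.
+ */
+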
burstlength += conn->conn_ops->MaxRecvDataSegmentLength; + offset += conn->conn_ops->MaxRecvDataSegmentLength; + } + + if (!datasequenceinorder) { + if (bl->data_direction & ISCSI_PDU_WRITE) { + if (bl->randomize & RANDOM_R2T_OFFSETS) { + if (iscsit_randomize_seq_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_seq_lists(cmd, bl->type); + } else if (bl->data_direction & ISCSI_PDU_READ) { + if (bl->randomize & RANDOM_DATAIN_SEQ_OFFSETS) { + if (iscsit_randomize_seq_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_seq_lists(cmd, bl->type); + } +#if 0 + iscsit_dump_seq_list(cmd); +#endif + } + if (!datapduinorder) { + if (bl->data_direction & ISCSI_PDU_WRITE) { + if (bl->randomize & RANDOM_DATAOUT_PDU_OFFSETS) { + if (iscsit_randomize_pdu_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_pdu_lists(cmd, bl->type); + } else if (bl->data_direction & ISCSI_PDU_READ) { + if (bl->randomize & RANDOM_DATAIN_PDU_OFFSETS) { + if (iscsit_randomize_pdu_lists(cmd, bl->type) + < 0) + return -1; + } else + iscsit_ordered_pdu_lists(cmd, bl->type); + } +#if 0 + iscsit_dump_pdu_list(cmd); +#endif + } + + return 0; +} + +/* + * Only called while DataSequenceInOrder=No or DataPDUInOrder=No. + */ +int iscsit_do_build_list( + struct iscsi_cmd *cmd, + struct iscsi_build_list *bl) +{ + u32 pdu_count = 0, seq_count = 1; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + struct iscsi_seq *seq = NULL; + + iscsit_determine_counts_for_list(cmd, bl, &seq_count, &pdu_count); + + if (!conn->sess->sess_ops->DataSequenceInOrder) { + seq = kzalloc(seq_count * sizeof(struct iscsi_seq), GFP_ATOMIC); + if (!seq) { + pr_err("Unable to allocate struct iscsi_seq list\n"); + return -1; + } + cmd->seq_list = seq; + cmd->seq_count = seq_count; + } + + if (!conn->sess->sess_ops->DataPDUInOrder) { + pdu = kzalloc(pdu_count * sizeof(struct iscsi_pdu), GFP_ATOMIC); + if (!pdu) { + pr_err("Unable to allocate struct iscsi_pdu list.\n"); + kfree(seq); + return -1; + } + cmd->pdu_list = pdu; + cmd->pdu_count = pdu_count; + } + + return iscsit_build_pdu_and_seq_list(cmd, bl); +} + +struct iscsi_pdu *iscsit_get_pdu_holder( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + u32 i; + struct iscsi_pdu *pdu = NULL; + + if (!cmd->pdu_list) { + pr_err("struct iscsi_cmd->pdu_list is NULL!\n"); + return NULL; + } + + pdu = &cmd->pdu_list[0]; + + for (i = 0; i < cmd->pdu_count; i++) + if ((pdu[i].offset == offset) && (pdu[i].length == length)) + return &pdu[i]; + + pr_err("Unable to locate PDU holder for ITT: 0x%08x, Offset:" + " %u, Length: %u\n", cmd->init_task_tag, offset, length); + return NULL; +} + +struct iscsi_pdu *iscsit_get_pdu_holder_for_seq( + struct iscsi_cmd *cmd, + struct iscsi_seq *seq) +{ + u32 i; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_pdu *pdu = NULL; + + if (!cmd->pdu_list) { + pr_err("struct iscsi_cmd->pdu_list is NULL!\n"); + return NULL; + } + + if (conn->sess->sess_ops->DataSequenceInOrder) { +redo: + pdu = &cmd->pdu_list[cmd->pdu_start]; + + for (i = 0; pdu[i].seq_no != cmd->seq_no; i++) { +#if 0 + pr_debug("pdu[i].seq_no: %d, pdu[i].pdu" + "_send_order: %d, pdu[i].offset: %d," + " pdu[i].length: %d\n", pdu[i].seq_no, + pdu[i].pdu_send_order, pdu[i].offset, + pdu[i].length); +#endif + if (pdu[i].pdu_send_order == cmd->pdu_send_order) { + cmd->pdu_send_order++; + return &pdu[i]; + } + } + + cmd->pdu_start += cmd->pdu_send_order; + cmd->pdu_send_order = 0; + cmd->seq_no++; + + if (cmd->pdu_start < cmd->pdu_count) + goto redo; + + 
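/*
+ * Fallthrough: every remaining sequence was walked without a PDU
+ * matching cmd->pdu_send_order.
+ */
+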
pr_err("Command ITT: 0x%08x unable to locate" + " struct iscsi_pdu for cmd->pdu_send_order: %u.\n", + cmd->init_task_tag, cmd->pdu_send_order); + return NULL; + } else { + if (!seq) { + pr_err("struct iscsi_seq is NULL!\n"); + return NULL; + } +#if 0 + pr_debug("seq->pdu_start: %d, seq->pdu_count: %d," + " seq->seq_no: %d\n", seq->pdu_start, seq->pdu_count, + seq->seq_no); +#endif + pdu = &cmd->pdu_list[seq->pdu_start]; + + if (seq->pdu_send_order == seq->pdu_count) { + pr_err("Command ITT: 0x%08x seq->pdu_send" + "_order: %u equals seq->pdu_count: %u\n", + cmd->init_task_tag, seq->pdu_send_order, + seq->pdu_count); + return NULL; + } + + for (i = 0; i < seq->pdu_count; i++) { + if (pdu[i].pdu_send_order == seq->pdu_send_order) { + seq->pdu_send_order++; + return &pdu[i]; + } + } + + pr_err("Command ITT: 0x%08x unable to locate iscsi" + "_pdu_t for seq->pdu_send_order: %u.\n", + cmd->init_task_tag, seq->pdu_send_order); + return NULL; + } + + return NULL; +} + +struct iscsi_seq *iscsit_get_seq_holder( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + u32 i; + + if (!cmd->seq_list) { + pr_err("struct iscsi_cmd->seq_list is NULL!\n"); + return NULL; + } + + for (i = 0; i < cmd->seq_count; i++) { +#if 0 + pr_debug("seq_list[i].orig_offset: %d, seq_list[i]." + "xfer_len: %d, seq_list[i].seq_no %u\n", + cmd->seq_list[i].orig_offset, cmd->seq_list[i].xfer_len, + cmd->seq_list[i].seq_no); +#endif + if ((cmd->seq_list[i].orig_offset + + cmd->seq_list[i].xfer_len) >= + (offset + length)) + return &cmd->seq_list[i]; + } + + pr_err("Unable to locate Sequence holder for ITT: 0x%08x," + " Offset: %u, Length: %u\n", cmd->init_task_tag, offset, + length); + return NULL; +} diff --git a/drivers/target/iscsi/iscsi_target_seq_pdu_list.h b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h new file mode 100644 index 000000000000..0d52a10e3069 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_seq_pdu_list.h @@ -0,0 +1,86 @@ +#ifndef ISCSI_SEQ_AND_PDU_LIST_H +#define ISCSI_SEQ_AND_PDU_LIST_H + +/* struct iscsi_pdu->status */ +#define DATAOUT_PDU_SENT 1 + +/* struct iscsi_seq->type */ +#define SEQTYPE_IMMEDIATE 1 +#define SEQTYPE_UNSOLICITED 2 +#define SEQTYPE_NORMAL 3 + +/* struct iscsi_seq->status */ +#define DATAOUT_SEQUENCE_GOT_R2T 1 +#define DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY 2 +#define DATAOUT_SEQUENCE_COMPLETE 3 + +/* iscsi_determine_counts_for_list() type */ +#define PDULIST_NORMAL 1 +#define PDULIST_IMMEDIATE 2 +#define PDULIST_UNSOLICITED 3 +#define PDULIST_IMMEDIATE_AND_UNSOLICITED 4 + +/* struct iscsi_pdu->type */ +#define PDUTYPE_IMMEDIATE 1 +#define PDUTYPE_UNSOLICITED 2 +#define PDUTYPE_NORMAL 3 + +/* struct iscsi_pdu->status */ +#define ISCSI_PDU_NOT_RECEIVED 0 +#define ISCSI_PDU_RECEIVED_OK 1 +#define ISCSI_PDU_CRC_FAILED 2 +#define ISCSI_PDU_TIMED_OUT 3 + +/* struct iscsi_build_list->randomize */ +#define RANDOM_DATAIN_PDU_OFFSETS 0x01 +#define RANDOM_DATAIN_SEQ_OFFSETS 0x02 +#define RANDOM_DATAOUT_PDU_OFFSETS 0x04 +#define RANDOM_R2T_OFFSETS 0x08 + +/* struct iscsi_build_list->data_direction */ +#define ISCSI_PDU_READ 0x01 +#define ISCSI_PDU_WRITE 0x02 + +struct iscsi_build_list { + int data_direction; + int randomize; + int type; + int immediate_data_length; +}; + +struct iscsi_pdu { + int status; + int type; + u8 flags; + u32 data_sn; + u32 length; + u32 offset; + u32 pdu_send_order; + u32 seq_no; +} ____cacheline_aligned; + +struct iscsi_seq { + int sent; + int status; + int type; + u32 data_sn; + u32 first_datasn; + u32 last_datasn; + u32 next_burst_len; + u32 pdu_start; + 
u32 pdu_count; + u32 offset; + u32 orig_offset; + u32 pdu_send_order; + u32 r2t_sn; + u32 seq_send_order; + u32 seq_no; + u32 xfer_len; +} ____cacheline_aligned; + +extern int iscsit_do_build_list(struct iscsi_cmd *, struct iscsi_build_list *); +extern struct iscsi_pdu *iscsit_get_pdu_holder(struct iscsi_cmd *, u32, u32); +extern struct iscsi_pdu *iscsit_get_pdu_holder_for_seq(struct iscsi_cmd *, struct iscsi_seq *); +extern struct iscsi_seq *iscsit_get_seq_holder(struct iscsi_cmd *, u32, u32); + +#endif /* ISCSI_SEQ_AND_PDU_LIST_H */ diff --git a/drivers/target/iscsi/iscsi_target_stat.c b/drivers/target/iscsi/iscsi_target_stat.c new file mode 100644 index 000000000000..bbdbe9301b27 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_stat.c @@ -0,0 +1,950 @@ +/******************************************************************************* + * Modern ConfigFS group context specific iSCSI statistics based on original + * iscsi_target_mib.c code + * + * Copyright (c) 2011 Rising Tide Systems + * + * Licensed to the Linux Foundation under the General Public License (GPL) version 2. + * + * Author: Nicholas A. Bellinger <nab@linux-iscsi.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + ******************************************************************************/ + +#include <linux/configfs.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/configfs_macros.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_parameters.h" +#include "iscsi_target_device.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target_stat.h" + +#ifndef INITIAL_JIFFIES +#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) +#endif + +/* Instance Attributes Table */ +#define ISCSI_INST_NUM_NODES 1 +#define ISCSI_INST_DESCR "Storage Engine Target" +#define ISCSI_INST_LAST_FAILURE_TYPE 0 +#define ISCSI_DISCONTINUITY_TIME 0 + +#define ISCSI_NODE_INDEX 1 + +#define ISPRINT(a) ((a >= ' ') && (a <= '~')) + +/**************************************************************************** + * iSCSI MIB Tables + ****************************************************************************/ +/* + * Instance Attributes Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_instance, iscsi_wwn_stat_grps); +#define ISCSI_STAT_INSTANCE_ATTR(_name, _mode) \ +static struct iscsi_stat_instance_attribute \ + iscsi_stat_instance_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_instance_show_attr_##_name, \ + iscsi_stat_instance_store_attr_##_name); + +#define ISCSI_STAT_INSTANCE_ATTR_RO(_name) \ +static struct iscsi_stat_instance_attribute \ + iscsi_stat_instance_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_instance_show_attr_##_name); + +static ssize_t iscsi_stat_instance_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_INSTANCE_ATTR_RO(inst); + +static 
ssize_t iscsi_stat_instance_show_attr_min_ver( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(min_ver); + +static ssize_t iscsi_stat_instance_show_attr_max_ver( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DRAFT20_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(max_ver); + +static ssize_t iscsi_stat_instance_show_attr_portals( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_num_tpg_nps); +} +ISCSI_STAT_INSTANCE_ATTR_RO(portals); + +static ssize_t iscsi_stat_instance_show_attr_nodes( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_INST_NUM_NODES); +} +ISCSI_STAT_INSTANCE_ATTR_RO(nodes); + +static ssize_t iscsi_stat_instance_show_attr_sessions( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_nsessions); +} +ISCSI_STAT_INSTANCE_ATTR_RO(sessions); + +static ssize_t iscsi_stat_instance_show_attr_fail_sess( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + u32 sess_err_count; + + spin_lock_bh(&sess_err->lock); + sess_err_count = (sess_err->digest_errors + + sess_err->cxn_timeout_errors + + sess_err->pdu_format_errors); + spin_unlock_bh(&sess_err->lock); + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err_count); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_sess); + +static ssize_t iscsi_stat_instance_show_attr_fail_type( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", + sess_err->last_sess_failure_type); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_type); + +static ssize_t iscsi_stat_instance_show_attr_fail_rem_name( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%s\n", + sess_err->last_sess_fail_rem_name[0] ? 
+ sess_err->last_sess_fail_rem_name : NONE); +} +ISCSI_STAT_INSTANCE_ATTR_RO(fail_rem_name); + +static ssize_t iscsi_stat_instance_show_attr_disc_time( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_DISCONTINUITY_TIME); +} +ISCSI_STAT_INSTANCE_ATTR_RO(disc_time); + +static ssize_t iscsi_stat_instance_show_attr_description( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", ISCSI_INST_DESCR); +} +ISCSI_STAT_INSTANCE_ATTR_RO(description); + +static ssize_t iscsi_stat_instance_show_attr_vendor( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "RisingTide Systems iSCSI-Target\n"); +} +ISCSI_STAT_INSTANCE_ATTR_RO(vendor); + +static ssize_t iscsi_stat_instance_show_attr_version( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%s\n", ISCSIT_VERSION); +} +ISCSI_STAT_INSTANCE_ATTR_RO(version); + +CONFIGFS_EATTR_OPS(iscsi_stat_instance, iscsi_wwn_stat_grps, + iscsi_instance_group); + +static struct configfs_attribute *iscsi_stat_instance_attrs[] = { + &iscsi_stat_instance_inst.attr, + &iscsi_stat_instance_min_ver.attr, + &iscsi_stat_instance_max_ver.attr, + &iscsi_stat_instance_portals.attr, + &iscsi_stat_instance_nodes.attr, + &iscsi_stat_instance_sessions.attr, + &iscsi_stat_instance_fail_sess.attr, + &iscsi_stat_instance_fail_type.attr, + &iscsi_stat_instance_fail_rem_name.attr, + &iscsi_stat_instance_disc_time.attr, + &iscsi_stat_instance_description.attr, + &iscsi_stat_instance_vendor.attr, + &iscsi_stat_instance_version.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_instance_item_ops = { + .show_attribute = iscsi_stat_instance_attr_show, + .store_attribute = iscsi_stat_instance_attr_store, +}; + +struct config_item_type iscsi_stat_instance_cit = { + .ct_item_ops = &iscsi_stat_instance_item_ops, + .ct_attrs = iscsi_stat_instance_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Instance Session Failure Stats Table + */ +CONFIGFS_EATTR_STRUCT(iscsi_stat_sess_err, iscsi_wwn_stat_grps); +#define ISCSI_STAT_SESS_ERR_ATTR(_name, _mode) \ +static struct iscsi_stat_sess_err_attribute \ + iscsi_stat_sess_err_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_sess_err_show_attr_##_name, \ + iscsi_stat_sess_err_store_attr_##_name); + +#define ISCSI_STAT_SESS_ERR_ATTR_RO(_name) \ +static struct iscsi_stat_sess_err_attribute \ + iscsi_stat_sess_err_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_sess_err_show_attr_##_name); + +static ssize_t iscsi_stat_sess_err_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_SESS_ERR_ATTR_RO(inst); + +static ssize_t iscsi_stat_sess_err_show_attr_digest_errors( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", sess_err->digest_errors); +} +ISCSI_STAT_SESS_ERR_ATTR_RO(digest_errors); + +static ssize_t iscsi_stat_sess_err_show_attr_cxn_errors( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", 
sess_err->cxn_timeout_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(cxn_errors);
+
+static ssize_t iscsi_stat_sess_err_show_attr_format_errors(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_sess_err_stats *sess_err = &tiqn->sess_err_stats;
+
+ return snprintf(page, PAGE_SIZE, "%u\n", sess_err->pdu_format_errors);
+}
+ISCSI_STAT_SESS_ERR_ATTR_RO(format_errors);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_sess_err, iscsi_wwn_stat_grps,
+ iscsi_sess_err_group);
+
+static struct configfs_attribute *iscsi_stat_sess_err_attrs[] = {
+ &iscsi_stat_sess_err_inst.attr,
+ &iscsi_stat_sess_err_digest_errors.attr,
+ &iscsi_stat_sess_err_cxn_errors.attr,
+ &iscsi_stat_sess_err_format_errors.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_sess_err_item_ops = {
+ .show_attribute = iscsi_stat_sess_err_attr_show,
+ .store_attribute = iscsi_stat_sess_err_attr_store,
+};
+
+struct config_item_type iscsi_stat_sess_err_cit = {
+ .ct_item_ops = &iscsi_stat_sess_err_item_ops,
+ .ct_attrs = iscsi_stat_sess_err_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Target Attributes Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_TGT_ATTR(_name, _mode) \
+static struct iscsi_stat_tgt_attr_attribute \
+ iscsi_stat_tgt_attr_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_tgt_attr_show_attr_##_name, \
+ iscsi_stat_tgt_attr_store_attr_##_name);
+
+#define ISCSI_STAT_TGT_ATTR_RO(_name) \
+static struct iscsi_stat_tgt_attr_attribute \
+ iscsi_stat_tgt_attr_##_name = \
+ __CONFIGFS_EATTR_RO(_name, \
+ iscsi_stat_tgt_attr_show_attr_##_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_inst(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index);
+}
+ISCSI_STAT_TGT_ATTR_RO(inst);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_indx(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX);
+}
+ISCSI_STAT_TGT_ATTR_RO(indx);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_login_fails(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 fail_count;
+
+ spin_lock(&lstat->lock);
+ fail_count = (lstat->redirects + lstat->authorize_fails +
+ lstat->authenticate_fails + lstat->negotiate_fails +
+ lstat->other_fails);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", fail_count);
+}
+ISCSI_STAT_TGT_ATTR_RO(login_fails);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_time(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 last_fail_time;
+
+ spin_lock(&lstat->lock);
+ last_fail_time = lstat->last_fail_time ?
+ (u32)(((u32)lstat->last_fail_time -
+ INITIAL_JIFFIES) * 100 / HZ) : 0;
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", last_fail_time);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_time);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_last_fail_type(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ u32 last_fail_type;
+
+ spin_lock(&lstat->lock);
+ last_fail_type = lstat->last_fail_type;
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%u\n", last_fail_type);
+}
+ISCSI_STAT_TGT_ATTR_RO(last_fail_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_name(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[224];
+
+ spin_lock(&lstat->lock);
+ snprintf(buf, 224, "%s", lstat->last_intr_fail_name[0] ?
+ lstat->last_intr_fail_name : NONE);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_name);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr_type(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[8];
+
+ spin_lock(&lstat->lock);
+ snprintf(buf, 8, "%s", (lstat->last_intr_fail_ip_family == AF_INET6) ?
+ "ipv6" : "ipv4");
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr_type);
+
+static ssize_t iscsi_stat_tgt_attr_show_attr_fail_intr_addr(
+ struct iscsi_wwn_stat_grps *igrps, char *page)
+{
+ struct iscsi_tiqn *tiqn = container_of(igrps,
+ struct iscsi_tiqn, tiqn_stat_grps);
+ struct iscsi_login_stats *lstat = &tiqn->login_stats;
+ unsigned char buf[32];
+
+ spin_lock(&lstat->lock);
+ if (lstat->last_intr_fail_ip_family == AF_INET6)
+ snprintf(buf, 32, "[%s]", lstat->last_intr_fail_ip_addr);
+ else
+ snprintf(buf, 32, "%s", lstat->last_intr_fail_ip_addr);
+ spin_unlock(&lstat->lock);
+
+ return snprintf(page, PAGE_SIZE, "%s\n", buf);
+}
+ISCSI_STAT_TGT_ATTR_RO(fail_intr_addr);
+
+CONFIGFS_EATTR_OPS(iscsi_stat_tgt_attr, iscsi_wwn_stat_grps,
+ iscsi_tgt_attr_group);
+
+static struct configfs_attribute *iscsi_stat_tgt_attr_attrs[] = {
+ &iscsi_stat_tgt_attr_inst.attr,
+ &iscsi_stat_tgt_attr_indx.attr,
+ &iscsi_stat_tgt_attr_login_fails.attr,
+ &iscsi_stat_tgt_attr_last_fail_time.attr,
+ &iscsi_stat_tgt_attr_last_fail_type.attr,
+ &iscsi_stat_tgt_attr_fail_intr_name.attr,
+ &iscsi_stat_tgt_attr_fail_intr_addr_type.attr,
+ &iscsi_stat_tgt_attr_fail_intr_addr.attr,
+ NULL,
+};
+
+static struct configfs_item_operations iscsi_stat_tgt_attr_item_ops = {
+ .show_attribute = iscsi_stat_tgt_attr_attr_show,
+ .store_attribute = iscsi_stat_tgt_attr_attr_store,
+};
+
+struct config_item_type iscsi_stat_tgt_attr_cit = {
+ .ct_item_ops = &iscsi_stat_tgt_attr_item_ops,
+ .ct_attrs = iscsi_stat_tgt_attr_attrs,
+ .ct_owner = THIS_MODULE,
+};
+
+/*
+ * Target Login Stats Table
+ */
+CONFIGFS_EATTR_STRUCT(iscsi_stat_login, iscsi_wwn_stat_grps);
+#define ISCSI_STAT_LOGIN(_name, _mode) \
+static struct iscsi_stat_login_attribute \
+ iscsi_stat_login_##_name = \
+ __CONFIGFS_EATTR(_name, _mode, \
+ iscsi_stat_login_show_attr_##_name, \
+
iscsi_stat_login_store_attr_##_name); + +#define ISCSI_STAT_LOGIN_RO(_name) \ +static struct iscsi_stat_login_attribute \ + iscsi_stat_login_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_login_show_attr_##_name); + +static ssize_t iscsi_stat_login_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_LOGIN_RO(inst); + +static ssize_t iscsi_stat_login_show_attr_indx( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX); +} +ISCSI_STAT_LOGIN_RO(indx); + +static ssize_t iscsi_stat_login_show_attr_accepts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->accepts); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(accepts); + +static ssize_t iscsi_stat_login_show_attr_other_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->other_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(other_fails); + +static ssize_t iscsi_stat_login_show_attr_redirects( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->redirects); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(redirects); + +static ssize_t iscsi_stat_login_show_attr_authorize_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authorize_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(authorize_fails); + +static ssize_t iscsi_stat_login_show_attr_authenticate_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->authenticate_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(authenticate_fails); + +static ssize_t iscsi_stat_login_show_attr_negotiate_fails( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_login_stats *lstat = &tiqn->login_stats; + ssize_t ret; + + spin_lock(&lstat->lock); + ret = snprintf(page, PAGE_SIZE, "%u\n", lstat->negotiate_fails); + spin_unlock(&lstat->lock); + + return ret; +} +ISCSI_STAT_LOGIN_RO(negotiate_fails); + +CONFIGFS_EATTR_OPS(iscsi_stat_login, iscsi_wwn_stat_grps, + iscsi_login_stats_group); + +static struct configfs_attribute *iscsi_stat_login_stats_attrs[] = { + 
&iscsi_stat_login_inst.attr, + &iscsi_stat_login_indx.attr, + &iscsi_stat_login_accepts.attr, + &iscsi_stat_login_other_fails.attr, + &iscsi_stat_login_redirects.attr, + &iscsi_stat_login_authorize_fails.attr, + &iscsi_stat_login_authenticate_fails.attr, + &iscsi_stat_login_negotiate_fails.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_login_stats_item_ops = { + .show_attribute = iscsi_stat_login_attr_show, + .store_attribute = iscsi_stat_login_attr_store, +}; + +struct config_item_type iscsi_stat_login_cit = { + .ct_item_ops = &iscsi_stat_login_stats_item_ops, + .ct_attrs = iscsi_stat_login_stats_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Target Logout Stats Table + */ + +CONFIGFS_EATTR_STRUCT(iscsi_stat_logout, iscsi_wwn_stat_grps); +#define ISCSI_STAT_LOGOUT(_name, _mode) \ +static struct iscsi_stat_logout_attribute \ + iscsi_stat_logout_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_logout_show_attr_##_name, \ + iscsi_stat_logout_store_attr_##_name); + +#define ISCSI_STAT_LOGOUT_RO(_name) \ +static struct iscsi_stat_logout_attribute \ + iscsi_stat_logout_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_logout_show_attr_##_name); + +static ssize_t iscsi_stat_logout_show_attr_inst( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_LOGOUT_RO(inst); + +static ssize_t iscsi_stat_logout_show_attr_indx( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + return snprintf(page, PAGE_SIZE, "%u\n", ISCSI_NODE_INDEX); +} +ISCSI_STAT_LOGOUT_RO(indx); + +static ssize_t iscsi_stat_logout_show_attr_normal_logouts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_logout_stats *lstats = &tiqn->logout_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", lstats->normal_logouts); +} +ISCSI_STAT_LOGOUT_RO(normal_logouts); + +static ssize_t iscsi_stat_logout_show_attr_abnormal_logouts( + struct iscsi_wwn_stat_grps *igrps, char *page) +{ + struct iscsi_tiqn *tiqn = container_of(igrps, + struct iscsi_tiqn, tiqn_stat_grps); + struct iscsi_logout_stats *lstats = &tiqn->logout_stats; + + return snprintf(page, PAGE_SIZE, "%u\n", lstats->abnormal_logouts); +} +ISCSI_STAT_LOGOUT_RO(abnormal_logouts); + +CONFIGFS_EATTR_OPS(iscsi_stat_logout, iscsi_wwn_stat_grps, + iscsi_logout_stats_group); + +static struct configfs_attribute *iscsi_stat_logout_stats_attrs[] = { + &iscsi_stat_logout_inst.attr, + &iscsi_stat_logout_indx.attr, + &iscsi_stat_logout_normal_logouts.attr, + &iscsi_stat_logout_abnormal_logouts.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_logout_stats_item_ops = { + .show_attribute = iscsi_stat_logout_attr_show, + .store_attribute = iscsi_stat_logout_attr_store, +}; + +struct config_item_type iscsi_stat_logout_cit = { + .ct_item_ops = &iscsi_stat_logout_stats_item_ops, + .ct_attrs = iscsi_stat_logout_stats_attrs, + .ct_owner = THIS_MODULE, +}; + +/* + * Session Stats Table + */ + +CONFIGFS_EATTR_STRUCT(iscsi_stat_sess, iscsi_node_stat_grps); +#define ISCSI_STAT_SESS(_name, _mode) \ +static struct iscsi_stat_sess_attribute \ + iscsi_stat_sess_##_name = \ + __CONFIGFS_EATTR(_name, _mode, \ + iscsi_stat_sess_show_attr_##_name, \ + iscsi_stat_sess_store_attr_##_name); + +#define ISCSI_STAT_SESS_RO(_name) \ +static struct iscsi_stat_sess_attribute \ + 
iscsi_stat_sess_##_name = \ + __CONFIGFS_EATTR_RO(_name, \ + iscsi_stat_sess_show_attr_##_name); + +static ssize_t iscsi_stat_sess_show_attr_inst( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_wwn *wwn = acl->se_node_acl.se_tpg->se_tpg_wwn; + struct iscsi_tiqn *tiqn = container_of(wwn, + struct iscsi_tiqn, tiqn_wwn); + + return snprintf(page, PAGE_SIZE, "%u\n", tiqn->tiqn_index); +} +ISCSI_STAT_SESS_RO(inst); + +static ssize_t iscsi_stat_sess_show_attr_node( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->sess_ops->SessionType ? 0 : ISCSI_NODE_INDEX); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(node); + +static ssize_t iscsi_stat_sess_show_attr_indx( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->session_index); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(indx); + +static ssize_t iscsi_stat_sess_show_attr_cmd_pdus( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", sess->cmd_pdus); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(cmd_pdus); + +static ssize_t iscsi_stat_sess_show_attr_rsp_pdus( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", sess->rsp_pdus); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(rsp_pdus); + +static ssize_t iscsi_stat_sess_show_attr_txdata_octs( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = 
se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%llu\n", + (unsigned long long)sess->tx_data_octets); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(txdata_octs); + +static ssize_t iscsi_stat_sess_show_attr_rxdata_octs( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%llu\n", + (unsigned long long)sess->rx_data_octets); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(rxdata_octs); + +static ssize_t iscsi_stat_sess_show_attr_conn_digest_errors( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->conn_digest_errors); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(conn_digest_errors); + +static ssize_t iscsi_stat_sess_show_attr_conn_timeout_errors( + struct iscsi_node_stat_grps *igrps, char *page) +{ + struct iscsi_node_acl *acl = container_of(igrps, + struct iscsi_node_acl, node_stat_grps); + struct se_node_acl *se_nacl = &acl->se_node_acl; + struct iscsi_session *sess; + struct se_session *se_sess; + ssize_t ret = 0; + + spin_lock_bh(&se_nacl->nacl_sess_lock); + se_sess = se_nacl->nacl_sess; + if (se_sess) { + sess = (struct iscsi_session *)se_sess->fabric_sess_ptr; + if (sess) + ret = snprintf(page, PAGE_SIZE, "%u\n", + sess->conn_timeout_errors); + } + spin_unlock_bh(&se_nacl->nacl_sess_lock); + + return ret; +} +ISCSI_STAT_SESS_RO(conn_timeout_errors); + +CONFIGFS_EATTR_OPS(iscsi_stat_sess, iscsi_node_stat_grps, + iscsi_sess_stats_group); + +static struct configfs_attribute *iscsi_stat_sess_stats_attrs[] = { + &iscsi_stat_sess_inst.attr, + &iscsi_stat_sess_node.attr, + &iscsi_stat_sess_indx.attr, + &iscsi_stat_sess_cmd_pdus.attr, + &iscsi_stat_sess_rsp_pdus.attr, + &iscsi_stat_sess_txdata_octs.attr, + &iscsi_stat_sess_rxdata_octs.attr, + &iscsi_stat_sess_conn_digest_errors.attr, + &iscsi_stat_sess_conn_timeout_errors.attr, + NULL, +}; + +static struct configfs_item_operations iscsi_stat_sess_stats_item_ops = { + .show_attribute = iscsi_stat_sess_attr_show, + .store_attribute = iscsi_stat_sess_attr_store, +}; + +struct config_item_type iscsi_stat_sess_cit = { + .ct_item_ops = &iscsi_stat_sess_stats_item_ops, + .ct_attrs = iscsi_stat_sess_stats_attrs, + .ct_owner = THIS_MODULE, +}; diff --git a/drivers/target/iscsi/iscsi_target_stat.h b/drivers/target/iscsi/iscsi_target_stat.h new file mode 100644 index 000000000000..3ff76b4faad3 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_stat.h @@ -0,0 +1,64 @@ +#ifndef ISCSI_TARGET_STAT_H +#define ISCSI_TARGET_STAT_H + +/* + * For struct iscsi_tiqn->tiqn_wwn default groups + */ +extern 
struct config_item_type iscsi_stat_instance_cit;
+extern struct config_item_type iscsi_stat_sess_err_cit;
+extern struct config_item_type iscsi_stat_tgt_attr_cit;
+extern struct config_item_type iscsi_stat_login_cit;
+extern struct config_item_type iscsi_stat_logout_cit;
+
+/*
+ * For struct iscsi_session->se_sess default groups
+ */
+extern struct config_item_type iscsi_stat_sess_cit;
+
+/* iSCSI session error types */
+#define ISCSI_SESS_ERR_UNKNOWN 0
+#define ISCSI_SESS_ERR_DIGEST 1
+#define ISCSI_SESS_ERR_CXN_TIMEOUT 2
+#define ISCSI_SESS_ERR_PDU_FORMAT 3
+
+/* iSCSI session error stats */
+struct iscsi_sess_err_stats {
+ spinlock_t lock;
+ u32 digest_errors;
+ u32 cxn_timeout_errors;
+ u32 pdu_format_errors;
+ u32 last_sess_failure_type;
+ char last_sess_fail_rem_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI login failure types (sub oids) */
+#define ISCSI_LOGIN_FAIL_OTHER 2
+#define ISCSI_LOGIN_FAIL_REDIRECT 3
+#define ISCSI_LOGIN_FAIL_AUTHORIZE 4
+#define ISCSI_LOGIN_FAIL_AUTHENTICATE 5
+#define ISCSI_LOGIN_FAIL_NEGOTIATE 6
+
+/* iSCSI login stats */
+struct iscsi_login_stats {
+ spinlock_t lock;
+ u32 accepts;
+ u32 other_fails;
+ u32 redirects;
+ u32 authorize_fails;
+ u32 authenticate_fails;
+ u32 negotiate_fails; /* used for notifications */
+ u64 last_fail_time; /* time stamp (jiffies) */
+ u32 last_fail_type;
+ int last_intr_fail_ip_family;
+ unsigned char last_intr_fail_ip_addr[IPV6_ADDRESS_SPACE];
+ char last_intr_fail_name[224];
+} ____cacheline_aligned;
+
+/* iSCSI logout stats */
+struct iscsi_logout_stats {
+ spinlock_t lock;
+ u32 normal_logouts;
+ u32 abnormal_logouts;
+} ____cacheline_aligned;
+
+#endif /*** ISCSI_TARGET_STAT_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_tmr.c b/drivers/target/iscsi/iscsi_target_tmr.c
new file mode 100644
index 000000000000..db1fe1ec84df
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.c
@@ -0,0 +1,849 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific Task Management functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/ + +#include <asm/unaligned.h> +#include <scsi/iscsi_proto.h> +#include <target/target_core_base.h> +#include <target/target_core_transport.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_seq_pdu_list.h" +#include "iscsi_target_datain_values.h" +#include "iscsi_target_device.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_erl1.h" +#include "iscsi_target_erl2.h" +#include "iscsi_target_tmr.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" + +u8 iscsit_tmr_abort_task( + struct iscsi_cmd *cmd, + unsigned char *buf) +{ + struct iscsi_cmd *ref_cmd; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_tmr_req *tmr_req = cmd->tmr_req; + struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req; + struct iscsi_tm *hdr = (struct iscsi_tm *) buf; + + ref_cmd = iscsit_find_cmd_from_itt(conn, hdr->rtt); + if (!ref_cmd) { + pr_err("Unable to locate RefTaskTag: 0x%08x on CID:" + " %hu.\n", hdr->rtt, conn->cid); + return ((hdr->refcmdsn >= conn->sess->exp_cmd_sn) && + (hdr->refcmdsn <= conn->sess->max_cmd_sn)) ? + ISCSI_TMF_RSP_COMPLETE : ISCSI_TMF_RSP_NO_TASK; + } + if (ref_cmd->cmd_sn != hdr->refcmdsn) { + pr_err("RefCmdSN 0x%08x does not equal" + " task's CmdSN 0x%08x. Rejecting ABORT_TASK.\n", + hdr->refcmdsn, ref_cmd->cmd_sn); + return ISCSI_TMF_RSP_REJECTED; + } + + se_tmr->ref_task_tag = hdr->rtt; + se_tmr->ref_cmd = &ref_cmd->se_cmd; + tmr_req->ref_cmd_sn = hdr->refcmdsn; + tmr_req->exp_data_sn = hdr->exp_datasn; + + return ISCSI_TMF_RSP_COMPLETE; +} + +/* + * Called from iscsit_handle_task_mgt_cmd(). + */ +int iscsit_tmr_task_warm_reset( + struct iscsi_conn *conn, + struct iscsi_tmr_req *tmr_req, + unsigned char *buf) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); +#if 0 + struct iscsi_init_task_mgt_cmnd *hdr = + (struct iscsi_init_task_mgt_cmnd *) buf; +#endif + if (!na->tmr_warm_reset) { + pr_err("TMR Opcode TARGET_WARM_RESET authorization" + " failed for Initiator Node: %s\n", + sess->se_sess->se_node_acl->initiatorname); + return -1; + } + /* + * Do the real work in transport_generic_do_tmr(). + */ + return 0; +} + +int iscsit_tmr_task_cold_reset( + struct iscsi_conn *conn, + struct iscsi_tmr_req *tmr_req, + unsigned char *buf) +{ + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess); + + if (!na->tmr_cold_reset) { + pr_err("TMR Opcode TARGET_COLD_RESET authorization" + " failed for Initiator Node: %s\n", + sess->se_sess->se_node_acl->initiatorname); + return -1; + } + /* + * Do the real work in transport_generic_do_tmr(). 
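+ * Beyond the per-node tmr_cold_reset authorization check above,
+ * nothing iSCSI-specific needs to happen at this layer.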
+ */
+ return 0;
+}
+
+u8 iscsit_tmr_task_reassign(
+ struct iscsi_cmd *cmd,
+ unsigned char *buf)
+{
+ struct iscsi_cmd *ref_cmd = NULL;
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_conn_recovery *cr = NULL;
+ struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+ struct iscsi_tm *hdr = (struct iscsi_tm *) buf;
+ int ret;
+
+ pr_debug("Got TASK_REASSIGN TMR ITT: 0x%08x,"
+ " RefTaskTag: 0x%08x, ExpDataSN: 0x%08x, CID: %hu\n",
+ hdr->itt, hdr->rtt, hdr->exp_datasn, conn->cid);
+
+ if (conn->sess->sess_ops->ErrorRecoveryLevel != 2) {
+ pr_err("TMR TASK_REASSIGN not supported in ERL<2,"
+ " ignoring request.\n");
+ return ISCSI_TMF_RSP_NOT_SUPPORTED;
+ }
+
+ ret = iscsit_find_cmd_for_recovery(conn->sess, &ref_cmd, &cr, hdr->rtt);
+ if (ret == -2) {
+ pr_err("Command ITT: 0x%08x is still allegiant to CID:"
+ " %hu\n", ref_cmd->init_task_tag, cr->cid);
+ return ISCSI_TMF_RSP_TASK_ALLEGIANT;
+ } else if (ret == -1) {
+ pr_err("Unable to locate RefTaskTag: 0x%08x in"
+ " connection recovery command list.\n", hdr->rtt);
+ return ISCSI_TMF_RSP_NO_TASK;
+ }
+ /*
+ * Temporary check to prevent connection recovery for
+ * connections with a differing MaxRecvDataSegmentLength.
+ */
+ if (cr->maxrecvdatasegmentlength !=
+ conn->conn_ops->MaxRecvDataSegmentLength) {
+ pr_err("Unable to perform connection recovery for"
+ " differing MaxRecvDataSegmentLength, rejecting"
+ " TMR TASK_REASSIGN.\n");
+ return ISCSI_TMF_RSP_REJECTED;
+ }
+
+ se_tmr->ref_task_tag = hdr->rtt;
+ se_tmr->ref_cmd = &ref_cmd->se_cmd;
+ tmr_req->ref_cmd_sn = hdr->refcmdsn;
+ tmr_req->exp_data_sn = hdr->exp_datasn;
+ tmr_req->conn_recovery = cr;
+ tmr_req->task_reassign = 1;
+ /*
+ * Command can now be reassigned to a new connection.
+ * The task management response must be sent before the
+ * reassignment actually happens. See iscsit_tmr_post_handler().
+ */
+ return ISCSI_TMF_RSP_COMPLETE;
+}
+
+static void iscsit_task_reassign_remove_cmd(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn_recovery *cr,
+ struct iscsi_session *sess)
+{
+ int ret;
+
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ ret = iscsit_remove_cmd_from_connection_recovery(cmd, sess);
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ if (!ret) {
+ pr_debug("iSCSI connection recovery successful for CID:"
+ " %hu on SID: %u\n", cr->cid, sess->sid);
+ iscsit_remove_active_connection_recovery_entry(cr, sess);
+ }
+}
+
+static int iscsit_task_reassign_complete_nop_out(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_conn_recovery *cr;
+
+ if (!cmd->cr) {
+ pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+ " is NULL!\n", cmd->init_task_tag);
+ return -1;
+ }
+ cr = cmd->cr;
+
+ /*
+ * Reset the StatSN so a new one for this command's new connection
+ * will be assigned.
+ * Reset the ExpStatSN as well so we may receive Status SNACKs.
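+ * Both serial numbers restart from zero on the command's new
+ * connection.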
+ */
+ cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+ iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ cmd->i_state = ISTATE_SEND_NOPIN;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_write(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ int no_build_r2ts = 0;
+ u32 length = 0, offset = 0;
+ struct iscsi_conn *conn = cmd->conn;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ /*
+ * The Initiator must not send an R2T SNACK with a BegRun less than
+ * the TMR TASK_REASSIGN's ExpDataSN.
+ */
+ if (!tmr_req->exp_data_sn) {
+ cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = 0;
+ } else {
+ cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+ }
+
+ /*
+ * The TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN the
+ * Initiator is expecting. The Target controls all WRITE operations
+ * so if we have received all DataOUT we can safely ignore the
+ * Initiator.
+ */
+ if (cmd->cmd_flags & ICF_GOT_LAST_DATAOUT) {
+ if (!atomic_read(&cmd->transport_sent)) {
+ pr_debug("WRITE ITT: 0x%08x: t_state: %d"
+ " never sent to transport\n",
+ cmd->init_task_tag, cmd->se_cmd.t_state);
+ return transport_generic_handle_data(se_cmd);
+ }
+
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ /*
+ * Special case to deal with DataSequenceInOrder=No and Non-Immediate
+ * Unsolicited DataOut.
+ */
+ if (cmd->unsolicited_data) {
+ cmd->unsolicited_data = 0;
+
+ offset = cmd->next_burst_len = cmd->write_data_done;
+
+ if ((conn->sess->sess_ops->FirstBurstLength - offset) >=
+ cmd->data_length) {
+ no_build_r2ts = 1;
+ length = (cmd->data_length - offset);
+ } else
+ length = (conn->sess->sess_ops->FirstBurstLength - offset);
+
+ spin_lock_bh(&cmd->r2t_lock);
+ if (iscsit_add_r2t_to_list(cmd, offset, length, 0, 0) < 0) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+ cmd->outstanding_r2ts++;
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ if (no_build_r2ts)
+ return 0;
+ }
+ /*
+ * iscsit_build_r2ts_for_cmd() can handle the rest from here.
+ */
+ return iscsit_build_r2ts_for_cmd(cmd, conn, 2);
+}
+
+static int iscsit_task_reassign_complete_read(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ struct iscsi_conn *conn = cmd->conn;
+ struct iscsi_datain_req *dr;
+ struct se_cmd *se_cmd = &cmd->se_cmd;
+ /*
+ * The Initiator must not send a Data SNACK with a BegRun less than
+ * the TMR TASK_REASSIGN's ExpDataSN.
+ */
+ if (!tmr_req->exp_data_sn) {
+ cmd->cmd_flags &= ~ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = 0;
+ } else {
+ cmd->cmd_flags |= ICF_GOT_DATACK_SNACK;
+ cmd->acked_data_sn = (tmr_req->exp_data_sn - 1);
+ }
+
+ if (!atomic_read(&cmd->transport_sent)) {
+ pr_debug("READ ITT: 0x%08x: t_state: %d never sent to"
+ " transport\n", cmd->init_task_tag,
+ cmd->se_cmd.t_state);
+ transport_generic_handle_cdb(se_cmd);
+ return 0;
+ }
+
+ if (!atomic_read(&se_cmd->t_transport_complete)) {
+ pr_err("READ ITT: 0x%08x: t_state: %d, never returned"
+ " from transport\n", cmd->init_task_tag,
+ cmd->se_cmd.t_state);
+ return -1;
+ }
+
+ dr = iscsit_allocate_datain_req();
+ if (!dr)
+ return -1;
+ /*
+ * The TMR TASK_REASSIGN's ExpDataSN contains the next DataSN the
+ * Initiator is expecting.
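+ * The DataIN request built below restarts the stream at exactly
+ * that BegRun, using connection recovery semantics.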
+ */
+ dr->data_sn = dr->begrun = tmr_req->exp_data_sn;
+ dr->runlength = 0;
+ dr->generate_recovery_values = 1;
+ dr->recovery = DATAIN_CONNECTION_RECOVERY;
+
+ iscsit_attach_datain_req(cmd, dr);
+
+ cmd->i_state = ISTATE_SEND_DATAIN;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_none(
+ struct iscsi_cmd *cmd,
+ struct iscsi_tmr_req *tmr_req)
+{
+ struct iscsi_conn *conn = cmd->conn;
+
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+}
+
+static int iscsit_task_reassign_complete_scsi_cmnd(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_conn_recovery *cr;
+
+ if (!cmd->cr) {
+ pr_err("struct iscsi_conn_recovery pointer for ITT: 0x%08x"
+ " is NULL!\n", cmd->init_task_tag);
+ return -1;
+ }
+ cr = cmd->cr;
+
+ /*
+ * Reset the StatSN so a new one for this command's new connection
+ * will be assigned.
+ * Reset the ExpStatSN as well so we may receive Status SNACKs.
+ */
+ cmd->stat_sn = cmd->exp_stat_sn = 0;
+
+ iscsit_task_reassign_remove_cmd(cmd, cr, conn->sess);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION) {
+ cmd->i_state = ISTATE_SEND_STATUS;
+ iscsit_add_cmd_to_response_queue(cmd, conn, cmd->i_state);
+ return 0;
+ }
+
+ switch (cmd->data_direction) {
+ case DMA_TO_DEVICE:
+ return iscsit_task_reassign_complete_write(cmd, tmr_req);
+ case DMA_FROM_DEVICE:
+ return iscsit_task_reassign_complete_read(cmd, tmr_req);
+ case DMA_NONE:
+ return iscsit_task_reassign_complete_none(cmd, tmr_req);
+ default:
+ pr_err("Unknown cmd->data_direction: 0x%02x\n",
+ cmd->data_direction);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int iscsit_task_reassign_complete(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd;
+ struct iscsi_cmd *cmd;
+ int ret = 0;
+
+ if (!se_tmr->ref_cmd) {
+ pr_err("TMR Request is missing a RefCmd struct iscsi_cmd.\n");
+ return -1;
+ }
+ se_cmd = se_tmr->ref_cmd;
+ cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ cmd->conn = conn;
+
+ switch (cmd->iscsi_opcode) {
+ case ISCSI_OP_NOOP_OUT:
+ ret = iscsit_task_reassign_complete_nop_out(tmr_req, conn);
+ break;
+ case ISCSI_OP_SCSI_CMD:
+ ret = iscsit_task_reassign_complete_scsi_cmnd(tmr_req, conn);
+ break;
+ default:
+ pr_err("Illegal iSCSI Opcode 0x%02x during"
+ " command reallegiance\n", cmd->iscsi_opcode);
+ return -1;
+ }
+
+ if (ret != 0)
+ return ret;
+
+ pr_debug("Completed connection reallegiance for Opcode: 0x%02x,"
+ " ITT: 0x%08x to CID: %hu.\n", cmd->iscsi_opcode,
+ cmd->init_task_tag, conn->cid);
+
+ return 0;
+}
+
+/*
+ * Handles special after-the-fact actions related to TMRs.
+ * Right now the only one that it's really needed for is
+ * connection recovery related TASK_REASSIGN.
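+ * By the time this handler runs the TMR response has already been
+ * sent, preserving the ordering requirement noted above in
+ * iscsit_tmr_task_reassign().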
+ */
+int iscsit_tmr_post_handler(struct iscsi_cmd *cmd, struct iscsi_conn *conn)
+{
+ struct iscsi_tmr_req *tmr_req = cmd->tmr_req;
+ struct se_tmr_req *se_tmr = cmd->se_cmd.se_tmr_req;
+
+ if (tmr_req->task_reassign &&
+ (se_tmr->response == ISCSI_TMF_RSP_COMPLETE))
+ return iscsit_task_reassign_complete(tmr_req, conn);
+
+ return 0;
+}
+
+/*
+ * Nothing to do here, but leave it for good measure. :-)
+ */
+int iscsit_task_reassign_prepare_read(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ return 0;
+}
+
+static void iscsit_task_reassign_prepare_unsolicited_dataout(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ int i, j;
+ struct iscsi_pdu *pdu = NULL;
+ struct iscsi_seq *seq = NULL;
+
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ cmd->data_sn = 0;
+
+ if (cmd->immediate_data)
+ cmd->r2t_offset += (cmd->first_burst_len -
+ cmd->seq_start_offset);
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ cmd->write_data_done -= (cmd->immediate_data) ?
+ (cmd->first_burst_len -
+ cmd->seq_start_offset) :
+ cmd->first_burst_len;
+ cmd->first_burst_len = 0;
+ return;
+ }
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ if ((pdu->offset >= cmd->seq_start_offset) &&
+ ((pdu->offset + pdu->length) <=
+ cmd->seq_end_offset)) {
+ cmd->first_burst_len -= pdu->length;
+ cmd->write_data_done -= pdu->length;
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+ } else {
+ for (i = 0; i < cmd->seq_count; i++) {
+ seq = &cmd->seq_list[i];
+
+ if (seq->type != SEQTYPE_UNSOLICITED)
+ continue;
+
+ cmd->write_data_done -=
+ (seq->offset - seq->orig_offset);
+ cmd->first_burst_len = 0;
+ seq->data_sn = 0;
+ seq->offset = seq->orig_offset;
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ continue;
+
+ for (j = 0; j < seq->pdu_count; j++) {
+ pdu = &cmd->pdu_list[j+seq->pdu_start];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+ }
+}
+
+int iscsit_task_reassign_prepare_write(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+ struct iscsi_pdu *pdu = NULL;
+ struct iscsi_r2t *r2t = NULL, *r2t_tmp;
+ int first_incomplete_r2t = 1, i = 0;
+
+ /*
+ * The command was in the process of receiving Unsolicited DataOUT when
+ * the connection failed.
+ */
+ if (cmd->unsolicited_data)
+ iscsit_task_reassign_prepare_unsolicited_dataout(cmd, conn);
+
+ /*
+ * The Initiator is requesting R2Ts starting from zero, so skip
+ * the acknowledged R2T check and treat all outstanding struct
+ * iscsi_r2t entries as unacknowledged.
+ */
+ if (!tmr_req->exp_data_sn)
+ goto drop_unacknowledged_r2ts;
+
+ /*
+ * We now check that the PDUs in DataOUT sequences below
+ * the TMR TASK_REASSIGN ExpDataSN (R2TSN the Initiator is
+ * expecting next) have all the DataOUT they require to complete
+ * the DataOUT sequence. First scan from R2TSN 0 to TMR
+ * TASK_REASSIGN ExpDataSN-1.
+ *
+ * If we have not received all DataOUT in question, we must
+ * make sure to make the appropriate changes to values in
+ * struct iscsi_cmd (and elsewhere depending on session parameters)
+ * so iscsit_build_r2ts_for_cmd() in iscsit_task_reassign_complete_write()
+ * will resend a new R2T for the DataOUT sequences in question.
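+ *
+ * For example, with ExpDataSN 2 the loop below inspects only R2TSN
+ * 0 and 1 for missing DataOUT, while R2TSN 2 and above are freed
+ * and reissued under the drop_unacknowledged_r2ts label further
+ * down.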
+ */
+ spin_lock_bh(&cmd->r2t_lock);
+ if (list_empty(&cmd->cmd_r2t_list)) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+
+ if (r2t->r2t_sn >= tmr_req->exp_data_sn)
+ continue;
+ /*
+ * Safely ignore Recovery R2Ts and R2Ts that have completed
+ * DataOUT sequences.
+ */
+ if (r2t->seq_complete)
+ continue;
+
+ if (r2t->recovery_r2t)
+ continue;
+
+ /*
+ * DataSequenceInOrder=Yes:
+ *
+ * Taking into account the iSCSI implementation requirement of
+ * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+ * DataSequenceInOrder=Yes, we must take into consideration
+ * the following:
+ *
+ * DataSequenceInOrder=No:
+ *
+ * Taking into account that the Initiator controls the (possibly
+ * random) PDU Order in (possibly random) Sequence Order of
+ * DataOUT the target requests with R2Ts, we must take into
+ * consideration the following:
+ *
+ * DataPDUInOrder=Yes for DataSequenceInOrder=[Yes,No]:
+ *
+ * While processing non-complete R2T DataOUT sequence requests
+ * the Target will re-request only the total sequence length
+ * minus current received offset. This is because we must
+ * assume the initiator will continue sending DataOUT from the
+ * last PDU before the connection failed.
+ *
+ * DataPDUInOrder=No for DataSequenceInOrder=[Yes,No]:
+ *
+ * While processing non-complete R2T DataOUT sequence requests
+ * the Target will re-request the entire DataOUT sequence if
+ * any single PDU is missing from the sequence. This is because
+ * we have no logical method to determine the next PDU offset,
+ * and we must assume the Initiator will be sending any random
+ * PDU offset in the current sequence after TASK_REASSIGN
+ * has completed.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder) {
+ if (!first_incomplete_r2t) {
+ cmd->r2t_offset -= r2t->xfer_len;
+ goto next;
+ }
+
+ if (conn->sess->sess_ops->DataPDUInOrder) {
+ cmd->data_sn = 0;
+ cmd->r2t_offset -= (r2t->xfer_len -
+ cmd->next_burst_len);
+ first_incomplete_r2t = 0;
+ goto next;
+ }
+
+ cmd->data_sn = 0;
+ cmd->r2t_offset -= r2t->xfer_len;
+
+ for (i = 0; i < cmd->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ if ((pdu->offset >= r2t->offset) &&
+ (pdu->offset < (r2t->offset +
+ r2t->xfer_len))) {
+ cmd->next_burst_len -= pdu->length;
+ cmd->write_data_done -= pdu->length;
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+
+ first_incomplete_r2t = 0;
+ } else {
+ struct iscsi_seq *seq;
+
+ seq = iscsit_get_seq_holder(cmd, r2t->offset,
+ r2t->xfer_len);
+ if (!seq) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ cmd->write_data_done -=
+ (seq->offset - seq->orig_offset);
+ seq->data_sn = 0;
+ seq->offset = seq->orig_offset;
+ seq->next_burst_len = 0;
+ seq->status = DATAOUT_SEQUENCE_WITHIN_COMMAND_RECOVERY;
+
+ cmd->seq_send_order--;
+
+ if (conn->sess->sess_ops->DataPDUInOrder)
+ goto next;
+
+ for (i = 0; i < seq->pdu_count; i++) {
+ pdu = &cmd->pdu_list[i+seq->pdu_start];
+
+ if (pdu->status != ISCSI_PDU_RECEIVED_OK)
+ continue;
+
+ pdu->status = ISCSI_PDU_NOT_RECEIVED;
+ }
+ }
+
+next:
+ cmd->outstanding_r2ts--;
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ /*
+ * We now drop all unacknowledged R2Ts, i.e. from the TMR
+ * TASK_REASSIGN's ExpDataSN to the last R2T in the list. We are
+ * also careful to check that the Initiator is not requesting R2Ts
+ * for DataOUT sequences it has already completed.
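+ *
+ * Continuing the example above, with ExpDataSN 2 every R2T with
+ * R2TSN 2 or higher is freed here and cmd->r2t_sn is rewound to 2,
+ * so the R2Ts rebuilt after the TMR response go out with exactly
+ * the R2TSNs the Initiator expects.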
+ *
+ * Free each R2T in question and adjust values in struct iscsi_cmd
+ * accordingly so iscsit_build_r2ts_for_cmd() does the rest of
+ * the work after the TMR TASK_REASSIGN Response is sent.
+ */
+drop_unacknowledged_r2ts:
+
+ cmd->cmd_flags &= ~ICF_SENT_LAST_R2T;
+ cmd->r2t_sn = tmr_req->exp_data_sn;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) {
+ /*
+ * Skip up to the R2T Sequence number provided by the
+ * iSCSI TASK_REASSIGN TMR
+ */
+ if (r2t->r2t_sn < tmr_req->exp_data_sn)
+ continue;
+
+ if (r2t->seq_complete) {
+ pr_err("Initiator is requesting R2Ts from"
+ " R2TSN: 0x%08x, but R2TSN: 0x%08x, Offset: %u,"
+ " Length: %u is already complete."
+ " BAD INITIATOR ERL=2 IMPLEMENTATION!\n",
+ tmr_req->exp_data_sn, r2t->r2t_sn,
+ r2t->offset, r2t->xfer_len);
+ spin_unlock_bh(&cmd->r2t_lock);
+ return -1;
+ }
+
+ if (r2t->recovery_r2t) {
+ iscsit_free_r2t(r2t, cmd);
+ continue;
+ }
+
+ /* DataSequenceInOrder=Yes:
+ *
+ * Taking into account the iSCSI implementation requirement of
+ * MaxOutstandingR2T=1 while ErrorRecoveryLevel>0 and
+ * DataSequenceInOrder=Yes, it's safe to subtract the R2T's
+ * entire transfer length from the command's R2T offset marker.
+ *
+ * DataSequenceInOrder=No:
+ *
+ * We subtract the difference between the current and original
+ * offsets in struct iscsi_seq from cmd->write_data_done to
+ * account for DataOUT PDUs already received. Then reset
+ * the current offset to the original and zero out the current
+ * burst length, to make sure we re-request the entire DataOUT
+ * sequence.
+ */
+ if (conn->sess->sess_ops->DataSequenceInOrder)
+ cmd->r2t_offset -= r2t->xfer_len;
+ else
+ cmd->seq_send_order--;
+
+ cmd->outstanding_r2ts--;
+ iscsit_free_r2t(r2t, cmd);
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return 0;
+}
+
+/*
+ * Performs sanity checks on the TMR TASK_REASSIGN's ExpDataSN for
+ * a given struct iscsi_cmd.
+ */
+int iscsit_check_task_reassign_expdatasn(
+ struct iscsi_tmr_req *tmr_req,
+ struct iscsi_conn *conn)
+{
+ struct se_tmr_req *se_tmr = tmr_req->se_tmr_req;
+ struct se_cmd *se_cmd = se_tmr->ref_cmd;
+ struct iscsi_cmd *ref_cmd = container_of(se_cmd, struct iscsi_cmd, se_cmd);
+
+ if (ref_cmd->iscsi_opcode != ISCSI_OP_SCSI_CMD)
+ return 0;
+
+ if (se_cmd->se_cmd_flags & SCF_SENT_CHECK_CONDITION)
+ return 0;
+
+ if (ref_cmd->data_direction == DMA_NONE)
+ return 0;
+
+ /*
+ * For READs the TMR TASK_REASSIGN's ExpDataSN contains the next DataSN
+ * of DataIN the Initiator is expecting.
+ *
+ * Also check that the Initiator is not re-requesting DataIN that has
+ * already been acknowledged with a DataAck SNACK.
+ */
+ if (ref_cmd->data_direction == DMA_FROM_DEVICE) {
+ if (tmr_req->exp_data_sn > ref_cmd->data_sn) {
+ pr_err("Received ExpDataSN: 0x%08x for READ"
+ " in TMR TASK_REASSIGN greater than command's"
+ " DataSN: 0x%08x.\n", tmr_req->exp_data_sn,
+ ref_cmd->data_sn);
+ return -1;
+ }
+ if ((ref_cmd->cmd_flags & ICF_GOT_DATACK_SNACK) &&
+ (tmr_req->exp_data_sn <= ref_cmd->acked_data_sn)) {
+ pr_err("Received ExpDataSN: 0x%08x for READ"
+ " in TMR TASK_REASSIGN for previously"
+ " acknowledged DataIN: 0x%08x,"
+ " protocol error\n", tmr_req->exp_data_sn,
+ ref_cmd->acked_data_sn);
+ return -1;
+ }
+ return iscsit_task_reassign_prepare_read(tmr_req, conn);
+ }
+
+ /*
+ * For WRITEs the TMR TASK_REASSIGN's ExpDataSN contains the next R2TSN
+ * for R2Ts the Initiator is expecting.
+ *
+ * Do the magic in iscsit_task_reassign_prepare_write().
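+ *
+ * A sane ExpDataSN is bounded by the command's own r2t_sn here as
+ * well: requesting R2TSN 5 when the Target has only issued R2TSN 0
+ * through 3 is rejected below, mirroring the READ-side check.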
+ */
+ if (ref_cmd->data_direction == DMA_TO_DEVICE) {
+ if (tmr_req->exp_data_sn > ref_cmd->r2t_sn) {
+ pr_err("Received ExpDataSN: 0x%08x for WRITE"
+ " in TMR TASK_REASSIGN greater than command's"
+ " R2TSN: 0x%08x.\n", tmr_req->exp_data_sn,
+ ref_cmd->r2t_sn);
+ return -1;
+ }
+ return iscsit_task_reassign_prepare_write(tmr_req, conn);
+ }
+
+ pr_err("Unknown iSCSI data_direction: 0x%02x\n",
+ ref_cmd->data_direction);
+
+ return -1;
+}
diff --git a/drivers/target/iscsi/iscsi_target_tmr.h b/drivers/target/iscsi/iscsi_target_tmr.h
new file mode 100644
index 000000000000..142e992cb097
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tmr.h
@@ -0,0 +1,14 @@
+#ifndef ISCSI_TARGET_TMR_H
+#define ISCSI_TARGET_TMR_H
+
+extern u8 iscsit_tmr_abort_task(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_task_warm_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+ unsigned char *);
+extern int iscsit_tmr_task_cold_reset(struct iscsi_conn *, struct iscsi_tmr_req *,
+ unsigned char *);
+extern u8 iscsit_tmr_task_reassign(struct iscsi_cmd *, unsigned char *);
+extern int iscsit_tmr_post_handler(struct iscsi_cmd *, struct iscsi_conn *);
+extern int iscsit_check_task_reassign_expdatasn(struct iscsi_tmr_req *,
+ struct iscsi_conn *);
+
+#endif /* ISCSI_TARGET_TMR_H */
diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c
new file mode 100644
index 000000000000..d4cf2cd25c44
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tpg.c
@@ -0,0 +1,759 @@
+/*******************************************************************************
+ * This file contains iSCSI Target Portal Group related functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/ + +#include <target/target_core_base.h> +#include <target/target_core_transport.h> +#include <target/target_core_fabric_ops.h> +#include <target/target_core_configfs.h> +#include <target/target_core_tpg.h> + +#include "iscsi_target_core.h" +#include "iscsi_target_erl0.h" +#include "iscsi_target_login.h" +#include "iscsi_target_nodeattrib.h" +#include "iscsi_target_tpg.h" +#include "iscsi_target_util.h" +#include "iscsi_target.h" +#include "iscsi_target_parameters.h" + +struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *tiqn, u16 tpgt) +{ + struct iscsi_portal_group *tpg; + + tpg = kzalloc(sizeof(struct iscsi_portal_group), GFP_KERNEL); + if (!tpg) { + pr_err("Unable to allocate struct iscsi_portal_group\n"); + return NULL; + } + + tpg->tpgt = tpgt; + tpg->tpg_state = TPG_STATE_FREE; + tpg->tpg_tiqn = tiqn; + INIT_LIST_HEAD(&tpg->tpg_gnp_list); + INIT_LIST_HEAD(&tpg->tpg_list); + mutex_init(&tpg->tpg_access_lock); + mutex_init(&tpg->np_login_lock); + spin_lock_init(&tpg->tpg_state_lock); + spin_lock_init(&tpg->tpg_np_lock); + + return tpg; +} + +static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *); + +int iscsit_load_discovery_tpg(void) +{ + struct iscsi_param *param; + struct iscsi_portal_group *tpg; + int ret; + + tpg = iscsit_alloc_portal_group(NULL, 1); + if (!tpg) { + pr_err("Unable to allocate struct iscsi_portal_group\n"); + return -1; + } + + ret = core_tpg_register( + &lio_target_fabric_configfs->tf_ops, + NULL, &tpg->tpg_se_tpg, (void *)tpg, + TRANSPORT_TPG_TYPE_DISCOVERY); + if (ret < 0) { + kfree(tpg); + return -1; + } + + tpg->sid = 1; /* First Assigned LIO Session ID */ + iscsit_set_default_tpg_attribs(tpg); + + if (iscsi_create_default_params(&tpg->param_list) < 0) + goto out; + /* + * By default we disable authentication for discovery sessions, + * this can be changed with: + * + * /sys/kernel/config/target/iscsi/discovery_auth/enforce_discovery_auth + */ + param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list); + if (!param) + goto out; + + if (iscsi_update_param_value(param, "CHAP,None") < 0) + goto out; + + tpg->tpg_attrib.authentication = 0; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_ACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + iscsit_global->discovery_tpg = tpg; + pr_debug("CORE[0] - Allocated Discovery TPG\n"); + + return 0; +out: + if (tpg->sid == 1) + core_tpg_deregister(&tpg->tpg_se_tpg); + kfree(tpg); + return -1; +} + +void iscsit_release_discovery_tpg(void) +{ + struct iscsi_portal_group *tpg = iscsit_global->discovery_tpg; + + if (!tpg) + return; + + core_tpg_deregister(&tpg->tpg_se_tpg); + + kfree(tpg); + iscsit_global->discovery_tpg = NULL; +} + +struct iscsi_portal_group *iscsit_get_tpg_from_np( + struct iscsi_tiqn *tiqn, + struct iscsi_np *np) +{ + struct iscsi_portal_group *tpg = NULL; + struct iscsi_tpg_np *tpg_np; + + spin_lock(&tiqn->tiqn_tpg_lock); + list_for_each_entry(tpg, &tiqn->tiqn_tpg_list, tpg_list) { + + spin_lock(&tpg->tpg_state_lock); + if (tpg->tpg_state == TPG_STATE_FREE) { + spin_unlock(&tpg->tpg_state_lock); + continue; + } + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tpg->tpg_np_lock); + list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { + if (tpg_np->tpg_np == np) { + spin_unlock(&tpg->tpg_np_lock); + spin_unlock(&tiqn->tiqn_tpg_lock); + return tpg; + } + } + spin_unlock(&tpg->tpg_np_lock); + } + spin_unlock(&tiqn->tiqn_tpg_lock); + + return NULL; +} + +int 
iscsit_get_tpg( + struct iscsi_portal_group *tpg) +{ + int ret; + + ret = mutex_lock_interruptible(&tpg->tpg_access_lock); + return ((ret != 0) || signal_pending(current)) ? -1 : 0; +} + +void iscsit_put_tpg(struct iscsi_portal_group *tpg) +{ + mutex_unlock(&tpg->tpg_access_lock); +} + +static void iscsit_clear_tpg_np_login_thread( + struct iscsi_tpg_np *tpg_np, + struct iscsi_portal_group *tpg) +{ + if (!tpg_np->tpg_np) { + pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); + return; + } + + iscsit_reset_np_thread(tpg_np->tpg_np, tpg_np, tpg); +} + +void iscsit_clear_tpg_np_login_threads( + struct iscsi_portal_group *tpg) +{ + struct iscsi_tpg_np *tpg_np; + + spin_lock(&tpg->tpg_np_lock); + list_for_each_entry(tpg_np, &tpg->tpg_gnp_list, tpg_np_list) { + if (!tpg_np->tpg_np) { + pr_err("struct iscsi_tpg_np->tpg_np is NULL!\n"); + continue; + } + spin_unlock(&tpg->tpg_np_lock); + iscsit_clear_tpg_np_login_thread(tpg_np, tpg); + spin_lock(&tpg->tpg_np_lock); + } + spin_unlock(&tpg->tpg_np_lock); +} + +void iscsit_tpg_dump_params(struct iscsi_portal_group *tpg) +{ + iscsi_print_params(tpg->param_list); +} + +static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + a->authentication = TA_AUTHENTICATION; + a->login_timeout = TA_LOGIN_TIMEOUT; + a->netif_timeout = TA_NETIF_TIMEOUT; + a->default_cmdsn_depth = TA_DEFAULT_CMDSN_DEPTH; + a->generate_node_acls = TA_GENERATE_NODE_ACLS; + a->cache_dynamic_acls = TA_CACHE_DYNAMIC_ACLS; + a->demo_mode_write_protect = TA_DEMO_MODE_WRITE_PROTECT; + a->prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; +} + +int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) +{ + if (tpg->tpg_state != TPG_STATE_FREE) { + pr_err("Unable to add iSCSI Target Portal Group: %d" + " while not in TPG_STATE_FREE state.\n", tpg->tpgt); + return -EEXIST; + } + iscsit_set_default_tpg_attribs(tpg); + + if (iscsi_create_default_params(&tpg->param_list) < 0) + goto err_out; + + ISCSI_TPG_ATTRIB(tpg)->tpg = tpg; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_INACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tiqn->tiqn_tpg_lock); + list_add_tail(&tpg->tpg_list, &tiqn->tiqn_tpg_list); + tiqn->tiqn_ntpgs++; + pr_debug("CORE[%s]_TPG[%hu] - Added iSCSI Target Portal Group\n", + tiqn->tiqn, tpg->tpgt); + spin_unlock(&tiqn->tiqn_tpg_lock); + + return 0; +err_out: + if (tpg->param_list) { + iscsi_release_param_list(tpg->param_list); + tpg->param_list = NULL; + } + kfree(tpg); + return -ENOMEM; +} + +int iscsit_tpg_del_portal_group( + struct iscsi_tiqn *tiqn, + struct iscsi_portal_group *tpg, + int force) +{ + u8 old_state = tpg->tpg_state; + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_INACTIVE; + spin_unlock(&tpg->tpg_state_lock); + + if (iscsit_release_sessions_for_tpg(tpg, force) < 0) { + pr_err("Unable to delete iSCSI Target Portal Group:" + " %hu while active sessions exist, and force=0\n", + tpg->tpgt); + tpg->tpg_state = old_state; + return -EPERM; + } + + core_tpg_clear_object_luns(&tpg->tpg_se_tpg); + + if (tpg->param_list) { + iscsi_release_param_list(tpg->param_list); + tpg->param_list = NULL; + } + + core_tpg_deregister(&tpg->tpg_se_tpg); + + spin_lock(&tpg->tpg_state_lock); + tpg->tpg_state = TPG_STATE_FREE; + spin_unlock(&tpg->tpg_state_lock); + + spin_lock(&tiqn->tiqn_tpg_lock); + tiqn->tiqn_ntpgs--; + list_del(&tpg->tpg_list); + spin_unlock(&tiqn->tiqn_tpg_lock); + + pr_debug("CORE[%s]_TPG[%hu] - Deleted iSCSI Target 
Portal Group\n",
+ tiqn->tiqn, tpg->tpgt);
+
+ kfree(tpg);
+ return 0;
+}
+
+int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *tpg)
+{
+ struct iscsi_param *param;
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ spin_lock(&tpg->tpg_state_lock);
+ if (tpg->tpg_state == TPG_STATE_ACTIVE) {
+ pr_err("iSCSI target portal group: %hu is already"
+ " active, ignoring request.\n", tpg->tpgt);
+ spin_unlock(&tpg->tpg_state_lock);
+ return -EINVAL;
+ }
+ /*
+ * Make sure that AuthMethod does not contain None as an option
+ * unless explicitly disabled. Set the default to CHAP if
+ * authentication is enforced (the default), and remove the NONE
+ * option.
+ */
+ param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list);
+ if (!param) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+
+ if (ISCSI_TPG_ATTRIB(tpg)->authentication) {
+ if (!strcmp(param->value, NONE))
+ if (iscsi_update_param_value(param, CHAP) < 0) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+ if (iscsit_ta_authentication(tpg, 1) < 0) {
+ spin_unlock(&tpg->tpg_state_lock);
+ return -ENOMEM;
+ }
+ }
+
+ tpg->tpg_state = TPG_STATE_ACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ tiqn->tiqn_active_tpgs++;
+ pr_debug("iSCSI_TPG[%hu] - Enabled iSCSI Target Portal Group\n",
+ tpg->tpgt);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return 0;
+}
+
+int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *tpg, int force)
+{
+ struct iscsi_tiqn *tiqn;
+ u8 old_state = tpg->tpg_state;
+
+ spin_lock(&tpg->tpg_state_lock);
+ if (tpg->tpg_state == TPG_STATE_INACTIVE) {
+ pr_err("iSCSI Target Portal Group: %hu is already"
+ " inactive, ignoring request.\n", tpg->tpgt);
+ spin_unlock(&tpg->tpg_state_lock);
+ return -EINVAL;
+ }
+ tpg->tpg_state = TPG_STATE_INACTIVE;
+ spin_unlock(&tpg->tpg_state_lock);
+
+ iscsit_clear_tpg_np_login_threads(tpg);
+
+ if (iscsit_release_sessions_for_tpg(tpg, force) < 0) {
+ spin_lock(&tpg->tpg_state_lock);
+ tpg->tpg_state = old_state;
+ spin_unlock(&tpg->tpg_state_lock);
+ pr_err("Unable to disable iSCSI Target Portal Group:"
+ " %hu while active sessions exist, and force=0\n",
+ tpg->tpgt);
+ return -EPERM;
+ }
+
+ tiqn = tpg->tpg_tiqn;
+ if (!tiqn || (tpg == iscsit_global->discovery_tpg))
+ return 0;
+
+ spin_lock(&tiqn->tiqn_tpg_lock);
+ tiqn->tiqn_active_tpgs--;
+ pr_debug("iSCSI_TPG[%hu] - Disabled iSCSI Target Portal Group\n",
+ tpg->tpgt);
+ spin_unlock(&tiqn->tiqn_tpg_lock);
+
+ return 0;
+}
+
+struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(
+ struct iscsi_session *sess)
+{
+ struct se_session *se_sess = sess->se_sess;
+ struct se_node_acl *se_nacl = se_sess->se_node_acl;
+ struct iscsi_node_acl *acl = container_of(se_nacl, struct iscsi_node_acl,
+ se_node_acl);
+
+ return &acl->node_attrib;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_locate_child_np(
+ struct iscsi_tpg_np *tpg_np,
+ int network_transport)
+{
+ struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+
+ spin_lock(&tpg_np->tpg_np_parent_lock);
+ list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+ &tpg_np->tpg_np_parent_list, tpg_np_child_list) {
+ if (tpg_np_child->tpg_np->np_network_transport ==
+ network_transport) {
+ spin_unlock(&tpg_np->tpg_np_parent_lock);
+ return tpg_np_child;
+ }
+ }
+ spin_unlock(&tpg_np->tpg_np_parent_lock);
+
+ return NULL;
+}
+
+struct iscsi_tpg_np *iscsit_tpg_add_network_portal(
+ struct iscsi_portal_group *tpg,
+ struct __kernel_sockaddr_storage *sockaddr,
+ char *ip_str,
+ struct iscsi_tpg_np *tpg_np_parent,
+ int network_transport)
+{
+ struct iscsi_np *np;
+ struct iscsi_tpg_np *tpg_np;
+
+ tpg_np = kzalloc(sizeof(struct iscsi_tpg_np), GFP_KERNEL);
+ if (!tpg_np) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_tpg_np.\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+ np = iscsit_add_np(sockaddr, ip_str, network_transport);
+ if (IS_ERR(np)) {
+ kfree(tpg_np);
+ return ERR_CAST(np);
+ }
+
+ INIT_LIST_HEAD(&tpg_np->tpg_np_list);
+ INIT_LIST_HEAD(&tpg_np->tpg_np_child_list);
+ INIT_LIST_HEAD(&tpg_np->tpg_np_parent_list);
+ spin_lock_init(&tpg_np->tpg_np_parent_lock);
+ tpg_np->tpg_np = np;
+ tpg_np->tpg = tpg;
+
+ spin_lock(&tpg->tpg_np_lock);
+ list_add_tail(&tpg_np->tpg_np_list, &tpg->tpg_gnp_list);
+ tpg->num_tpg_nps++;
+ if (tpg->tpg_tiqn)
+ tpg->tpg_tiqn->tiqn_num_tpg_nps++;
+ spin_unlock(&tpg->tpg_np_lock);
+
+ if (tpg_np_parent) {
+ tpg_np->tpg_np_parent = tpg_np_parent;
+ spin_lock(&tpg_np_parent->tpg_np_parent_lock);
+ list_add_tail(&tpg_np->tpg_np_child_list,
+ &tpg_np_parent->tpg_np_parent_list);
+ spin_unlock(&tpg_np_parent->tpg_np_parent_lock);
+ }
+
+ pr_debug("CORE[%s] - Added Network Portal: %s:%hu,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+ return tpg_np;
+}
+
+static int iscsit_tpg_release_np(
+ struct iscsi_tpg_np *tpg_np,
+ struct iscsi_portal_group *tpg,
+ struct iscsi_np *np)
+{
+ iscsit_clear_tpg_np_login_thread(tpg_np, tpg);
+
+ pr_debug("CORE[%s] - Removed Network Portal: %s:%hu,%hu on %s\n",
+ tpg->tpg_tiqn->tiqn, np->np_ip, np->np_port, tpg->tpgt,
+ (np->np_network_transport == ISCSI_TCP) ? "TCP" : "SCTP");
+
+ tpg_np->tpg_np = NULL;
+ tpg_np->tpg = NULL;
+ kfree(tpg_np);
+ /*
+ * iscsit_del_np() will shut down the struct iscsi_np when the last
+ * TPG reference is released.
+ */
+ return iscsit_del_np(np);
+}
+
+int iscsit_tpg_del_network_portal(
+ struct iscsi_portal_group *tpg,
+ struct iscsi_tpg_np *tpg_np)
+{
+ struct iscsi_np *np;
+ struct iscsi_tpg_np *tpg_np_child, *tpg_np_child_tmp;
+ int ret = 0;
+
+ np = tpg_np->tpg_np;
+ if (!np) {
+ pr_err("Unable to locate struct iscsi_np from"
+ " struct iscsi_tpg_np\n");
+ return -EINVAL;
+ }
+
+ if (!tpg_np->tpg_np_parent) {
+ /*
+ * We are the parent tpg network portal. Release all of the
+ * child tpg_np's (e.g. the non ISCSI_TCP ones) on our parent
+ * list first.
+ */
+ list_for_each_entry_safe(tpg_np_child, tpg_np_child_tmp,
+ &tpg_np->tpg_np_parent_list,
+ tpg_np_child_list) {
+ ret = iscsit_tpg_del_network_portal(tpg, tpg_np_child);
+ if (ret < 0)
+ pr_err("iscsit_tpg_del_network_portal()"
+ " failed: %d\n", ret);
+ }
+ } else {
+ /*
+ * We are not the parent ISCSI_TCP tpg network portal. Release
+ * our own network portals from the child list.
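+ * A child portal (e.g. the SCTP variant of a TCP parent) is
+ * linked on the parent's tpg_np_parent_list, so the recursion
+ * above tears down the children before the parent itself is
+ * released.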
+ */ + spin_lock(&tpg_np->tpg_np_parent->tpg_np_parent_lock); + list_del(&tpg_np->tpg_np_child_list); + spin_unlock(&tpg_np->tpg_np_parent->tpg_np_parent_lock); + } + + spin_lock(&tpg->tpg_np_lock); + list_del(&tpg_np->tpg_np_list); + tpg->num_tpg_nps--; + if (tpg->tpg_tiqn) + tpg->tpg_tiqn->tiqn_num_tpg_nps--; + spin_unlock(&tpg->tpg_np_lock); + + return iscsit_tpg_release_np(tpg_np, tpg, np); +} + +int iscsit_tpg_set_initiator_node_queue_depth( + struct iscsi_portal_group *tpg, + unsigned char *initiatorname, + u32 queue_depth, + int force) +{ + return core_tpg_set_initiator_node_queue_depth(&tpg->tpg_se_tpg, + initiatorname, queue_depth, force); +} + +int iscsit_ta_authentication(struct iscsi_portal_group *tpg, u32 authentication) +{ + unsigned char buf1[256], buf2[256], *none = NULL; + int len; + struct iscsi_param *param; + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((authentication != 1) && (authentication != 0)) { + pr_err("Illegal value for authentication parameter:" + " %u, ignoring request.\n", authentication); + return -1; + } + + memset(buf1, 0, sizeof(buf1)); + memset(buf2, 0, sizeof(buf2)); + + param = iscsi_find_param_from_key(AUTHMETHOD, tpg->param_list); + if (!param) + return -EINVAL; + + if (authentication) { + snprintf(buf1, sizeof(buf1), "%s", param->value); + none = strstr(buf1, NONE); + if (!none) + goto out; + if (!strncmp(none + 4, ",", 1)) { + if (!strcmp(buf1, none)) + sprintf(buf2, "%s", none+5); + else { + none--; + *none = '\0'; + len = sprintf(buf2, "%s", buf1); + none += 5; + sprintf(buf2 + len, "%s", none); + } + } else { + none--; + *none = '\0'; + sprintf(buf2, "%s", buf1); + } + if (iscsi_update_param_value(param, buf2) < 0) + return -EINVAL; + } else { + snprintf(buf1, sizeof(buf1), "%s", param->value); + none = strstr(buf1, NONE); + if ((none)) + goto out; + strncat(buf1, ",", strlen(",")); + strncat(buf1, NONE, strlen(NONE)); + if (iscsi_update_param_value(param, buf1) < 0) + return -EINVAL; + } + +out: + a->authentication = authentication; + pr_debug("%s iSCSI Authentication Methods for TPG: %hu.\n", + a->authentication ? 
"Enforcing" : "Disabling", tpg->tpgt); + + return 0; +} + +int iscsit_ta_login_timeout( + struct iscsi_portal_group *tpg, + u32 login_timeout) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (login_timeout > TA_LOGIN_TIMEOUT_MAX) { + pr_err("Requested Login Timeout %u larger than maximum" + " %u\n", login_timeout, TA_LOGIN_TIMEOUT_MAX); + return -EINVAL; + } else if (login_timeout < TA_LOGIN_TIMEOUT_MIN) { + pr_err("Requested Logout Timeout %u smaller than" + " minimum %u\n", login_timeout, TA_LOGIN_TIMEOUT_MIN); + return -EINVAL; + } + + a->login_timeout = login_timeout; + pr_debug("Set Logout Timeout to %u for Target Portal Group" + " %hu\n", a->login_timeout, tpg->tpgt); + + return 0; +} + +int iscsit_ta_netif_timeout( + struct iscsi_portal_group *tpg, + u32 netif_timeout) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (netif_timeout > TA_NETIF_TIMEOUT_MAX) { + pr_err("Requested Network Interface Timeout %u larger" + " than maximum %u\n", netif_timeout, + TA_NETIF_TIMEOUT_MAX); + return -EINVAL; + } else if (netif_timeout < TA_NETIF_TIMEOUT_MIN) { + pr_err("Requested Network Interface Timeout %u smaller" + " than minimum %u\n", netif_timeout, + TA_NETIF_TIMEOUT_MIN); + return -EINVAL; + } + + a->netif_timeout = netif_timeout; + pr_debug("Set Network Interface Timeout to %u for" + " Target Portal Group %hu\n", a->netif_timeout, tpg->tpgt); + + return 0; +} + +int iscsit_ta_generate_node_acls( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->generate_node_acls = flag; + pr_debug("iSCSI_TPG[%hu] - Generate Initiator Portal Group ACLs: %s\n", + tpg->tpgt, (a->generate_node_acls) ? "Enabled" : "Disabled"); + + return 0; +} + +int iscsit_ta_default_cmdsn_depth( + struct iscsi_portal_group *tpg, + u32 tcq_depth) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if (tcq_depth > TA_DEFAULT_CMDSN_DEPTH_MAX) { + pr_err("Requested Default Queue Depth: %u larger" + " than maximum %u\n", tcq_depth, + TA_DEFAULT_CMDSN_DEPTH_MAX); + return -EINVAL; + } else if (tcq_depth < TA_DEFAULT_CMDSN_DEPTH_MIN) { + pr_err("Requested Default Queue Depth: %u smaller" + " than minimum %u\n", tcq_depth, + TA_DEFAULT_CMDSN_DEPTH_MIN); + return -EINVAL; + } + + a->default_cmdsn_depth = tcq_depth; + pr_debug("iSCSI_TPG[%hu] - Set Default CmdSN TCQ Depth to %u\n", + tpg->tpgt, a->default_cmdsn_depth); + + return 0; +} + +int iscsit_ta_cache_dynamic_acls( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->cache_dynamic_acls = flag; + pr_debug("iSCSI_TPG[%hu] - Cache Dynamic Initiator Portal Group" + " ACLs %s\n", tpg->tpgt, (a->cache_dynamic_acls) ? + "Enabled" : "Disabled"); + + return 0; +} + +int iscsit_ta_demo_mode_write_protect( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->demo_mode_write_protect = flag; + pr_debug("iSCSI_TPG[%hu] - Demo Mode Write Protect bit: %s\n", + tpg->tpgt, (a->demo_mode_write_protect) ? 
"ON" : "OFF"); + + return 0; +} + +int iscsit_ta_prod_mode_write_protect( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = &tpg->tpg_attrib; + + if ((flag != 0) && (flag != 1)) { + pr_err("Illegal value %d\n", flag); + return -EINVAL; + } + + a->prod_mode_write_protect = flag; + pr_debug("iSCSI_TPG[%hu] - Production Mode Write Protect bit:" + " %s\n", tpg->tpgt, (a->prod_mode_write_protect) ? + "ON" : "OFF"); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h new file mode 100644 index 000000000000..dda48c141a8c --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -0,0 +1,41 @@ +#ifndef ISCSI_TARGET_TPG_H +#define ISCSI_TARGET_TPG_H + +extern struct iscsi_portal_group *iscsit_alloc_portal_group(struct iscsi_tiqn *, u16); +extern int iscsit_load_discovery_tpg(void); +extern void iscsit_release_discovery_tpg(void); +extern struct iscsi_portal_group *iscsit_get_tpg_from_np(struct iscsi_tiqn *, + struct iscsi_np *); +extern int iscsit_get_tpg(struct iscsi_portal_group *); +extern void iscsit_put_tpg(struct iscsi_portal_group *); +extern void iscsit_clear_tpg_np_login_threads(struct iscsi_portal_group *); +extern void iscsit_tpg_dump_params(struct iscsi_portal_group *); +extern int iscsit_tpg_add_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *); +extern int iscsit_tpg_del_portal_group(struct iscsi_tiqn *, struct iscsi_portal_group *, + int); +extern int iscsit_tpg_enable_portal_group(struct iscsi_portal_group *); +extern int iscsit_tpg_disable_portal_group(struct iscsi_portal_group *, int); +extern struct iscsi_node_acl *iscsit_tpg_add_initiator_node_acl( + struct iscsi_portal_group *, const char *, u32); +extern void iscsit_tpg_del_initiator_node_acl(struct iscsi_portal_group *, + struct se_node_acl *); +extern struct iscsi_node_attrib *iscsit_tpg_get_node_attrib(struct iscsi_session *); +extern void iscsit_tpg_del_external_nps(struct iscsi_tpg_np *); +extern struct iscsi_tpg_np *iscsit_tpg_locate_child_np(struct iscsi_tpg_np *, int); +extern struct iscsi_tpg_np *iscsit_tpg_add_network_portal(struct iscsi_portal_group *, + struct __kernel_sockaddr_storage *, char *, struct iscsi_tpg_np *, + int); +extern int iscsit_tpg_del_network_portal(struct iscsi_portal_group *, + struct iscsi_tpg_np *); +extern int iscsit_tpg_set_initiator_node_queue_depth(struct iscsi_portal_group *, + unsigned char *, u32, int); +extern int iscsit_ta_authentication(struct iscsi_portal_group *, u32); +extern int iscsit_ta_login_timeout(struct iscsi_portal_group *, u32); +extern int iscsit_ta_netif_timeout(struct iscsi_portal_group *, u32); +extern int iscsit_ta_generate_node_acls(struct iscsi_portal_group *, u32); +extern int iscsit_ta_default_cmdsn_depth(struct iscsi_portal_group *, u32); +extern int iscsit_ta_cache_dynamic_acls(struct iscsi_portal_group *, u32); +extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); +extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); + +#endif /* ISCSI_TARGET_TPG_H */ diff --git a/drivers/target/iscsi/iscsi_target_tq.c b/drivers/target/iscsi/iscsi_target_tq.c new file mode 100644 index 000000000000..0baac5bcebd4 --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_tq.c @@ -0,0 +1,551 @@ +/******************************************************************************* + * This file contains the iSCSI Login Thread and Thread Queue functions. + * + * \u00a9 Copyright 2007-2011 RisingTide Systems LLC. 
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/kthread.h>
+#include <linux/list.h>
+#include <linux/bitmap.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target.h"
+
+static LIST_HEAD(active_ts_list);
+static LIST_HEAD(inactive_ts_list);
+static DEFINE_SPINLOCK(active_ts_lock);
+static DEFINE_SPINLOCK(inactive_ts_lock);
+static DEFINE_SPINLOCK(ts_bitmap_lock);
+
+static void iscsi_add_ts_to_active_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&active_ts_lock);
+ list_add_tail(&ts->ts_list, &active_ts_list);
+ iscsit_global->active_ts++;
+ spin_unlock(&active_ts_lock);
+}
+
+void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&inactive_ts_lock);
+ list_add_tail(&ts->ts_list, &inactive_ts_list);
+ iscsit_global->inactive_ts++;
+ spin_unlock(&inactive_ts_lock);
+}
+
+static void iscsi_del_ts_from_active_list(struct iscsi_thread_set *ts)
+{
+ spin_lock(&active_ts_lock);
+ list_del(&ts->ts_list);
+ iscsit_global->active_ts--;
+ spin_unlock(&active_ts_lock);
+}
+
+static struct iscsi_thread_set *iscsi_get_ts_from_inactive_list(void)
+{
+ struct iscsi_thread_set *ts;
+
+ spin_lock(&inactive_ts_lock);
+ if (list_empty(&inactive_ts_list)) {
+ spin_unlock(&inactive_ts_lock);
+ return NULL;
+ }
+
+ list_for_each_entry(ts, &inactive_ts_list, ts_list)
+ break;
+
+ list_del(&ts->ts_list);
+ iscsit_global->inactive_ts--;
+ spin_unlock(&inactive_ts_lock);
+
+ return ts;
+}
+
+int iscsi_allocate_thread_sets(u32 thread_pair_count)
+{
+ int allocated_thread_pair_count = 0, i, thread_id;
+ struct iscsi_thread_set *ts = NULL;
+
+ for (i = 0; i < thread_pair_count; i++) {
+ ts = kzalloc(sizeof(struct iscsi_thread_set), GFP_KERNEL);
+ if (!ts) {
+ pr_err("Unable to allocate memory for"
+ " thread set.\n");
+ return allocated_thread_pair_count;
+ }
+ /*
+ * Locate the next available region in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ thread_id = bitmap_find_free_region(iscsit_global->ts_bitmap,
+ iscsit_global->ts_bitmap_count, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+ if (thread_id < 0) {
+ pr_err("bitmap_find_free_region() failed for"
+ " thread_set_bitmap\n");
+ kfree(ts);
+ return allocated_thread_pair_count;
+ }
+
+ ts->thread_id = thread_id;
+ ts->status = ISCSI_THREAD_SET_FREE;
+ INIT_LIST_HEAD(&ts->ts_list);
+ spin_lock_init(&ts->ts_state_lock);
+ init_completion(&ts->rx_post_start_comp);
+ init_completion(&ts->tx_post_start_comp);
+ init_completion(&ts->rx_restart_comp);
+ init_completion(&ts->tx_restart_comp);
+ init_completion(&ts->rx_start_comp);
+ init_completion(&ts->tx_start_comp);
+
+ ts->create_threads = 1;
+ ts->tx_thread = kthread_run(iscsi_target_tx_thread, ts, "%s",
+ ISCSI_TX_THREAD_NAME);
+ if (IS_ERR(ts->tx_thread)) {
+ dump_stack();
+ pr_err("Unable to start iscsi_target_tx_thread\n");
+ break;
+ }
+
+ ts->rx_thread = kthread_run(iscsi_target_rx_thread, ts, "%s",
+ ISCSI_RX_THREAD_NAME);
+ if (IS_ERR(ts->rx_thread)) {
+ kthread_stop(ts->tx_thread);
+ pr_err("Unable to start iscsi_target_rx_thread\n");
+ break;
+ }
+ ts->create_threads = 0;
+
+ iscsi_add_ts_to_inactive_list(ts);
+ allocated_thread_pair_count++;
+ }
+
+ pr_debug("Spawned %d thread set(s) (%d total threads).\n",
+ allocated_thread_pair_count, allocated_thread_pair_count * 2);
+ return allocated_thread_pair_count;
+}
+
+void iscsi_deallocate_thread_sets(void)
+{
+ u32 released_count = 0;
+ struct iscsi_thread_set *ts = NULL;
+
+ while ((ts = iscsi_get_ts_from_inactive_list())) {
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_DIE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ if (ts->rx_thread) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ kthread_stop(ts->rx_thread);
+ }
+ if (ts->tx_thread) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ kthread_stop(ts->tx_thread);
+ }
+ /*
+ * Release this thread_id in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ bitmap_release_region(iscsit_global->ts_bitmap,
+ ts->thread_id, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+
+ released_count++;
+ kfree(ts);
+ }
+
+ if (released_count)
+ pr_debug("Stopped %d thread set(s) (%d total threads)."
+ "\n", released_count, released_count * 2);
+}
+
+static void iscsi_deallocate_extra_thread_sets(void)
+{
+ u32 orig_count, released_count = 0;
+ struct iscsi_thread_set *ts = NULL;
+
+ orig_count = TARGET_THREAD_SET_COUNT;
+
+ while ((iscsit_global->inactive_ts + 1) > orig_count) {
+ ts = iscsi_get_ts_from_inactive_list();
+ if (!ts)
+ break;
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_DIE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ if (ts->rx_thread) {
+ send_sig(SIGINT, ts->rx_thread, 1);
+ kthread_stop(ts->rx_thread);
+ }
+ if (ts->tx_thread) {
+ send_sig(SIGINT, ts->tx_thread, 1);
+ kthread_stop(ts->tx_thread);
+ }
+ /*
+ * Release this thread_id in the thread_set_bitmap
+ */
+ spin_lock(&ts_bitmap_lock);
+ bitmap_release_region(iscsit_global->ts_bitmap,
+ ts->thread_id, get_order(1));
+ spin_unlock(&ts_bitmap_lock);
+
+ released_count++;
+ kfree(ts);
+ }
+
+ if (released_count) {
+ pr_debug("Stopped %d thread set(s) (%d total threads)."
+ "\n", released_count, released_count * 2);
+ }
+}
+
+void iscsi_activate_thread_set(struct iscsi_conn *conn, struct iscsi_thread_set *ts)
+{
+ iscsi_add_ts_to_active_list(ts);
+
+ spin_lock_bh(&ts->ts_state_lock);
+ conn->thread_set = ts;
+ ts->conn = conn;
+ spin_unlock_bh(&ts->ts_state_lock);
+ /*
+ * Start up the RX thread and wait on rx_post_start_comp. The RX
+ * Thread will then do the same for the TX Thread in
+ * iscsi_rx_thread_pre_handler().
+ */
+ complete(&ts->rx_start_comp);
+ wait_for_completion(&ts->rx_post_start_comp);
+}
+
+struct iscsi_thread_set *iscsi_get_thread_set(void)
+{
+ int allocate_ts = 0;
+ struct completion comp;
+ struct iscsi_thread_set *ts = NULL;
+ /*
+ * If no inactive thread set is available on the first call to
+ * iscsi_get_ts_from_inactive_list(), sleep for a second and
+ * try again. If still none are available after two attempts,
+ * allocate a set ourselves.
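+ * Concretely: passes 0 and 1 below only wait, and once allocate_ts
+ * reaches 2 the loop calls iscsi_allocate_thread_sets(1) before
+ * sleeping again, so the following pass can normally pull the
+ * freshly allocated set from the inactive list.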
+ */ +get_set: + ts = iscsi_get_ts_from_inactive_list(); + if (!ts) { + if (allocate_ts == 2) + iscsi_allocate_thread_sets(1); + + init_completion(&comp); + wait_for_completion_timeout(&comp, 1 * HZ); + + allocate_ts++; + goto get_set; + } + + ts->delay_inactive = 1; + ts->signal_sent = 0; + ts->thread_count = 2; + init_completion(&ts->rx_restart_comp); + init_completion(&ts->tx_restart_comp); + + return ts; +} + +void iscsi_set_thread_clear(struct iscsi_conn *conn, u8 thread_clear) +{ + struct iscsi_thread_set *ts = NULL; + + if (!conn->thread_set) { + pr_err("struct iscsi_conn->thread_set is NULL\n"); + return; + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->thread_clear &= ~thread_clear; + + if ((thread_clear & ISCSI_CLEAR_RX_THREAD) && + (ts->blocked_threads & ISCSI_BLOCK_RX_THREAD)) + complete(&ts->rx_restart_comp); + else if ((thread_clear & ISCSI_CLEAR_TX_THREAD) && + (ts->blocked_threads & ISCSI_BLOCK_TX_THREAD)) + complete(&ts->tx_restart_comp); + spin_unlock_bh(&ts->ts_state_lock); +} + +void iscsi_set_thread_set_signal(struct iscsi_conn *conn, u8 signal_sent) +{ + struct iscsi_thread_set *ts = NULL; + + if (!conn->thread_set) { + pr_err("struct iscsi_conn->thread_set is NULL\n"); + return; + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->signal_sent |= signal_sent; + spin_unlock_bh(&ts->ts_state_lock); +} + +int iscsi_release_thread_set(struct iscsi_conn *conn) +{ + int thread_called = 0; + struct iscsi_thread_set *ts = NULL; + + if (!conn || !conn->thread_set) { + pr_err("connection or thread set pointer is NULL\n"); + BUG(); + } + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + ts->status = ISCSI_THREAD_SET_RESET; + + if (!strncmp(current->comm, ISCSI_RX_THREAD_NAME, + strlen(ISCSI_RX_THREAD_NAME))) + thread_called = ISCSI_RX_THREAD; + else if (!strncmp(current->comm, ISCSI_TX_THREAD_NAME, + strlen(ISCSI_TX_THREAD_NAME))) + thread_called = ISCSI_TX_THREAD; + + if (ts->rx_thread && (thread_called == ISCSI_TX_THREAD) && + (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) { + + if (!(ts->signal_sent & ISCSI_SIGNAL_RX_THREAD)) { + send_sig(SIGINT, ts->rx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD; + } + ts->blocked_threads |= ISCSI_BLOCK_RX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); + wait_for_completion(&ts->rx_restart_comp); + spin_lock_bh(&ts->ts_state_lock); + ts->blocked_threads &= ~ISCSI_BLOCK_RX_THREAD; + } + if (ts->tx_thread && (thread_called == ISCSI_RX_THREAD) && + (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) { + + if (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD)) { + send_sig(SIGINT, ts->tx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD; + } + ts->blocked_threads |= ISCSI_BLOCK_TX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); + wait_for_completion(&ts->tx_restart_comp); + spin_lock_bh(&ts->ts_state_lock); + ts->blocked_threads &= ~ISCSI_BLOCK_TX_THREAD; + } + + ts->conn = NULL; + ts->status = ISCSI_THREAD_SET_FREE; + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +int iscsi_thread_set_force_reinstatement(struct iscsi_conn *conn) +{ + struct iscsi_thread_set *ts; + + if (!conn->thread_set) + return -1; + ts = conn->thread_set; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->status != ISCSI_THREAD_SET_ACTIVE) { + spin_unlock_bh(&ts->ts_state_lock); + return -1; + } + + if (ts->tx_thread && (!(ts->signal_sent & ISCSI_SIGNAL_TX_THREAD))) { + send_sig(SIGINT, ts->tx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_TX_THREAD; + } + if (ts->rx_thread && (!(ts->signal_sent & 
ISCSI_SIGNAL_RX_THREAD))) { + send_sig(SIGINT, ts->rx_thread, 1); + ts->signal_sent |= ISCSI_SIGNAL_RX_THREAD; + } + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +static void iscsi_check_to_add_additional_sets(void) +{ + int thread_sets_add; + + spin_lock(&inactive_ts_lock); + thread_sets_add = iscsit_global->inactive_ts; + spin_unlock(&inactive_ts_lock); + if (thread_sets_add == 1) + iscsi_allocate_thread_sets(1); +} + +static int iscsi_signal_thread_pre_handler(struct iscsi_thread_set *ts) +{ + spin_lock_bh(&ts->ts_state_lock); + if ((ts->status == ISCSI_THREAD_SET_DIE) || signal_pending(current)) { + spin_unlock_bh(&ts->ts_state_lock); + return -1; + } + spin_unlock_bh(&ts->ts_state_lock); + + return 0; +} + +struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *ts) +{ + int ret; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->create_threads) { + spin_unlock_bh(&ts->ts_state_lock); + goto sleep; + } + + flush_signals(current); + + if (ts->delay_inactive && (--ts->thread_count == 0)) { + spin_unlock_bh(&ts->ts_state_lock); + iscsi_del_ts_from_active_list(ts); + + if (!iscsit_global->in_shutdown) + iscsi_deallocate_extra_thread_sets(); + + iscsi_add_ts_to_inactive_list(ts); + spin_lock_bh(&ts->ts_state_lock); + } + + if ((ts->status == ISCSI_THREAD_SET_RESET) && + (ts->thread_clear & ISCSI_CLEAR_RX_THREAD)) + complete(&ts->rx_restart_comp); + + ts->thread_clear &= ~ISCSI_CLEAR_RX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); +sleep: + ret = wait_for_completion_interruptible(&ts->rx_start_comp); + if (ret != 0) + return NULL; + + if (iscsi_signal_thread_pre_handler(ts) < 0) + return NULL; + + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for" + " thread_id: %d, going back to sleep\n", ts->thread_id); + goto sleep; + } + iscsi_check_to_add_additional_sets(); + /* + * The RX Thread starts up the TX Thread and sleeps. + */ + ts->thread_clear |= ISCSI_CLEAR_RX_THREAD; + complete(&ts->tx_start_comp); + wait_for_completion(&ts->tx_post_start_comp); + + return ts->conn; +} + +struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *ts) +{ + int ret; + + spin_lock_bh(&ts->ts_state_lock); + if (ts->create_threads) { + spin_unlock_bh(&ts->ts_state_lock); + goto sleep; + } + + flush_signals(current); + + if (ts->delay_inactive && (--ts->thread_count == 0)) { + spin_unlock_bh(&ts->ts_state_lock); + iscsi_del_ts_from_active_list(ts); + + if (!iscsit_global->in_shutdown) + iscsi_deallocate_extra_thread_sets(); + + iscsi_add_ts_to_inactive_list(ts); + spin_lock_bh(&ts->ts_state_lock); + } + if ((ts->status == ISCSI_THREAD_SET_RESET) && + (ts->thread_clear & ISCSI_CLEAR_TX_THREAD)) + complete(&ts->tx_restart_comp); + + ts->thread_clear &= ~ISCSI_CLEAR_TX_THREAD; + spin_unlock_bh(&ts->ts_state_lock); +sleep: + ret = wait_for_completion_interruptible(&ts->tx_start_comp); + if (ret != 0) + return NULL; + + if (iscsi_signal_thread_pre_handler(ts) < 0) + return NULL; + + if (!ts->conn) { + pr_err("struct iscsi_thread_set->conn is NULL for " + " thread_id: %d, going back to sleep\n", + ts->thread_id); + goto sleep; + } + + iscsi_check_to_add_additional_sets(); + /* + * From the TX thread, up the tx_post_start_comp that the RX Thread is + * sleeping on in iscsi_rx_thread_pre_handler(), then up the + * rx_post_start_comp that iscsi_activate_thread_set() is sleeping on. 
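+ * The full startup handshake is therefore: rx_start_comp (from
+ * iscsi_activate_thread_set() to the RX thread), tx_start_comp
+ * (RX to TX), then tx_post_start_comp and rx_post_start_comp here
+ * to release both sleepers.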
+ */
+ ts->thread_clear |= ISCSI_CLEAR_TX_THREAD;
+ complete(&ts->tx_post_start_comp);
+ complete(&ts->rx_post_start_comp);
+
+ spin_lock_bh(&ts->ts_state_lock);
+ ts->status = ISCSI_THREAD_SET_ACTIVE;
+ spin_unlock_bh(&ts->ts_state_lock);
+
+ return ts->conn;
+}
+
+int iscsi_thread_set_init(void)
+{
+ int size;
+
+ iscsit_global->ts_bitmap_count = ISCSI_TS_BITMAP_BITS;
+
+ size = BITS_TO_LONGS(iscsit_global->ts_bitmap_count) * sizeof(long);
+ iscsit_global->ts_bitmap = kzalloc(size, GFP_KERNEL);
+ if (!iscsit_global->ts_bitmap) {
+ pr_err("Unable to allocate iscsit_global->ts_bitmap\n");
+ return -ENOMEM;
+ }
+
+ spin_lock_init(&active_ts_lock);
+ spin_lock_init(&inactive_ts_lock);
+ spin_lock_init(&ts_bitmap_lock);
+ INIT_LIST_HEAD(&active_ts_list);
+ INIT_LIST_HEAD(&inactive_ts_list);
+
+ return 0;
+}
+
+void iscsi_thread_set_free(void)
+{
+ kfree(iscsit_global->ts_bitmap);
+}
diff --git a/drivers/target/iscsi/iscsi_target_tq.h b/drivers/target/iscsi/iscsi_target_tq.h
new file mode 100644
index 000000000000..26e6a95ec203
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_tq.h
@@ -0,0 +1,88 @@
+#ifndef ISCSI_THREAD_QUEUE_H
+#define ISCSI_THREAD_QUEUE_H
+
+/*
+ * Defines for thread sets.
+ */
+extern int iscsi_thread_set_force_reinstatement(struct iscsi_conn *);
+extern void iscsi_add_ts_to_inactive_list(struct iscsi_thread_set *);
+extern int iscsi_allocate_thread_sets(u32);
+extern void iscsi_deallocate_thread_sets(void);
+extern void iscsi_activate_thread_set(struct iscsi_conn *, struct iscsi_thread_set *);
+extern struct iscsi_thread_set *iscsi_get_thread_set(void);
+extern void iscsi_set_thread_clear(struct iscsi_conn *, u8);
+extern void iscsi_set_thread_set_signal(struct iscsi_conn *, u8);
+extern int iscsi_release_thread_set(struct iscsi_conn *);
+extern struct iscsi_conn *iscsi_rx_thread_pre_handler(struct iscsi_thread_set *);
+extern struct iscsi_conn *iscsi_tx_thread_pre_handler(struct iscsi_thread_set *);
+extern int iscsi_thread_set_init(void);
+extern void iscsi_thread_set_free(void);
+
+extern int iscsi_target_tx_thread(void *);
+extern int iscsi_target_rx_thread(void *);
+
+#define TARGET_THREAD_SET_COUNT 4
+
+#define ISCSI_RX_THREAD 1
+#define ISCSI_TX_THREAD 2
+#define ISCSI_RX_THREAD_NAME "iscsi_trx"
+#define ISCSI_TX_THREAD_NAME "iscsi_ttx"
+#define ISCSI_BLOCK_RX_THREAD 0x1
+#define ISCSI_BLOCK_TX_THREAD 0x2
+#define ISCSI_CLEAR_RX_THREAD 0x1
+#define ISCSI_CLEAR_TX_THREAD 0x2
+#define ISCSI_SIGNAL_RX_THREAD 0x1
+#define ISCSI_SIGNAL_TX_THREAD 0x2
+
+/* struct iscsi_thread_set->status */
+#define ISCSI_THREAD_SET_FREE 1
+#define ISCSI_THREAD_SET_ACTIVE 2
+#define ISCSI_THREAD_SET_DIE 3
+#define ISCSI_THREAD_SET_RESET 4
+#define ISCSI_THREAD_SET_DEALLOCATE_THREADS 5
+
+/* By default allow a maximum of 32K iSCSI connections */
+#define ISCSI_TS_BITMAP_BITS 32768
+
+struct iscsi_thread_set {
+ /* flags used for blocking and restarting sets */
+ int blocked_threads;
+ /* flag for creating threads */
+ int create_threads;
+ /* flag for delaying re-adding to the inactive list */
+ int delay_inactive;
+ /* status for thread set */
+ int status;
+ /* which threads have had signals sent */
+ int signal_sent;
+ /* flag for which threads exited first */
+ int thread_clear;
+ /* Active threads in the thread set */
+ int thread_count;
+ /* Unique thread ID */
+ u32 thread_id;
+ /* pointer to connection if set is active */
+ struct iscsi_conn *conn;
+ /* used for controlling ts state accesses */
+ spinlock_t ts_state_lock;
+ /* Used for rx side post startup */
+ struct completion rx_post_start_comp;
+ /* Used for tx side post startup */
+ struct completion tx_post_start_comp;
+ /* used for restarting thread queue */
+ struct completion rx_restart_comp;
+ /* used for restarting thread queue */
+ struct completion tx_restart_comp;
+ /* used for normal unused blocking */
+ struct completion rx_start_comp;
+ /* used for normal unused blocking */
+ struct completion tx_start_comp;
+ /* OS descriptor for rx thread */
+ struct task_struct *rx_thread;
+ /* OS descriptor for tx thread */
+ struct task_struct *tx_thread;
+ /* struct iscsi_thread_set list head */
+ struct list_head ts_list;
+};
+
+#endif /*** ISCSI_THREAD_QUEUE_H ***/
diff --git a/drivers/target/iscsi/iscsi_target_util.c b/drivers/target/iscsi/iscsi_target_util.c
new file mode 100644
index 000000000000..a1acb0167902
--- /dev/null
+++ b/drivers/target/iscsi/iscsi_target_util.c
@@ -0,0 +1,1819 @@
+/*******************************************************************************
+ * This file contains the iSCSI Target specific utility functions.
+ *
+ * © Copyright 2007-2011 RisingTide Systems LLC.
+ *
+ * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
+ *
+ * Author: Nicholas A. Bellinger <nab@linux-iscsi.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ ******************************************************************************/
+
+#include <linux/list.h>
+#include <scsi/scsi_tcq.h>
+#include <scsi/iscsi_proto.h>
+#include <target/target_core_base.h>
+#include <target/target_core_transport.h>
+#include <target/target_core_tmr.h>
+#include <target/target_core_fabric_ops.h>
+#include <target/target_core_configfs.h>
+
+#include "iscsi_target_core.h"
+#include "iscsi_target_parameters.h"
+#include "iscsi_target_seq_pdu_list.h"
+#include "iscsi_target_datain_values.h"
+#include "iscsi_target_erl0.h"
+#include "iscsi_target_erl1.h"
+#include "iscsi_target_erl2.h"
+#include "iscsi_target_tpg.h"
+#include "iscsi_target_tq.h"
+#include "iscsi_target_util.h"
+#include "iscsi_target.h"
+
+#define PRINT_BUFF(buff, len) \
+{ \
+ int zzz; \
+ \
+ pr_debug("%d:\n", __LINE__); \
+ for (zzz = 0; zzz < len; zzz++) { \
+ if (zzz % 16 == 0) { \
+ if (zzz) \
+ pr_debug("\n"); \
+ pr_debug("%4i: ", zzz); \
+ } \
+ pr_debug("%02x ", (unsigned char) (buff)[zzz]); \
+ } \
+ if ((len + 1) % 16) \
+ pr_debug("\n"); \
+}
+
+extern struct list_head g_tiqn_list;
+extern spinlock_t tiqn_lock;
+
+/*
+ * Called with cmd->r2t_lock held.
+ */
+int iscsit_add_r2t_to_list(
+ struct iscsi_cmd *cmd,
+ u32 offset,
+ u32 xfer_len,
+ int recovery,
+ u32 r2t_sn)
+{
+ struct iscsi_r2t *r2t;
+
+ r2t = kmem_cache_zalloc(lio_r2t_cache, GFP_ATOMIC);
+ if (!r2t) {
+ pr_err("Unable to allocate memory for struct iscsi_r2t.\n");
+ return -1;
+ }
+ INIT_LIST_HEAD(&r2t->r2t_list);
+
+ r2t->recovery_r2t = recovery;
+ r2t->r2t_sn = (!r2t_sn) ? 
cmd->r2t_sn++ : r2t_sn; + r2t->offset = offset; + r2t->xfer_len = xfer_len; + list_add_tail(&r2t->r2t_list, &cmd->cmd_r2t_list); + spin_unlock_bh(&cmd->r2t_lock); + + iscsit_add_cmd_to_immediate_queue(cmd, cmd->conn, ISTATE_SEND_R2T); + + spin_lock_bh(&cmd->r2t_lock); + return 0; +} + +struct iscsi_r2t *iscsit_get_r2t_for_eos( + struct iscsi_cmd *cmd, + u32 offset, + u32 length) +{ + struct iscsi_r2t *r2t; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if ((r2t->offset <= offset) && + (r2t->offset + r2t->xfer_len) >= (offset + length)) { + spin_unlock_bh(&cmd->r2t_lock); + return r2t; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate R2T for Offset: %u, Length:" + " %u\n", offset, length); + return NULL; +} + +struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *cmd) +{ + struct iscsi_r2t *r2t; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) { + if (!r2t->sent_r2t) { + spin_unlock_bh(&cmd->r2t_lock); + return r2t; + } + } + spin_unlock_bh(&cmd->r2t_lock); + + pr_err("Unable to locate next R2T to send for ITT:" + " 0x%08x.\n", cmd->init_task_tag); + return NULL; +} + +/* + * Called with cmd->r2t_lock held. + */ +void iscsit_free_r2t(struct iscsi_r2t *r2t, struct iscsi_cmd *cmd) +{ + list_del(&r2t->r2t_list); + kmem_cache_free(lio_r2t_cache, r2t); +} + +void iscsit_free_r2ts_from_list(struct iscsi_cmd *cmd) +{ + struct iscsi_r2t *r2t, *r2t_tmp; + + spin_lock_bh(&cmd->r2t_lock); + list_for_each_entry_safe(r2t, r2t_tmp, &cmd->cmd_r2t_list, r2t_list) + iscsit_free_r2t(r2t, cmd); + spin_unlock_bh(&cmd->r2t_lock); +} + +/* + * May be called from software interrupt (timer) context for allocating + * iSCSI NopINs. + */ +struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *conn, gfp_t gfp_mask) +{ + struct iscsi_cmd *cmd; + + cmd = kmem_cache_zalloc(lio_cmd_cache, gfp_mask); + if (!cmd) { + pr_err("Unable to allocate memory for struct iscsi_cmd.\n"); + return NULL; + } + + cmd->conn = conn; + INIT_LIST_HEAD(&cmd->i_list); + INIT_LIST_HEAD(&cmd->datain_list); + INIT_LIST_HEAD(&cmd->cmd_r2t_list); + init_completion(&cmd->reject_comp); + spin_lock_init(&cmd->datain_lock); + spin_lock_init(&cmd->dataout_timeout_lock); + spin_lock_init(&cmd->istate_lock); + spin_lock_init(&cmd->error_lock); + spin_lock_init(&cmd->r2t_lock); + + return cmd; +} + +/* + * Called from iscsi_handle_scsi_cmd() + */ +struct iscsi_cmd *iscsit_allocate_se_cmd( + struct iscsi_conn *conn, + u32 data_length, + int data_direction, + int iscsi_task_attr) +{ + struct iscsi_cmd *cmd; + struct se_cmd *se_cmd; + int sam_task_attr; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->data_direction = data_direction; + cmd->data_length = data_length; + /* + * Figure out the SAM Task Attribute for the incoming SCSI CDB + */ + if ((iscsi_task_attr == ISCSI_ATTR_UNTAGGED) || + (iscsi_task_attr == ISCSI_ATTR_SIMPLE)) + sam_task_attr = MSG_SIMPLE_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_ORDERED) + sam_task_attr = MSG_ORDERED_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_HEAD_OF_QUEUE) + sam_task_attr = MSG_HEAD_TAG; + else if (iscsi_task_attr == ISCSI_ATTR_ACA) + sam_task_attr = MSG_ACA_TAG; + else { + pr_debug("Unknown iSCSI Task Attribute: 0x%02x, using" + " MSG_SIMPLE_TAG\n", iscsi_task_attr); + sam_task_attr = MSG_SIMPLE_TAG; + } + + se_cmd = &cmd->se_cmd; + /* + * Initialize struct se_cmd descriptor from target_core_mod infrastructure + */ + transport_init_se_cmd(se_cmd, 
&lio_target_fabric_configfs->tf_ops, + conn->sess->se_sess, data_length, data_direction, + sam_task_attr, &cmd->sense_buffer[0]); + return cmd; +} + +struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr( + struct iscsi_conn *conn, + u8 function) +{ + struct iscsi_cmd *cmd; + struct se_cmd *se_cmd; + u8 tcm_function; + + cmd = iscsit_allocate_cmd(conn, GFP_KERNEL); + if (!cmd) + return NULL; + + cmd->data_direction = DMA_NONE; + + cmd->tmr_req = kzalloc(sizeof(struct iscsi_tmr_req), GFP_KERNEL); + if (!cmd->tmr_req) { + pr_err("Unable to allocate memory for" + " Task Management command!\n"); + return NULL; + } + /* + * TASK_REASSIGN for ERL=2 / connection stays inside of + * LIO-Target $FABRIC_MOD + */ + if (function == ISCSI_TM_FUNC_TASK_REASSIGN) + return cmd; + + se_cmd = &cmd->se_cmd; + /* + * Initialize struct se_cmd descriptor from target_core_mod infrastructure + */ + transport_init_se_cmd(se_cmd, &lio_target_fabric_configfs->tf_ops, + conn->sess->se_sess, 0, DMA_NONE, + MSG_SIMPLE_TAG, &cmd->sense_buffer[0]); + + switch (function) { + case ISCSI_TM_FUNC_ABORT_TASK: + tcm_function = TMR_ABORT_TASK; + break; + case ISCSI_TM_FUNC_ABORT_TASK_SET: + tcm_function = TMR_ABORT_TASK_SET; + break; + case ISCSI_TM_FUNC_CLEAR_ACA: + tcm_function = TMR_CLEAR_ACA; + break; + case ISCSI_TM_FUNC_CLEAR_TASK_SET: + tcm_function = TMR_CLEAR_TASK_SET; + break; + case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: + tcm_function = TMR_LUN_RESET; + break; + case ISCSI_TM_FUNC_TARGET_WARM_RESET: + tcm_function = TMR_TARGET_WARM_RESET; + break; + case ISCSI_TM_FUNC_TARGET_COLD_RESET: + tcm_function = TMR_TARGET_COLD_RESET; + break; + default: + pr_err("Unknown iSCSI TMR Function:" + " 0x%02x\n", function); + goto out; + } + + se_cmd->se_tmr_req = core_tmr_alloc_req(se_cmd, + (void *)cmd->tmr_req, tcm_function); + if (!se_cmd->se_tmr_req) + goto out; + + cmd->tmr_req->se_tmr_req = se_cmd->se_tmr_req; + + return cmd; +out: + iscsit_release_cmd(cmd); + if (se_cmd) + transport_free_se_cmd(se_cmd); + return NULL; +} + +int iscsit_decide_list_to_build( + struct iscsi_cmd *cmd, + u32 immediate_data_length) +{ + struct iscsi_build_list bl; + struct iscsi_conn *conn = cmd->conn; + struct iscsi_session *sess = conn->sess; + struct iscsi_node_attrib *na; + + if (sess->sess_ops->DataSequenceInOrder && + sess->sess_ops->DataPDUInOrder) + return 0; + + if (cmd->data_direction == DMA_NONE) + return 0; + + na = iscsit_tpg_get_node_attrib(sess); + memset(&bl, 0, sizeof(struct iscsi_build_list)); + + if (cmd->data_direction == DMA_FROM_DEVICE) { + bl.data_direction = ISCSI_PDU_READ; + bl.type = PDULIST_NORMAL; + if (na->random_datain_pdu_offsets) + bl.randomize |= RANDOM_DATAIN_PDU_OFFSETS; + if (na->random_datain_seq_offsets) + bl.randomize |= RANDOM_DATAIN_SEQ_OFFSETS; + } else { + bl.data_direction = ISCSI_PDU_WRITE; + bl.immediate_data_length = immediate_data_length; + if (na->random_r2t_offsets) + bl.randomize |= RANDOM_R2T_OFFSETS; + + if (!cmd->immediate_data && !cmd->unsolicited_data) + bl.type = PDULIST_NORMAL; + else if (cmd->immediate_data && !cmd->unsolicited_data) + bl.type = PDULIST_IMMEDIATE; + else if (!cmd->immediate_data && cmd->unsolicited_data) + bl.type = PDULIST_UNSOLICITED; + else if (cmd->immediate_data && cmd->unsolicited_data) + bl.type = PDULIST_IMMEDIATE_AND_UNSOLICITED; + } + + return iscsit_do_build_list(cmd, &bl); +} + +struct iscsi_seq *iscsit_get_seq_holder_for_datain( + struct iscsi_cmd *cmd, + u32 seq_send_order) +{ + u32 i; + + for (i = 0; i < cmd->seq_count; i++) + if (cmd->seq_list[i].seq_send_order == 
seq_send_order)
+ return &cmd->seq_list[i];
+
+ return NULL;
+}
+
+struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *cmd)
+{
+ u32 i;
+
+ if (!cmd->seq_list) {
+ pr_err("struct iscsi_cmd->seq_list is NULL!\n");
+ return NULL;
+ }
+
+ for (i = 0; i < cmd->seq_count; i++) {
+ if (cmd->seq_list[i].type != SEQTYPE_NORMAL)
+ continue;
+ if (cmd->seq_list[i].seq_send_order == cmd->seq_send_order) {
+ cmd->seq_send_order++;
+ return &cmd->seq_list[i];
+ }
+ }
+
+ return NULL;
+}
+
+struct iscsi_r2t *iscsit_get_holder_for_r2tsn(
+ struct iscsi_cmd *cmd,
+ u32 r2t_sn)
+{
+ struct iscsi_r2t *r2t;
+
+ spin_lock_bh(&cmd->r2t_lock);
+ list_for_each_entry(r2t, &cmd->cmd_r2t_list, r2t_list) {
+ if (r2t->r2t_sn == r2t_sn) {
+ spin_unlock_bh(&cmd->r2t_lock);
+ return r2t;
+ }
+ }
+ spin_unlock_bh(&cmd->r2t_lock);
+
+ return NULL;
+}
+
+static inline int iscsit_check_received_cmdsn(struct iscsi_session *sess, u32 cmdsn)
+{
+ int ret;
+
+ /*
+ * This is the proper method of checking received CmdSN against
+ * ExpCmdSN and MaxCmdSN values, as well as accounting for out
+ * of order CmdSNs due to multiple connection sessions and/or
+ * CRC failures.
+ */
+ if (iscsi_sna_gt(cmdsn, sess->max_cmd_sn)) {
+ pr_err("Received CmdSN: 0x%08x is greater than"
+ " MaxCmdSN: 0x%08x, protocol error.\n", cmdsn,
+ sess->max_cmd_sn);
+ ret = CMDSN_ERROR_CANNOT_RECOVER;
+
+ } else if (cmdsn == sess->exp_cmd_sn) {
+ sess->exp_cmd_sn++;
+ pr_debug("Received CmdSN matches ExpCmdSN,"
+ " incremented ExpCmdSN to: 0x%08x\n",
+ sess->exp_cmd_sn);
+ ret = CMDSN_NORMAL_OPERATION;
+
+ } else if (iscsi_sna_gt(cmdsn, sess->exp_cmd_sn)) {
+ pr_debug("Received CmdSN: 0x%08x is greater"
+ " than ExpCmdSN: 0x%08x, not acknowledging.\n",
+ cmdsn, sess->exp_cmd_sn);
+ ret = CMDSN_HIGHER_THAN_EXP;
+
+ } else {
+ pr_err("Received CmdSN: 0x%08x is less than"
+ " ExpCmdSN: 0x%08x, ignoring.\n", cmdsn,
+ sess->exp_cmd_sn);
+ ret = CMDSN_LOWER_THAN_EXP;
+ }
+
+ return ret;
+}
+
+/*
+ * Commands may be received out of order if MC/S is in use.
+ * Ensure they are executed in CmdSN order.
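+ *
+ * Worked example for iscsit_check_received_cmdsn() above, with
+ * illustrative values: given ExpCmdSN = 0x0A, a PDU carrying CmdSN 0x0A
+ * is executed immediately and ExpCmdSN advances to 0x0B; CmdSN 0x0C is
+ * queued as out-of-order until 0x0B arrives; CmdSN 0x09 is treated as a
+ * duplicate and the command is simply removed.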
+ */ +int iscsit_sequence_cmd( + struct iscsi_conn *conn, + struct iscsi_cmd *cmd, + u32 cmdsn) +{ + int ret; + int cmdsn_ret; + + mutex_lock(&conn->sess->cmdsn_mutex); + + cmdsn_ret = iscsit_check_received_cmdsn(conn->sess, cmdsn); + switch (cmdsn_ret) { + case CMDSN_NORMAL_OPERATION: + ret = iscsit_execute_cmd(cmd, 0); + if ((ret >= 0) && !list_empty(&conn->sess->sess_ooo_cmdsn_list)) + iscsit_execute_ooo_cmdsns(conn->sess); + break; + case CMDSN_HIGHER_THAN_EXP: + ret = iscsit_handle_ooo_cmdsn(conn->sess, cmd, cmdsn); + break; + case CMDSN_LOWER_THAN_EXP: + cmd->i_state = ISTATE_REMOVE; + iscsit_add_cmd_to_immediate_queue(cmd, conn, cmd->i_state); + ret = cmdsn_ret; + break; + default: + ret = cmdsn_ret; + break; + } + mutex_unlock(&conn->sess->cmdsn_mutex); + + return ret; +} + +int iscsit_check_unsolicited_dataout(struct iscsi_cmd *cmd, unsigned char *buf) +{ + struct iscsi_conn *conn = cmd->conn; + struct se_cmd *se_cmd = &cmd->se_cmd; + struct iscsi_data *hdr = (struct iscsi_data *) buf; + u32 payload_length = ntoh24(hdr->dlength); + + if (conn->sess->sess_ops->InitialR2T) { + pr_err("Received unexpected unsolicited data" + " while InitialR2T=Yes, protocol error.\n"); + transport_send_check_condition_and_sense(se_cmd, + TCM_UNEXPECTED_UNSOLICITED_DATA, 0); + return -1; + } + + if ((cmd->first_burst_len + payload_length) > + conn->sess->sess_ops->FirstBurstLength) { + pr_err("Total %u bytes exceeds FirstBurstLength: %u" + " for this Unsolicited DataOut Burst.\n", + (cmd->first_burst_len + payload_length), + conn->sess->sess_ops->FirstBurstLength); + transport_send_check_condition_and_sense(se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return -1; + } + + if (!(hdr->flags & ISCSI_FLAG_CMD_FINAL)) + return 0; + + if (((cmd->first_burst_len + payload_length) != cmd->data_length) && + ((cmd->first_burst_len + payload_length) != + conn->sess->sess_ops->FirstBurstLength)) { + pr_err("Unsolicited non-immediate data received %u" + " does not equal FirstBurstLength: %u, and does" + " not equal ExpXferLen %u.\n", + (cmd->first_burst_len + payload_length), + conn->sess->sess_ops->FirstBurstLength, cmd->data_length); + transport_send_check_condition_and_sense(se_cmd, + TCM_INCORRECT_AMOUNT_OF_DATA, 0); + return -1; + } + return 0; +} + +struct iscsi_cmd *iscsit_find_cmd_from_itt( + struct iscsi_conn *conn, + u32 init_task_tag) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock_bh(&conn->cmd_lock); + return cmd; + } + } + spin_unlock_bh(&conn->cmd_lock); + + pr_err("Unable to locate ITT: 0x%08x on CID: %hu", + init_task_tag, conn->cid); + return NULL; +} + +struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump( + struct iscsi_conn *conn, + u32 init_task_tag, + u32 length) +{ + struct iscsi_cmd *cmd; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->init_task_tag == init_task_tag) { + spin_unlock_bh(&conn->cmd_lock); + return cmd; + } + } + spin_unlock_bh(&conn->cmd_lock); + + pr_err("Unable to locate ITT: 0x%08x on CID: %hu," + " dumping payload\n", init_task_tag, conn->cid); + if (length) + iscsit_dump_data_payload(conn, length, 1); + + return NULL; +} + +struct iscsi_cmd *iscsit_find_cmd_from_ttt( + struct iscsi_conn *conn, + u32 targ_xfer_tag) +{ + struct iscsi_cmd *cmd = NULL; + + spin_lock_bh(&conn->cmd_lock); + list_for_each_entry(cmd, &conn->conn_cmd_list, i_list) { + if (cmd->targ_xfer_tag == targ_xfer_tag) { + 
spin_unlock_bh(&conn->cmd_lock);
+ return cmd;
+ }
+ }
+ spin_unlock_bh(&conn->cmd_lock);
+
+ pr_err("Unable to locate TTT: 0x%08x on CID: %hu\n",
+ targ_xfer_tag, conn->cid);
+ return NULL;
+}
+
+int iscsit_find_cmd_for_recovery(
+ struct iscsi_session *sess,
+ struct iscsi_cmd **cmd_ptr,
+ struct iscsi_conn_recovery **cr_ptr,
+ u32 init_task_tag)
+{
+ struct iscsi_cmd *cmd = NULL;
+ struct iscsi_conn_recovery *cr;
+ /*
+ * Scan through the inactive connection recovery list's command list.
+ * If init_task_tag matches, the command is still tied to an inactive
+ * connection recovery entry and is not yet ready to be reassigned.
+ */
+ spin_lock(&sess->cr_i_lock);
+ list_for_each_entry(cr, &sess->cr_inactive_list, cr_list) {
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_unlock(&sess->cr_i_lock);
+
+ *cr_ptr = cr;
+ *cmd_ptr = cmd;
+ return -2;
+ }
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&sess->cr_i_lock);
+ /*
+ * Scan through the active connection recovery list's command list.
+ * If init_task_tag matches, the command is ready to be reassigned.
+ */
+ spin_lock(&sess->cr_a_lock);
+ list_for_each_entry(cr, &sess->cr_active_list, cr_list) {
+ spin_lock(&cr->conn_recovery_cmd_lock);
+ list_for_each_entry(cmd, &cr->conn_recovery_cmd_list, i_list) {
+ if (cmd->init_task_tag == init_task_tag) {
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ spin_unlock(&sess->cr_a_lock);
+
+ *cr_ptr = cr;
+ *cmd_ptr = cmd;
+ return 0;
+ }
+ }
+ spin_unlock(&cr->conn_recovery_cmd_lock);
+ }
+ spin_unlock(&sess->cr_a_lock);
+
+ return -1;
+}
+
+void iscsit_add_cmd_to_immediate_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn,
+ u8 state)
+{
+ struct iscsi_queue_req *qr;
+
+ qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC);
+ if (!qr) {
+ pr_err("Unable to allocate memory for"
+ " struct iscsi_queue_req\n");
+ return;
+ }
+ INIT_LIST_HEAD(&qr->qr_list);
+ qr->cmd = cmd;
+ qr->state = state;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ list_add_tail(&qr->qr_list, &conn->immed_queue_list);
+ atomic_inc(&cmd->immed_queue_count);
+ atomic_set(&conn->check_immediate_queue, 1);
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ wake_up_process(conn->thread_set->tx_thread);
+}
+
+struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ if (list_empty(&conn->immed_queue_list)) {
+ spin_unlock_bh(&conn->immed_queue_lock);
+ return NULL;
+ }
+ list_for_each_entry(qr, &conn->immed_queue_list, qr_list)
+ break;
+
+ list_del(&qr->qr_list);
+ if (qr->cmd)
+ atomic_dec(&qr->cmd->immed_queue_count);
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ return qr;
+}
+
+static void iscsit_remove_cmd_from_immediate_queue(
+ struct iscsi_cmd *cmd,
+ struct iscsi_conn *conn)
+{
+ struct iscsi_queue_req *qr, *qr_tmp;
+
+ spin_lock_bh(&conn->immed_queue_lock);
+ if (!atomic_read(&cmd->immed_queue_count)) {
+ spin_unlock_bh(&conn->immed_queue_lock);
+ return;
+ }
+
+ list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) {
+ if (qr->cmd != cmd)
+ continue;
+
+ atomic_dec(&qr->cmd->immed_queue_count);
+ list_del(&qr->qr_list);
+ kmem_cache_free(lio_qr_cache, qr);
+ }
+ spin_unlock_bh(&conn->immed_queue_lock);
+
+ if (atomic_read(&cmd->immed_queue_count)) {
+ pr_err("ITT: 0x%08x immed_queue_count: %d\n",
+ cmd->init_task_tag,
+ atomic_read(&cmd->immed_queue_count));
+ }
+}
+
+void iscsit_add_cmd_to_response_queue(
struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + u8 state) +{ + struct iscsi_queue_req *qr; + + qr = kmem_cache_zalloc(lio_qr_cache, GFP_ATOMIC); + if (!qr) { + pr_err("Unable to allocate memory for" + " struct iscsi_queue_req\n"); + return; + } + INIT_LIST_HEAD(&qr->qr_list); + qr->cmd = cmd; + qr->state = state; + + spin_lock_bh(&conn->response_queue_lock); + list_add_tail(&qr->qr_list, &conn->response_queue_list); + atomic_inc(&cmd->response_queue_count); + spin_unlock_bh(&conn->response_queue_lock); + + wake_up_process(conn->thread_set->tx_thread); +} + +struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr; + + spin_lock_bh(&conn->response_queue_lock); + if (list_empty(&conn->response_queue_list)) { + spin_unlock_bh(&conn->response_queue_lock); + return NULL; + } + + list_for_each_entry(qr, &conn->response_queue_list, qr_list) + break; + + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->response_queue_count); + spin_unlock_bh(&conn->response_queue_lock); + + return qr; +} + +static void iscsit_remove_cmd_from_response_queue( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr, *qr_tmp; + + spin_lock_bh(&conn->response_queue_lock); + if (!atomic_read(&cmd->response_queue_count)) { + spin_unlock_bh(&conn->response_queue_lock); + return; + } + + list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list, + qr_list) { + if (qr->cmd != cmd) + continue; + + atomic_dec(&qr->cmd->response_queue_count); + list_del(&qr->qr_list); + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->response_queue_lock); + + if (atomic_read(&cmd->response_queue_count)) { + pr_err("ITT: 0x%08x response_queue_count: %d\n", + cmd->init_task_tag, + atomic_read(&cmd->response_queue_count)); + } +} + +void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *conn) +{ + struct iscsi_queue_req *qr, *qr_tmp; + + spin_lock_bh(&conn->immed_queue_lock); + list_for_each_entry_safe(qr, qr_tmp, &conn->immed_queue_list, qr_list) { + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->immed_queue_count); + + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->immed_queue_lock); + + spin_lock_bh(&conn->response_queue_lock); + list_for_each_entry_safe(qr, qr_tmp, &conn->response_queue_list, + qr_list) { + list_del(&qr->qr_list); + if (qr->cmd) + atomic_dec(&qr->cmd->response_queue_count); + + kmem_cache_free(lio_qr_cache, qr); + } + spin_unlock_bh(&conn->response_queue_lock); +} + +void iscsit_release_cmd(struct iscsi_cmd *cmd) +{ + struct iscsi_conn *conn = cmd->conn; + int i; + + iscsit_free_r2ts_from_list(cmd); + iscsit_free_all_datain_reqs(cmd); + + kfree(cmd->buf_ptr); + kfree(cmd->pdu_list); + kfree(cmd->seq_list); + kfree(cmd->tmr_req); + kfree(cmd->iov_data); + + for (i = 0; i < cmd->t_mem_sg_nents; i++) + __free_page(sg_page(&cmd->t_mem_sg[i])); + + kfree(cmd->t_mem_sg); + + if (conn) { + iscsit_remove_cmd_from_immediate_queue(cmd, conn); + iscsit_remove_cmd_from_response_queue(cmd, conn); + } + + kmem_cache_free(lio_cmd_cache, cmd); +} + +int iscsit_check_session_usage_count(struct iscsi_session *sess) +{ + spin_lock_bh(&sess->session_usage_lock); + if (sess->session_usage_count != 0) { + sess->session_waiting_on_uc = 1; + spin_unlock_bh(&sess->session_usage_lock); + if (in_interrupt()) + return 2; + + wait_for_completion(&sess->session_waiting_on_uc_comp); + return 1; + } + spin_unlock_bh(&sess->session_usage_lock); + + return 0; +} + +void 
iscsit_dec_session_usage_count(struct iscsi_session *sess)
+{
+ spin_lock_bh(&sess->session_usage_lock);
+ sess->session_usage_count--;
+
+ if (!sess->session_usage_count && sess->session_waiting_on_uc)
+ complete(&sess->session_waiting_on_uc_comp);
+
+ spin_unlock_bh(&sess->session_usage_lock);
+}
+
+void iscsit_inc_session_usage_count(struct iscsi_session *sess)
+{
+ spin_lock_bh(&sess->session_usage_lock);
+ sess->session_usage_count++;
+ spin_unlock_bh(&sess->session_usage_lock);
+}
+
+/*
+ * Used before iscsit_do_[rx,tx]_data() to determine iov and [rx,tx]_marker
+ * array counts needed for sync and steering.
+ */
+static int iscsit_determine_sync_and_steering_counts(
+ struct iscsi_conn *conn,
+ struct iscsi_data_count *count)
+{
+ u32 length = count->data_length;
+ u32 marker, markint;
+
+ count->sync_and_steering = 1;
+
+ marker = (count->type == ISCSI_RX_DATA) ?
+ conn->of_marker : conn->if_marker;
+ markint = (count->type == ISCSI_RX_DATA) ?
+ (conn->conn_ops->OFMarkInt * 4) :
+ (conn->conn_ops->IFMarkInt * 4);
+ count->ss_iov_count = count->iov_count;
+
+ while (length > 0) {
+ if (length >= marker) {
+ count->ss_iov_count += 3;
+ count->ss_marker_count += 2;
+
+ length -= marker;
+ marker = markint;
+ } else
+ length = 0;
+ }
+
+ return 0;
+}
+
+/*
+ * Setup conn->if_marker and conn->of_marker values based upon
+ * the initial marker-less interval. (see iSCSI v19 A.2)
+ */
+int iscsit_set_sync_and_steering_values(struct iscsi_conn *conn)
+{
+ int login_ifmarker_count = 0, login_ofmarker_count = 0, next_marker = 0;
+ /*
+ * IFMarkInt and OFMarkInt are negotiated as 32-bit words.
+ */
+ u32 IFMarkInt = (conn->conn_ops->IFMarkInt * 4);
+ u32 OFMarkInt = (conn->conn_ops->OFMarkInt * 4);
+
+ if (conn->conn_ops->OFMarker) {
+ /*
+ * Account for the first Login Command received not
+ * via iscsi_recv_msg().
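+ *
+ * Illustrative numbers only: if OFMarkInt was negotiated as 512 (32-bit
+ * words, i.e. a marker every 2048 bytes) and 148 bytes, including this
+ * 48-byte Login header, were consumed before Full Feature Phase, then
+ * conn->of_marker becomes 2048 - 148 = 1900, placing the first marker
+ * 1900 bytes into the marked stream.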
+ */ + conn->of_marker += ISCSI_HDR_LEN; + if (conn->of_marker <= OFMarkInt) { + conn->of_marker = (OFMarkInt - conn->of_marker); + } else { + login_ofmarker_count = (conn->of_marker / OFMarkInt); + next_marker = (OFMarkInt * (login_ofmarker_count + 1)) + + (login_ofmarker_count * MARKER_SIZE); + conn->of_marker = (next_marker - conn->of_marker); + } + conn->of_marker_offset = 0; + pr_debug("Setting OFMarker value to %u based on Initial" + " Markerless Interval.\n", conn->of_marker); + } + + if (conn->conn_ops->IFMarker) { + if (conn->if_marker <= IFMarkInt) { + conn->if_marker = (IFMarkInt - conn->if_marker); + } else { + login_ifmarker_count = (conn->if_marker / IFMarkInt); + next_marker = (IFMarkInt * (login_ifmarker_count + 1)) + + (login_ifmarker_count * MARKER_SIZE); + conn->if_marker = (next_marker - conn->if_marker); + } + pr_debug("Setting IFMarker value to %u based on Initial" + " Markerless Interval.\n", conn->if_marker); + } + + return 0; +} + +struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *sess, u16 cid) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + if ((conn->cid == cid) && + (conn->conn_state == TARG_CONN_STATE_LOGGED_IN)) { + iscsit_inc_conn_usage_count(conn); + spin_unlock_bh(&sess->conn_lock); + return conn; + } + } + spin_unlock_bh(&sess->conn_lock); + + return NULL; +} + +struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *sess, u16 cid) +{ + struct iscsi_conn *conn; + + spin_lock_bh(&sess->conn_lock); + list_for_each_entry(conn, &sess->sess_conn_list, conn_list) { + if (conn->cid == cid) { + iscsit_inc_conn_usage_count(conn); + spin_lock(&conn->state_lock); + atomic_set(&conn->connection_wait_rcfr, 1); + spin_unlock(&conn->state_lock); + spin_unlock_bh(&sess->conn_lock); + return conn; + } + } + spin_unlock_bh(&sess->conn_lock); + + return NULL; +} + +void iscsit_check_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + if (conn->conn_usage_count != 0) { + conn->conn_waiting_on_uc = 1; + spin_unlock_bh(&conn->conn_usage_lock); + + wait_for_completion(&conn->conn_waiting_on_uc_comp); + return; + } + spin_unlock_bh(&conn->conn_usage_lock); +} + +void iscsit_dec_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + conn->conn_usage_count--; + + if (!conn->conn_usage_count && conn->conn_waiting_on_uc) + complete(&conn->conn_waiting_on_uc_comp); + + spin_unlock_bh(&conn->conn_usage_lock); +} + +void iscsit_inc_conn_usage_count(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->conn_usage_lock); + conn->conn_usage_count++; + spin_unlock_bh(&conn->conn_usage_lock); +} + +static int iscsit_add_nopin(struct iscsi_conn *conn, int want_response) +{ + u8 state; + struct iscsi_cmd *cmd; + + cmd = iscsit_allocate_cmd(conn, GFP_ATOMIC); + if (!cmd) + return -1; + + cmd->iscsi_opcode = ISCSI_OP_NOOP_IN; + state = (want_response) ? ISTATE_SEND_NOPIN_WANT_RESPONSE : + ISTATE_SEND_NOPIN_NO_RESPONSE; + cmd->init_task_tag = 0xFFFFFFFF; + spin_lock_bh(&conn->sess->ttt_lock); + cmd->targ_xfer_tag = (want_response) ? 
conn->sess->targ_xfer_tag++ :
+ 0xFFFFFFFF;
+ if (want_response && (cmd->targ_xfer_tag == 0xFFFFFFFF))
+ cmd->targ_xfer_tag = conn->sess->targ_xfer_tag++;
+ spin_unlock_bh(&conn->sess->ttt_lock);
+
+ spin_lock_bh(&conn->cmd_lock);
+ list_add_tail(&cmd->i_list, &conn->conn_cmd_list);
+ spin_unlock_bh(&conn->cmd_lock);
+
+ if (want_response)
+ iscsit_start_nopin_response_timer(conn);
+ iscsit_add_cmd_to_immediate_queue(cmd, conn, state);
+
+ return 0;
+}
+
+static void iscsit_handle_nopin_response_timeout(unsigned long data)
+{
+ struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_response_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ iscsit_dec_conn_usage_count(conn);
+ return;
+ }
+
+ pr_debug("Did not receive response to NOPIN on CID: %hu on"
+ " SID: %u, failing connection.\n", conn->cid,
+ conn->sess->sid);
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ {
+ struct iscsi_portal_group *tpg = conn->sess->tpg;
+ struct iscsi_tiqn *tiqn = tpg->tpg_tiqn;
+
+ if (tiqn) {
+ spin_lock_bh(&tiqn->sess_err_stats.lock);
+ strcpy(tiqn->sess_err_stats.last_sess_fail_rem_name,
+ (void *)conn->sess->sess_ops->InitiatorName);
+ tiqn->sess_err_stats.last_sess_failure_type =
+ ISCSI_SESS_ERR_CXN_TIMEOUT;
+ tiqn->sess_err_stats.cxn_timeout_errors++;
+ conn->sess->conn_timeout_errors++;
+ spin_unlock_bh(&tiqn->sess_err_stats.lock);
+ }
+ }
+
+ iscsit_cause_connection_reinstatement(conn, 0);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+void iscsit_mod_nopin_response_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+
+ mod_timer(&conn->nopin_response_timer,
+ (get_jiffies_64() + na->nopin_response_timeout * HZ));
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+/*
+ * Takes conn->nopin_timer_lock itself; must be called without the lock held.
+ */
+void iscsit_start_nopin_response_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_response_timer_flags & ISCSI_TF_RUNNING) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+
+ init_timer(&conn->nopin_response_timer);
+ conn->nopin_response_timer.expires =
+ (get_jiffies_64() + na->nopin_response_timeout * HZ);
+ conn->nopin_response_timer.data = (unsigned long)conn;
+ conn->nopin_response_timer.function = iscsit_handle_nopin_response_timeout;
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_STOP;
+ conn->nopin_response_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&conn->nopin_response_timer);
+
+ pr_debug("Started NOPIN Response Timer on CID: %d to %u"
+ " seconds\n", conn->cid, na->nopin_response_timeout);
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+void iscsit_stop_nopin_response_timer(struct iscsi_conn *conn)
+{
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (!(conn->nopin_response_timer_flags & ISCSI_TF_RUNNING)) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ return;
+ }
+ conn->nopin_response_timer_flags |= ISCSI_TF_STOP;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ del_timer_sync(&conn->nopin_response_timer);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ conn->nopin_response_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+}
+
+static void iscsit_handle_nopin_timeout(unsigned long data)
+{
+ struct iscsi_conn *conn = (struct iscsi_conn *) data;
+
+ iscsit_inc_conn_usage_count(conn);
+
+ spin_lock_bh(&conn->nopin_timer_lock);
+ if (conn->nopin_timer_flags & ISCSI_TF_STOP) {
+ spin_unlock_bh(&conn->nopin_timer_lock);
+ iscsit_dec_conn_usage_count(conn);
+ return;
+ }
+ conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING;
+ spin_unlock_bh(&conn->nopin_timer_lock);
+
+ iscsit_add_nopin(conn, 1);
+ iscsit_dec_conn_usage_count(conn);
+}
+
+/*
+ * Called with conn->nopin_timer_lock held.
+ */
+void __iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+ /*
+ * NOPIN timeout is disabled.
+ */
+ if (!na->nopin_timeout)
+ return;
+
+ if (conn->nopin_timer_flags & ISCSI_TF_RUNNING)
+ return;
+
+ init_timer(&conn->nopin_timer);
+ conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ);
+ conn->nopin_timer.data = (unsigned long)conn;
+ conn->nopin_timer.function = iscsit_handle_nopin_timeout;
+ conn->nopin_timer_flags &= ~ISCSI_TF_STOP;
+ conn->nopin_timer_flags |= ISCSI_TF_RUNNING;
+ add_timer(&conn->nopin_timer);
+
+ pr_debug("Started NOPIN Timer on CID: %d at %u second"
+ " interval\n", conn->cid, na->nopin_timeout);
+}
+
+void iscsit_start_nopin_timer(struct iscsi_conn *conn)
+{
+ struct iscsi_session *sess = conn->sess;
+ struct iscsi_node_attrib *na = iscsit_tpg_get_node_attrib(sess);
+ /*
+ * NOPIN timeout is disabled.
+ */ + if (!na->nopin_timeout) + return; + + spin_lock_bh(&conn->nopin_timer_lock); + if (conn->nopin_timer_flags & ISCSI_TF_RUNNING) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + + init_timer(&conn->nopin_timer); + conn->nopin_timer.expires = (get_jiffies_64() + na->nopin_timeout * HZ); + conn->nopin_timer.data = (unsigned long)conn; + conn->nopin_timer.function = iscsit_handle_nopin_timeout; + conn->nopin_timer_flags &= ~ISCSI_TF_STOP; + conn->nopin_timer_flags |= ISCSI_TF_RUNNING; + add_timer(&conn->nopin_timer); + + pr_debug("Started NOPIN Timer on CID: %d at %u second" + " interval\n", conn->cid, na->nopin_timeout); + spin_unlock_bh(&conn->nopin_timer_lock); +} + +void iscsit_stop_nopin_timer(struct iscsi_conn *conn) +{ + spin_lock_bh(&conn->nopin_timer_lock); + if (!(conn->nopin_timer_flags & ISCSI_TF_RUNNING)) { + spin_unlock_bh(&conn->nopin_timer_lock); + return; + } + conn->nopin_timer_flags |= ISCSI_TF_STOP; + spin_unlock_bh(&conn->nopin_timer_lock); + + del_timer_sync(&conn->nopin_timer); + + spin_lock_bh(&conn->nopin_timer_lock); + conn->nopin_timer_flags &= ~ISCSI_TF_RUNNING; + spin_unlock_bh(&conn->nopin_timer_lock); +} + +int iscsit_send_tx_data( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn, + int use_misc) +{ + int tx_sent, tx_size; + u32 iov_count; + struct kvec *iov; + +send_data: + tx_size = cmd->tx_size; + + if (!use_misc) { + iov = &cmd->iov_data[0]; + iov_count = cmd->iov_data_count; + } else { + iov = &cmd->iov_misc[0]; + iov_count = cmd->iov_misc_count; + } + + tx_sent = tx_data(conn, &iov[0], iov_count, tx_size); + if (tx_size != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_data; + } else + return -1; + } + cmd->tx_size = 0; + + return 0; +} + +int iscsit_fe_sendpage_sg( + struct iscsi_cmd *cmd, + struct iscsi_conn *conn) +{ + struct scatterlist *sg = cmd->first_data_sg; + struct kvec iov; + u32 tx_hdr_size, data_len; + u32 offset = cmd->first_data_sg_off; + int tx_sent; + +send_hdr: + tx_hdr_size = ISCSI_HDR_LEN; + if (conn->conn_ops->HeaderDigest) + tx_hdr_size += ISCSI_CRC_LEN; + + iov.iov_base = cmd->pdu; + iov.iov_len = tx_hdr_size; + + tx_sent = tx_data(conn, &iov, 1, tx_hdr_size); + if (tx_hdr_size != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_hdr; + } + return -1; + } + + data_len = cmd->tx_size - tx_hdr_size - cmd->padding; + if (conn->conn_ops->DataDigest) + data_len -= ISCSI_CRC_LEN; + + /* + * Perform sendpage() for each page in the scatterlist + */ + while (data_len) { + u32 space = (sg->length - offset); + u32 sub_len = min_t(u32, data_len, space); +send_pg: + tx_sent = conn->sock->ops->sendpage(conn->sock, + sg_page(sg), sg->offset + offset, sub_len, 0); + if (tx_sent != sub_len) { + if (tx_sent == -EAGAIN) { + pr_err("tcp_sendpage() returned" + " -EAGAIN\n"); + goto send_pg; + } + + pr_err("tcp_sendpage() failure: %d\n", + tx_sent); + return -1; + } + + data_len -= sub_len; + offset = 0; + sg = sg_next(sg); + } + +send_padding: + if (cmd->padding) { + struct kvec *iov_p = + &cmd->iov_data[cmd->iov_data_count-1]; + + tx_sent = tx_data(conn, iov_p, 1, cmd->padding); + if (cmd->padding != tx_sent) { + if (tx_sent == -EAGAIN) { + pr_err("tx_data() returned -EAGAIN\n"); + goto send_padding; + } + return -1; + } + } + +send_datacrc: + if (conn->conn_ops->DataDigest) { + struct kvec *iov_d = + &cmd->iov_data[cmd->iov_data_count]; + + tx_sent = tx_data(conn, iov_d, 1, ISCSI_CRC_LEN); + if (ISCSI_CRC_LEN != tx_sent) { + if (tx_sent == 
-EAGAIN) {
+ pr_err("tx_data() returned -EAGAIN\n");
+ goto send_datacrc;
+ }
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * This function is mainly used for sending an iSCSI Login Response PDU
+ * back to the Initiator when an exception condition occurs with the
+ * errors set in status_class and status_detail.
+ *
+ * Parameters: iSCSI Connection, Status Class, Status Detail.
+ * Returns: 0 on success, -1 on error.
+ */
+int iscsit_tx_login_rsp(struct iscsi_conn *conn, u8 status_class, u8 status_detail)
+{
+ u8 iscsi_hdr[ISCSI_HDR_LEN];
+ int err;
+ struct kvec iov;
+ struct iscsi_login_rsp *hdr;
+
+ iscsit_collect_login_stats(conn, status_class, status_detail);
+
+ memset(&iov, 0, sizeof(struct kvec));
+ memset(&iscsi_hdr, 0x0, ISCSI_HDR_LEN);
+
+ hdr = (struct iscsi_login_rsp *)&iscsi_hdr;
+ hdr->opcode = ISCSI_OP_LOGIN_RSP;
+ hdr->status_class = status_class;
+ hdr->status_detail = status_detail;
+ hdr->itt = cpu_to_be32(conn->login_itt);
+
+ iov.iov_base = &iscsi_hdr;
+ iov.iov_len = ISCSI_HDR_LEN;
+
+ PRINT_BUFF(iscsi_hdr, ISCSI_HDR_LEN);
+
+ err = tx_data(conn, &iov, 1, ISCSI_HDR_LEN);
+ if (err != ISCSI_HDR_LEN) {
+ pr_err("tx_data returned less than expected\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+void iscsit_print_session_params(struct iscsi_session *sess)
+{
+ struct iscsi_conn *conn;
+
+ pr_debug("-----------------------------[Session Params for"
+ " SID: %u]-----------------------------\n", sess->sid);
+ spin_lock_bh(&sess->conn_lock);
+ list_for_each_entry(conn, &sess->sess_conn_list, conn_list)
+ iscsi_dump_conn_ops(conn->conn_ops);
+ spin_unlock_bh(&sess->conn_lock);
+
+ iscsi_dump_sess_ops(sess->sess_ops);
+}
+
+static int iscsit_do_rx_data(
+ struct iscsi_conn *conn,
+ struct iscsi_data_count *count)
+{
+ int data = count->data_length, rx_loop = 0, total_rx = 0, iov_len;
+ u32 rx_marker_val[count->ss_marker_count], rx_marker_iov = 0;
+ struct kvec iov[count->ss_iov_count], *iov_p;
+ struct msghdr msg;
+
+ if (!conn || !conn->sock || !conn->conn_ops)
+ return -1;
+
+ memset(&msg, 0, sizeof(struct msghdr));
+
+ if (count->sync_and_steering) {
+ int size = 0;
+ u32 i, orig_iov_count = 0;
+ u32 orig_iov_len = 0, orig_iov_loc = 0;
+ u32 iov_count = 0, per_iov_bytes = 0;
+ u32 *rx_marker, old_rx_marker = 0;
+ struct kvec *iov_record;
+
+ memset(&rx_marker_val, 0,
+ count->ss_marker_count * sizeof(u32));
+ memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec));
+
+ iov_record = count->iov;
+ orig_iov_count = count->iov_count;
+ rx_marker = &conn->of_marker;
+
+ i = 0;
+ size = data;
+ orig_iov_len = iov_record[orig_iov_loc].iov_len;
+ while (size > 0) {
+ pr_debug("rx_data: #1 orig_iov_len %u,"
+ " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc);
+ pr_debug("rx_data: #2 rx_marker %u, size"
+ " %u\n", *rx_marker, size);
+
+ if (orig_iov_len >= *rx_marker) {
+ iov[iov_count].iov_len = *rx_marker;
+ iov[iov_count++].iov_base =
+ (iov_record[orig_iov_loc].iov_base +
+ per_iov_bytes);
+
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &rx_marker_val[rx_marker_iov++];
+ iov[iov_count].iov_len = (MARKER_SIZE / 2);
+ iov[iov_count++].iov_base =
+ &rx_marker_val[rx_marker_iov++];
+ old_rx_marker = *rx_marker;
+
+ /*
+ * OFMarkInt is in 32-bit words.
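+ * E.g. a negotiated OFMarkInt of 512 words re-arms this counter
+ * to 2048 bytes; the 8-byte (MARKER_SIZE) marker itself is
+ * received as two 4-byte words into rx_marker_val[], carrying the
+ * offset to the next PDU header (consumed as of_marker_offset
+ * further down).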
+ */ + *rx_marker = (conn->conn_ops->OFMarkInt * 4); + size -= old_rx_marker; + orig_iov_len -= old_rx_marker; + per_iov_bytes += old_rx_marker; + + pr_debug("rx_data: #3 new_rx_marker" + " %u, size %u\n", *rx_marker, size); + } else { + iov[iov_count].iov_len = orig_iov_len; + iov[iov_count++].iov_base = + (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + per_iov_bytes = 0; + *rx_marker -= orig_iov_len; + size -= orig_iov_len; + + if (size) + orig_iov_len = + iov_record[++orig_iov_loc].iov_len; + + pr_debug("rx_data: #4 new_rx_marker" + " %u, size %u\n", *rx_marker, size); + } + } + data += (rx_marker_iov * (MARKER_SIZE / 2)); + + iov_p = &iov[0]; + iov_len = iov_count; + + if (iov_count > count->ss_iov_count) { + pr_err("iov_count: %d, count->ss_iov_count:" + " %d\n", iov_count, count->ss_iov_count); + return -1; + } + if (rx_marker_iov > count->ss_marker_count) { + pr_err("rx_marker_iov: %d, count->ss_marker" + "_count: %d\n", rx_marker_iov, + count->ss_marker_count); + return -1; + } + } else { + iov_p = count->iov; + iov_len = count->iov_count; + } + + while (total_rx < data) { + rx_loop = kernel_recvmsg(conn->sock, &msg, iov_p, iov_len, + (data - total_rx), MSG_WAITALL); + if (rx_loop <= 0) { + pr_debug("rx_loop: %d total_rx: %d\n", + rx_loop, total_rx); + return rx_loop; + } + total_rx += rx_loop; + pr_debug("rx_loop: %d, total_rx: %d, data: %d\n", + rx_loop, total_rx, data); + } + + if (count->sync_and_steering) { + int j; + for (j = 0; j < rx_marker_iov; j++) { + pr_debug("rx_data: #5 j: %d, offset: %d\n", + j, rx_marker_val[j]); + conn->of_marker_offset = rx_marker_val[j]; + } + total_rx -= (rx_marker_iov * (MARKER_SIZE / 2)); + } + + return total_rx; +} + +static int iscsit_do_tx_data( + struct iscsi_conn *conn, + struct iscsi_data_count *count) +{ + int data = count->data_length, total_tx = 0, tx_loop = 0, iov_len; + u32 tx_marker_val[count->ss_marker_count], tx_marker_iov = 0; + struct kvec iov[count->ss_iov_count], *iov_p; + struct msghdr msg; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + if (data <= 0) { + pr_err("Data length is: %d\n", data); + return -1; + } + + memset(&msg, 0, sizeof(struct msghdr)); + + if (count->sync_and_steering) { + int size = 0; + u32 i, orig_iov_count = 0; + u32 orig_iov_len = 0, orig_iov_loc = 0; + u32 iov_count = 0, per_iov_bytes = 0; + u32 *tx_marker, old_tx_marker = 0; + struct kvec *iov_record; + + memset(&tx_marker_val, 0, + count->ss_marker_count * sizeof(u32)); + memset(&iov, 0, count->ss_iov_count * sizeof(struct kvec)); + + iov_record = count->iov; + orig_iov_count = count->iov_count; + tx_marker = &conn->if_marker; + + i = 0; + size = data; + orig_iov_len = iov_record[orig_iov_loc].iov_len; + while (size > 0) { + pr_debug("tx_data: #1 orig_iov_len %u," + " orig_iov_loc %u\n", orig_iov_len, orig_iov_loc); + pr_debug("tx_data: #2 tx_marker %u, size" + " %u\n", *tx_marker, size); + + if (orig_iov_len >= *tx_marker) { + iov[iov_count].iov_len = *tx_marker; + iov[iov_count++].iov_base = + (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + tx_marker_val[tx_marker_iov] = + (size - *tx_marker); + iov[iov_count].iov_len = (MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &tx_marker_val[tx_marker_iov++]; + iov[iov_count].iov_len = (MARKER_SIZE / 2); + iov[iov_count++].iov_base = + &tx_marker_val[tx_marker_iov++]; + old_tx_marker = *tx_marker; + + /* + * IFMarkInt is in 32-bit words. 
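+ * E.g. IFMarkInt = 512 words re-arms this counter to 2048 bytes;
+ * the 8-byte marker staged in tx_marker_val[] above carries
+ * (size - *tx_marker), the byte offset to the next PDU boundary.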
+ */ + *tx_marker = (conn->conn_ops->IFMarkInt * 4); + size -= old_tx_marker; + orig_iov_len -= old_tx_marker; + per_iov_bytes += old_tx_marker; + + pr_debug("tx_data: #3 new_tx_marker" + " %u, size %u\n", *tx_marker, size); + pr_debug("tx_data: #4 offset %u\n", + tx_marker_val[tx_marker_iov-1]); + } else { + iov[iov_count].iov_len = orig_iov_len; + iov[iov_count++].iov_base + = (iov_record[orig_iov_loc].iov_base + + per_iov_bytes); + + per_iov_bytes = 0; + *tx_marker -= orig_iov_len; + size -= orig_iov_len; + + if (size) + orig_iov_len = + iov_record[++orig_iov_loc].iov_len; + + pr_debug("tx_data: #5 new_tx_marker" + " %u, size %u\n", *tx_marker, size); + } + } + + data += (tx_marker_iov * (MARKER_SIZE / 2)); + + iov_p = &iov[0]; + iov_len = iov_count; + + if (iov_count > count->ss_iov_count) { + pr_err("iov_count: %d, count->ss_iov_count:" + " %d\n", iov_count, count->ss_iov_count); + return -1; + } + if (tx_marker_iov > count->ss_marker_count) { + pr_err("tx_marker_iov: %d, count->ss_marker" + "_count: %d\n", tx_marker_iov, + count->ss_marker_count); + return -1; + } + } else { + iov_p = count->iov; + iov_len = count->iov_count; + } + + while (total_tx < data) { + tx_loop = kernel_sendmsg(conn->sock, &msg, iov_p, iov_len, + (data - total_tx)); + if (tx_loop <= 0) { + pr_debug("tx_loop: %d total_tx %d\n", + tx_loop, total_tx); + return tx_loop; + } + total_tx += tx_loop; + pr_debug("tx_loop: %d, total_tx: %d, data: %d\n", + tx_loop, total_tx, data); + } + + if (count->sync_and_steering) + total_tx -= (tx_marker_iov * (MARKER_SIZE / 2)); + + return total_tx; +} + +int rx_data( + struct iscsi_conn *conn, + struct kvec *iov, + int iov_count, + int data) +{ + struct iscsi_data_count c; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + memset(&c, 0, sizeof(struct iscsi_data_count)); + c.iov = iov; + c.iov_count = iov_count; + c.data_length = data; + c.type = ISCSI_RX_DATA; + + if (conn->conn_ops->OFMarker && + (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { + if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) + return -1; + } + + return iscsit_do_rx_data(conn, &c); +} + +int tx_data( + struct iscsi_conn *conn, + struct kvec *iov, + int iov_count, + int data) +{ + struct iscsi_data_count c; + + if (!conn || !conn->sock || !conn->conn_ops) + return -1; + + memset(&c, 0, sizeof(struct iscsi_data_count)); + c.iov = iov; + c.iov_count = iov_count; + c.data_length = data; + c.type = ISCSI_TX_DATA; + + if (conn->conn_ops->IFMarker && + (conn->conn_state >= TARG_CONN_STATE_LOGGED_IN)) { + if (iscsit_determine_sync_and_steering_counts(conn, &c) < 0) + return -1; + } + + return iscsit_do_tx_data(conn, &c); +} + +void iscsit_collect_login_stats( + struct iscsi_conn *conn, + u8 status_class, + u8 status_detail) +{ + struct iscsi_param *intrname = NULL; + struct iscsi_tiqn *tiqn; + struct iscsi_login_stats *ls; + + tiqn = iscsit_snmp_get_tiqn(conn); + if (!tiqn) + return; + + ls = &tiqn->login_stats; + + spin_lock(&ls->lock); + if (!strcmp(conn->login_ip, ls->last_intr_fail_ip_addr) && + ((get_jiffies_64() - ls->last_fail_time) < 10)) { + /* We already have the failure info for this login */ + spin_unlock(&ls->lock); + return; + } + + if (status_class == ISCSI_STATUS_CLS_SUCCESS) + ls->accepts++; + else if (status_class == ISCSI_STATUS_CLS_REDIRECT) { + ls->redirects++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_REDIRECT; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_AUTH_FAILED)) { + ls->authenticate_fails++; + 
ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHENTICATE; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_TGT_FORBIDDEN)) { + ls->authorize_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_AUTHORIZE; + } else if ((status_class == ISCSI_STATUS_CLS_INITIATOR_ERR) && + (status_detail == ISCSI_LOGIN_STATUS_INIT_ERR)) { + ls->negotiate_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_NEGOTIATE; + } else { + ls->other_fails++; + ls->last_fail_type = ISCSI_LOGIN_FAIL_OTHER; + } + + /* Save initiator name, ip address and time, if it is a failed login */ + if (status_class != ISCSI_STATUS_CLS_SUCCESS) { + if (conn->param_list) + intrname = iscsi_find_param_from_key(INITIATORNAME, + conn->param_list); + strcpy(ls->last_intr_fail_name, + (intrname ? intrname->value : "Unknown")); + + ls->last_intr_fail_ip_family = conn->sock->sk->sk_family; + snprintf(ls->last_intr_fail_ip_addr, IPV6_ADDRESS_SPACE, + "%s", conn->login_ip); + ls->last_fail_time = get_jiffies_64(); + } + + spin_unlock(&ls->lock); +} + +struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *conn) +{ + struct iscsi_portal_group *tpg; + + if (!conn || !conn->sess) + return NULL; + + tpg = conn->sess->tpg; + if (!tpg) + return NULL; + + if (!tpg->tpg_tiqn) + return NULL; + + return tpg->tpg_tiqn; +} diff --git a/drivers/target/iscsi/iscsi_target_util.h b/drivers/target/iscsi/iscsi_target_util.h new file mode 100644 index 000000000000..2cd49d607bda --- /dev/null +++ b/drivers/target/iscsi/iscsi_target_util.h @@ -0,0 +1,60 @@ +#ifndef ISCSI_TARGET_UTIL_H +#define ISCSI_TARGET_UTIL_H + +#define MARKER_SIZE 8 + +extern int iscsit_add_r2t_to_list(struct iscsi_cmd *, u32, u32, int, u32); +extern struct iscsi_r2t *iscsit_get_r2t_for_eos(struct iscsi_cmd *, u32, u32); +extern struct iscsi_r2t *iscsit_get_r2t_from_list(struct iscsi_cmd *); +extern void iscsit_free_r2t(struct iscsi_r2t *, struct iscsi_cmd *); +extern void iscsit_free_r2ts_from_list(struct iscsi_cmd *); +extern struct iscsi_cmd *iscsit_allocate_cmd(struct iscsi_conn *, gfp_t); +extern struct iscsi_cmd *iscsit_allocate_se_cmd(struct iscsi_conn *, u32, int, int); +extern struct iscsi_cmd *iscsit_allocate_se_cmd_for_tmr(struct iscsi_conn *, u8); +extern int iscsit_decide_list_to_build(struct iscsi_cmd *, u32); +extern struct iscsi_seq *iscsit_get_seq_holder_for_datain(struct iscsi_cmd *, u32); +extern struct iscsi_seq *iscsit_get_seq_holder_for_r2t(struct iscsi_cmd *); +extern struct iscsi_r2t *iscsit_get_holder_for_r2tsn(struct iscsi_cmd *, u32); +int iscsit_sequence_cmd(struct iscsi_conn *conn, struct iscsi_cmd *cmd, u32 cmdsn); +extern int iscsit_check_unsolicited_dataout(struct iscsi_cmd *, unsigned char *); +extern struct iscsi_cmd *iscsit_find_cmd_from_itt(struct iscsi_conn *, u32); +extern struct iscsi_cmd *iscsit_find_cmd_from_itt_or_dump(struct iscsi_conn *, + u32, u32); +extern struct iscsi_cmd *iscsit_find_cmd_from_ttt(struct iscsi_conn *, u32); +extern int iscsit_find_cmd_for_recovery(struct iscsi_session *, struct iscsi_cmd **, + struct iscsi_conn_recovery **, u32); +extern void iscsit_add_cmd_to_immediate_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_queue_req *iscsit_get_cmd_from_immediate_queue(struct iscsi_conn *); +extern void iscsit_add_cmd_to_response_queue(struct iscsi_cmd *, struct iscsi_conn *, u8); +extern struct iscsi_queue_req *iscsit_get_cmd_from_response_queue(struct iscsi_conn *); +extern void iscsit_remove_cmd_from_tx_queues(struct iscsi_cmd *, struct iscsi_conn *); +extern 
void iscsit_free_queue_reqs_for_conn(struct iscsi_conn *); +extern void iscsit_release_cmd(struct iscsi_cmd *); +extern int iscsit_check_session_usage_count(struct iscsi_session *); +extern void iscsit_dec_session_usage_count(struct iscsi_session *); +extern void iscsit_inc_session_usage_count(struct iscsi_session *); +extern int iscsit_set_sync_and_steering_values(struct iscsi_conn *); +extern struct iscsi_conn *iscsit_get_conn_from_cid(struct iscsi_session *, u16); +extern struct iscsi_conn *iscsit_get_conn_from_cid_rcfr(struct iscsi_session *, u16); +extern void iscsit_check_conn_usage_count(struct iscsi_conn *); +extern void iscsit_dec_conn_usage_count(struct iscsi_conn *); +extern void iscsit_inc_conn_usage_count(struct iscsi_conn *); +extern void iscsit_mod_nopin_response_timer(struct iscsi_conn *); +extern void iscsit_start_nopin_response_timer(struct iscsi_conn *); +extern void iscsit_stop_nopin_response_timer(struct iscsi_conn *); +extern void __iscsit_start_nopin_timer(struct iscsi_conn *); +extern void iscsit_start_nopin_timer(struct iscsi_conn *); +extern void iscsit_stop_nopin_timer(struct iscsi_conn *); +extern int iscsit_send_tx_data(struct iscsi_cmd *, struct iscsi_conn *, int); +extern int iscsit_fe_sendpage_sg(struct iscsi_cmd *, struct iscsi_conn *); +extern int iscsit_tx_login_rsp(struct iscsi_conn *, u8, u8); +extern void iscsit_print_session_params(struct iscsi_session *); +extern int iscsit_print_dev_to_proc(char *, char **, off_t, int); +extern int iscsit_print_sessions_to_proc(char *, char **, off_t, int); +extern int iscsit_print_tpg_to_proc(char *, char **, off_t, int); +extern int rx_data(struct iscsi_conn *, struct kvec *, int, int); +extern int tx_data(struct iscsi_conn *, struct kvec *, int, int); +extern void iscsit_collect_login_stats(struct iscsi_conn *, u8, u8); +extern struct iscsi_tiqn *iscsit_snmp_get_tiqn(struct iscsi_conn *); + +#endif /*** ISCSI_TARGET_UTIL_H ***/ diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 46352d658e35..c75a01a1c475 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -4052,17 +4052,16 @@ static int transport_allocate_data_tasks( struct se_task *task; struct se_device *dev = cmd->se_dev; unsigned long flags; - sector_t sectors; int task_count, i, ret; - sector_t dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors; + sector_t sectors, dev_max_sectors = dev->se_sub_dev->se_dev_attrib.max_sectors; u32 sector_size = dev->se_sub_dev->se_dev_attrib.block_size; struct scatterlist *sg; struct scatterlist *cmd_sg; WARN_ON(cmd->data_length % sector_size); sectors = DIV_ROUND_UP(cmd->data_length, sector_size); - task_count = DIV_ROUND_UP(sectors, dev_max_sectors); - + task_count = DIV_ROUND_UP_SECTOR_T(sectors, dev_max_sectors); + cmd_sg = sgl; for (i = 0; i < task_count; i++) { unsigned int task_size; diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 21d816e9dfa5..f441726ddf2b 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -28,6 +28,17 @@ menuconfig WATCHDOG if WATCHDOG +config WATCHDOG_CORE + bool "WatchDog Timer Driver Core" + ---help--- + Say Y here if you want to use the new watchdog timer driver core. + This driver provides a framework for all watchdog timer drivers + and gives them the /dev/watchdog interface (and later also the + sysfs interface). + + To compile this driver as a module, choose M here: the module will + be called watchdog. 
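(An illustrative sketch only, using option names defined in this file: a configuration that enables the new framework alongside one of the drivers added below might contain

 CONFIG_WATCHDOG=y
 CONFIG_WATCHDOG_CORE=y
 CONFIG_DW_WATCHDOG=m

so the core is built in and the DesignWare driver is loadable as the dw_wdt module.)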
+
config WATCHDOG_NOWAYOUT
 bool "Disable watchdog shutdown on close"
 help
@@ -186,6 +197,15 @@ config SA1100_WATCHDOG
 To compile this driver as a module, choose M here: the
 module will be called sa1100_wdt.
+config DW_WATCHDOG
+ tristate "Synopsys DesignWare watchdog"
+ depends on ARM && HAVE_CLK
+ help
+ Say Y here to include support for the Synopsys DesignWare
+ watchdog timer found in many ARM chips.
+ To compile this driver as a module, choose M here: the
+ module will be called dw_wdt.
+
config MPCORE_WATCHDOG
 tristate "MPcore watchdog"
 depends on HAVE_ARM_TWD
@@ -321,7 +341,7 @@ config MAX63XX_WATCHDOG
config IMX2_WDT
 tristate "IMX2+ Watchdog"
- depends on ARCH_MX2 || ARCH_MX25 || ARCH_MX3 || ARCH_MX5
+ depends on IMX_HAVE_PLATFORM_IMX2_WDT
 help
 This is the driver for the hardware watchdog
 on the Freescale IMX2 and later processors.
@@ -879,6 +899,20 @@ config M54xx_WATCHDOG
 To compile this driver as a module, choose M here: the
 module will be called m54xx_wdt.
+# MicroBlaze Architecture
+
+config XILINX_WATCHDOG
+ tristate "Xilinx Watchdog timer"
+ depends on MICROBLAZE
+ ---help---
+ Watchdog driver for the xps_timebase_wdt IP core.
+
+ IMPORTANT: The xps_timebase_wdt parent must have the property
+ "clock-frequency" in the device tree.
+
+ To compile this driver as a module, choose M here: the
+ module will be called of_xilinx_wdt.
+
# MIPS Architecture
config ATH79_WDT
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index ed26f7094e47..55bd5740e910 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -2,6 +2,10 @@
# Makefile for the WatchDog device drivers.
#
+# The WatchDog Timer Driver Core.
+watchdog-objs += watchdog_core.o watchdog_dev.o
+obj-$(CONFIG_WATCHDOG_CORE) += watchdog.o
+
# Only one watchdog can succeed. We probe the ISA/PCI/USB based
# watchdog-cards first, then the architecture specific watchdog
# drivers and then the architecture independent "softdog" driver.
@@ -37,6 +41,7 @@ obj-$(CONFIG_IXP4XX_WATCHDOG) += ixp4xx_wdt.o
obj-$(CONFIG_KS8695_WATCHDOG) += ks8695_wdt.o
obj-$(CONFIG_S3C2410_WATCHDOG) += s3c2410_wdt.o
obj-$(CONFIG_SA1100_WATCHDOG) += sa1100_wdt.o
+obj-$(CONFIG_DW_WATCHDOG) += dw_wdt.o
obj-$(CONFIG_MPCORE_WATCHDOG) += mpcore_wdt.o
obj-$(CONFIG_EP93XX_WATCHDOG) += ep93xx_wdt.o
obj-$(CONFIG_PNX4008_WATCHDOG) += pnx4008_wdt.o
@@ -109,6 +114,9 @@ obj-$(CONFIG_INTEL_SCU_WATCHDOG) += intel_scu_watchdog.o
# M68K Architecture
obj-$(CONFIG_M54xx_WATCHDOG) += m54xx_wdt.o
+# MicroBlaze Architecture
+obj-$(CONFIG_XILINX_WATCHDOG) += of_xilinx_wdt.o
+
# MIPS Architecture
obj-$(CONFIG_ATH79_WDT) += ath79_wdt.o
obj-$(CONFIG_BCM47XX_WDT) += bcm47xx_wdt.o
diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index eac26021e8da..87445b2d72a7 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -31,7 +31,7 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
-#include <mach/at91_wdt.h>
+#include "at91sam9_wdt.h"
#define DRV_NAME "AT91SAM9 Watchdog"
@@ -284,27 +284,8 @@ static int __exit at91wdt_remove(struct platform_device *pdev)
 return res;
}
-#ifdef CONFIG_PM
-
-static int at91wdt_suspend(struct platform_device *pdev, pm_message_t message)
-{
- return 0;
-}
-
-static int at91wdt_resume(struct platform_device *pdev)
-{
- return 0;
-}
-
-#else
-#define at91wdt_suspend NULL
-#define at91wdt_resume NULL
-#endif
-
static struct platform_driver at91wdt_driver = {
 .remove = __exit_p(at91wdt_remove),
- .suspend = at91wdt_suspend,
- .resume = at91wdt_resume,
 .driver = {
 .name = "at91_wdt",
 .owner = THIS_MODULE,
diff --git a/arch/arm/mach-at91/include/mach/at91_wdt.h b/drivers/watchdog/at91sam9_wdt.h
index fecc2e9f0ca8..757f9cab5c82 100644
--- a/arch/arm/mach-at91/include/mach/at91_wdt.h
+++ b/drivers/watchdog/at91sam9_wdt.h
@@ -1,5 +1,5 @@
/*
- * arch/arm/mach-at91/include/mach/at91_wdt.h
+ * drivers/watchdog/at91sam9_wdt.h
 *
 * Copyright (C) 2007 Andrew Victor
 * Copyright (C) 2007 Atmel Corporation.
diff --git a/drivers/watchdog/dw_wdt.c b/drivers/watchdog/dw_wdt.c
new file mode 100644
index 000000000000..f10f8c0abba4
--- /dev/null
+++ b/drivers/watchdog/dw_wdt.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright 2010-2011 Picochip Ltd., Jamie Iles
+ * http://www.picochip.com
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ *
+ * This file implements a driver for the Synopsys DesignWare watchdog device
+ * in many ARM subsystems. The watchdog has 16 different timeout periods
+ * and these are a function of the input clock frequency.
+ *
+ * The DesignWare watchdog cannot be stopped once it has been started so we
+ * use a software timer to implement a ping that will keep the watchdog alive.
+ * If we receive an expected close for the watchdog then we keep the timer
+ * running, otherwise the timer is stopped and the watchdog will expire.
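+ *
+ * An "expected close" follows the usual watchdog magic-close convention:
+ * userspace writes the character 'V' to /dev/watchdog before closing it
+ * (see dw_wdt_write() and dw_wdt_release() below).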
+ */ +#define pr_fmt(fmt) "dw_wdt: " fmt + +#include <linux/bitops.h> +#include <linux/clk.h> +#include <linux/device.h> +#include <linux/err.h> +#include <linux/fs.h> +#include <linux/io.h> +#include <linux/kernel.h> +#include <linux/miscdevice.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/pm.h> +#include <linux/platform_device.h> +#include <linux/spinlock.h> +#include <linux/timer.h> +#include <linux/uaccess.h> +#include <linux/watchdog.h> + +#define WDOG_CONTROL_REG_OFFSET 0x00 +#define WDOG_CONTROL_REG_WDT_EN_MASK 0x01 +#define WDOG_TIMEOUT_RANGE_REG_OFFSET 0x04 +#define WDOG_CURRENT_COUNT_REG_OFFSET 0x08 +#define WDOG_COUNTER_RESTART_REG_OFFSET 0x0c +#define WDOG_COUNTER_RESTART_KICK_VALUE 0x76 + +/* The maximum TOP (timeout period) value that can be set in the watchdog. */ +#define DW_WDT_MAX_TOP 15 + +static int nowayout = WATCHDOG_NOWAYOUT; +module_param(nowayout, int, 0); +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started " + "(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")"); + +#define WDT_TIMEOUT (HZ / 2) + +static struct { + spinlock_t lock; + void __iomem *regs; + struct clk *clk; + unsigned long in_use; + unsigned long next_heartbeat; + struct timer_list timer; + int expect_close; +} dw_wdt; + +static inline int dw_wdt_is_enabled(void) +{ + return readl(dw_wdt.regs + WDOG_CONTROL_REG_OFFSET) & + WDOG_CONTROL_REG_WDT_EN_MASK; +} + +static inline int dw_wdt_top_in_seconds(unsigned top) +{ + /* + * There are 16 possible timeout values in 0..15 where the number of + * cycles is 2 ^ (16 + i) and the watchdog counts down. + */ + return (1 << (16 + top)) / clk_get_rate(dw_wdt.clk); +} + +static int dw_wdt_get_top(void) +{ + int top = readl(dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET) & 0xF; + + return dw_wdt_top_in_seconds(top); +} + +static inline void dw_wdt_set_next_heartbeat(void) +{ + dw_wdt.next_heartbeat = jiffies + dw_wdt_get_top() * HZ; +} + +static int dw_wdt_set_top(unsigned top_s) +{ + int i, top_val = DW_WDT_MAX_TOP; + + /* + * Iterate over the timeout values until we find the closest match. We + * always look for >=. + */ + for (i = 0; i <= DW_WDT_MAX_TOP; ++i) + if (dw_wdt_top_in_seconds(i) >= top_s) { + top_val = i; + break; + } + + /* Set the new value in the watchdog. */ + writel(top_val, dw_wdt.regs + WDOG_TIMEOUT_RANGE_REG_OFFSET); + + dw_wdt_set_next_heartbeat(); + + return dw_wdt_top_in_seconds(top_val); +} + +static void dw_wdt_keepalive(void) +{ + writel(WDOG_COUNTER_RESTART_KICK_VALUE, dw_wdt.regs + + WDOG_COUNTER_RESTART_REG_OFFSET); +} + +static void dw_wdt_ping(unsigned long data) +{ + if (time_before(jiffies, dw_wdt.next_heartbeat) || + (!nowayout && !dw_wdt.in_use)) { + dw_wdt_keepalive(); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + } else + pr_crit("keepalive missed, machine will reset\n"); +} + +static int dw_wdt_open(struct inode *inode, struct file *filp) +{ + if (test_and_set_bit(0, &dw_wdt.in_use)) + return -EBUSY; + + /* Make sure we don't get unloaded. */ + __module_get(THIS_MODULE); + + spin_lock(&dw_wdt.lock); + if (!dw_wdt_is_enabled()) { + /* + * The watchdog is not currently enabled. Set the timeout to + * the maximum and then start it. 
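+ *
+ * For scale (assuming, e.g., a 50 MHz watchdog clock): a timeout
+ * range value of i arms the counter with 2^(16 + i) cycles, so
+ * DW_WDT_MAX_TOP (15) gives roughly 2^31 / 50000000 ~= 43 seconds.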
+ */ + dw_wdt_set_top(DW_WDT_MAX_TOP); + writel(WDOG_CONTROL_REG_WDT_EN_MASK, + dw_wdt.regs + WDOG_CONTROL_REG_OFFSET); + } + + dw_wdt_set_next_heartbeat(); + + spin_unlock(&dw_wdt.lock); + + return nonseekable_open(inode, filp); +} + +ssize_t dw_wdt_write(struct file *filp, const char __user *buf, size_t len, + loff_t *offset) +{ + if (!len) + return 0; + + if (!nowayout) { + size_t i; + + dw_wdt.expect_close = 0; + + for (i = 0; i < len; ++i) { + char c; + + if (get_user(c, buf + i)) + return -EFAULT; + + if (c == 'V') { + dw_wdt.expect_close = 1; + break; + } + } + } + + dw_wdt_set_next_heartbeat(); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + + return len; +} + +static u32 dw_wdt_time_left(void) +{ + return readl(dw_wdt.regs + WDOG_CURRENT_COUNT_REG_OFFSET) / + clk_get_rate(dw_wdt.clk); +} + +static const struct watchdog_info dw_wdt_ident = { + .options = WDIOF_KEEPALIVEPING | WDIOF_SETTIMEOUT | + WDIOF_MAGICCLOSE, + .identity = "Synopsys DesignWare Watchdog", +}; + +static long dw_wdt_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) +{ + unsigned long val; + int timeout; + + switch (cmd) { + case WDIOC_GETSUPPORT: + return copy_to_user((struct watchdog_info *)arg, &dw_wdt_ident, + sizeof(dw_wdt_ident)) ? -EFAULT : 0; + + case WDIOC_GETSTATUS: + case WDIOC_GETBOOTSTATUS: + return put_user(0, (int *)arg); + + case WDIOC_KEEPALIVE: + dw_wdt_set_next_heartbeat(); + return 0; + + case WDIOC_SETTIMEOUT: + if (get_user(val, (int __user *)arg)) + return -EFAULT; + timeout = dw_wdt_set_top(val); + return put_user(timeout , (int __user *)arg); + + case WDIOC_GETTIMEOUT: + return put_user(dw_wdt_get_top(), (int __user *)arg); + + case WDIOC_GETTIMELEFT: + /* Get the time left until expiry. */ + if (get_user(val, (int __user *)arg)) + return -EFAULT; + return put_user(dw_wdt_time_left(), (int __user *)arg); + + default: + return -ENOTTY; + } +} + +static int dw_wdt_release(struct inode *inode, struct file *filp) +{ + clear_bit(0, &dw_wdt.in_use); + + if (!dw_wdt.expect_close) { + del_timer(&dw_wdt.timer); + + if (!nowayout) + pr_crit("unexpected close, system will reboot soon\n"); + else + pr_crit("watchdog cannot be disabled, system will reboot soon\n"); + } + + dw_wdt.expect_close = 0; + + return 0; +} + +#ifdef CONFIG_PM +static int dw_wdt_suspend(struct device *dev) +{ + clk_disable(dw_wdt.clk); + + return 0; +} + +static int dw_wdt_resume(struct device *dev) +{ + int err = clk_enable(dw_wdt.clk); + + if (err) + return err; + + dw_wdt_keepalive(); + + return 0; +} + +static const struct dev_pm_ops dw_wdt_pm_ops = { + .suspend = dw_wdt_suspend, + .resume = dw_wdt_resume, +}; +#endif /* CONFIG_PM */ + +static const struct file_operations wdt_fops = { + .owner = THIS_MODULE, + .llseek = no_llseek, + .open = dw_wdt_open, + .write = dw_wdt_write, + .unlocked_ioctl = dw_wdt_ioctl, + .release = dw_wdt_release +}; + +static struct miscdevice dw_wdt_miscdev = { + .fops = &wdt_fops, + .name = "watchdog", + .minor = WATCHDOG_MINOR, +}; + +static int __devinit dw_wdt_drv_probe(struct platform_device *pdev) +{ + int ret; + struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + + if (!mem) + return -EINVAL; + + if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem), + "dw_wdt")) + return -ENOMEM; + + dw_wdt.regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); + if (!dw_wdt.regs) + return -ENOMEM; + + dw_wdt.clk = clk_get(&pdev->dev, NULL); + if (IS_ERR(dw_wdt.clk)) + return PTR_ERR(dw_wdt.clk); + + ret = clk_enable(dw_wdt.clk); + if (ret) 
+ goto out_put_clk; + + spin_lock_init(&dw_wdt.lock); + + ret = misc_register(&dw_wdt_miscdev); + if (ret) + goto out_disable_clk; + + dw_wdt_set_next_heartbeat(); + setup_timer(&dw_wdt.timer, dw_wdt_ping, 0); + mod_timer(&dw_wdt.timer, jiffies + WDT_TIMEOUT); + + return 0; + +out_disable_clk: + clk_disable(dw_wdt.clk); +out_put_clk: + clk_put(dw_wdt.clk); + + return ret; +} + +static int __devexit dw_wdt_drv_remove(struct platform_device *pdev) +{ + misc_deregister(&dw_wdt_miscdev); + + clk_disable(dw_wdt.clk); + clk_put(dw_wdt.clk); + + return 0; +} + +static struct platform_driver dw_wdt_driver = { + .probe = dw_wdt_drv_probe, + .remove = __devexit_p(dw_wdt_drv_remove), + .driver = { + .name = "dw_wdt", + .owner = THIS_MODULE, +#ifdef CONFIG_PM + .pm = &dw_wdt_pm_ops, +#endif /* CONFIG_PM */ + }, +}; + +static int __init dw_wdt_watchdog_init(void) +{ + return platform_driver_register(&dw_wdt_driver); +} +module_init(dw_wdt_watchdog_init); + +static void __exit dw_wdt_watchdog_exit(void) +{ + platform_driver_unregister(&dw_wdt_driver); +} +module_exit(dw_wdt_watchdog_exit); + +MODULE_AUTHOR("Jamie Iles"); +MODULE_DESCRIPTION("Synopsys DesignWare Watchdog Driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/hpwdt.c b/drivers/watchdog/hpwdt.c index 8cb26855bfed..410fba45378d 100644 --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -36,7 +36,7 @@ #include <asm/cacheflush.h> #endif /* CONFIG_HPWDT_NMI_DECODING */ -#define HPWDT_VERSION "1.2.0" +#define HPWDT_VERSION "1.3.0" #define SECS_TO_TICKS(secs) ((secs) * 1000 / 128) #define TICKS_TO_SECS(ticks) ((ticks) * 128 / 1000) #define HPWDT_MAX_TIMER TICKS_TO_SECS(65535) @@ -87,6 +87,19 @@ struct smbios_cru64_info { }; #define SMBIOS_CRU64_INFORMATION 212 +/* type 219 */ +struct smbios_proliant_info { + u8 type; + u8 byte_length; + u16 handle; + u32 power_features; + u32 omega_features; + u32 reserved; + u32 misc_features; +}; +#define SMBIOS_ICRU_INFORMATION 219 + + struct cmn_registers { union { struct { @@ -132,6 +145,7 @@ struct cmn_registers { static unsigned int hpwdt_nmi_decoding; static unsigned int allow_kdump; static unsigned int priority; /* hpwdt at end of die_notify list */ +static unsigned int is_icru; static DEFINE_SPINLOCK(rom_lock); static void *cru_rom_addr; static struct cmn_registers cmn_regs; @@ -476,19 +490,22 @@ static int hpwdt_pretimeout(struct notifier_block *nb, unsigned long ulReason, goto out; spin_lock_irqsave(&rom_lock, rom_pl); - if (!die_nmi_called) + if (!die_nmi_called && !is_icru) asminline_call(&cmn_regs, cru_rom_addr); die_nmi_called = 1; spin_unlock_irqrestore(&rom_lock, rom_pl); - if (cmn_regs.u1.ral == 0) { - printk(KERN_WARNING "hpwdt: An NMI occurred, " - "but unable to determine source.\n"); - } else { - if (allow_kdump) - hpwdt_stop(); - panic("An NMI occurred, please see the Integrated " - "Management Log for details.\n"); + if (!is_icru) { + if (cmn_regs.u1.ral == 0) { + printk(KERN_WARNING "hpwdt: An NMI occurred, " + "but unable to determine source.\n"); + } } + + if (allow_kdump) + hpwdt_stop(); + panic("An NMI occurred, please see the Integrated " + "Management Log for details.\n"); + out: return NOTIFY_OK; } @@ -659,30 +676,63 @@ static void __devinit hpwdt_check_nmi_decoding(struct pci_dev *dev) } #endif /* CONFIG_X86_LOCAL_APIC */ +/* + * dmi_find_icru + * + * Routine Description: + * This function checks whether or not we are on an iCRU-based server. 
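+ * It is called once for each SMBIOS table entry via dmi_walk(); a
+ * type 219 (SMBIOS_ICRU_INFORMATION) record whose misc_features bit 0
+ * is set marks the platform as iCRU-based.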
+ * This check is independent of architecture and needs to be made for
+ * any ProLiant system. + */ +static void __devinit dmi_find_icru(const struct dmi_header *dm, void *dummy) +{ + struct smbios_proliant_info *smbios_proliant_ptr; + + if (dm->type == SMBIOS_ICRU_INFORMATION) { + smbios_proliant_ptr = (struct smbios_proliant_info *) dm; + if (smbios_proliant_ptr->misc_features & 0x01) + is_icru = 1; + } +} + static int __devinit hpwdt_init_nmi_decoding(struct pci_dev *dev) { int retval; /* - * We need to map the ROM to get the CRU service. - * For 32 bit Operating Systems we need to go through the 32 Bit - * BIOS Service Directory - * For 64 bit Operating Systems we get that service through SMBIOS. + * On typical CRU-based systems we need to map that service in + * the BIOS. For 32 bit Operating Systems we need to go through + * the 32 Bit BIOS Service Directory. For 64 bit Operating + * Systems we get that service through SMBIOS. + * + * On systems that support the new iCRU service all we need to + * do is call dmi_walk to get the supported flag value and skip + * the old cru detect code. */ - retval = detect_cru_service(); - if (retval < 0) { - dev_warn(&dev->dev, - "Unable to detect the %d Bit CRU Service.\n", - HPWDT_ARCH); - return retval; - } + dmi_walk(dmi_find_icru, NULL); + if (!is_icru) { + + /* + * We need to map the ROM to get the CRU service. + * For 32 bit Operating Systems we need to go through the 32 Bit + * BIOS Service Directory + * For 64 bit Operating Systems we get that service through SMBIOS. + */ + retval = detect_cru_service(); + if (retval < 0) { + dev_warn(&dev->dev, + "Unable to detect the %d Bit CRU Service.\n", + HPWDT_ARCH); + return retval; + } - /* - * We know this is the only CRU call we need to make so lets keep as - * few instructions as possible once the NMI comes in. - */ - cmn_regs.u1.rah = 0x0D; - cmn_regs.u1.ral = 0x02; + /* + * We know this is the only CRU call we need to make so let's keep as + * few instructions as possible once the NMI comes in.
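+ * (The rah/ral values loaded below select the CRU routine that
+ * hpwdt_pretimeout() invokes through asminline_call() when an NMI
+ * arrives; the exact command encoding is firmware-defined.)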
+ */ + cmn_regs.u1.rah = 0x0D; + cmn_regs.u1.ral = 0x02; + } /* * If the priority is set to 1, then we will be put first on the diff --git a/drivers/watchdog/iTCO_wdt.c b/drivers/watchdog/iTCO_wdt.c index 5fd020da7c55..751a591684da 100644 --- a/drivers/watchdog/iTCO_wdt.c +++ b/drivers/watchdog/iTCO_wdt.c @@ -120,72 +120,12 @@ enum iTCO_chipsets { TCO_3420, /* 3420 */ TCO_3450, /* 3450 */ TCO_EP80579, /* EP80579 */ - TCO_CPT1, /* Cougar Point */ - TCO_CPT2, /* Cougar Point Desktop */ - TCO_CPT3, /* Cougar Point Mobile */ - TCO_CPT4, /* Cougar Point */ - TCO_CPT5, /* Cougar Point */ - TCO_CPT6, /* Cougar Point */ - TCO_CPT7, /* Cougar Point */ - TCO_CPT8, /* Cougar Point */ - TCO_CPT9, /* Cougar Point */ - TCO_CPT10, /* Cougar Point */ - TCO_CPT11, /* Cougar Point */ - TCO_CPT12, /* Cougar Point */ - TCO_CPT13, /* Cougar Point */ - TCO_CPT14, /* Cougar Point */ - TCO_CPT15, /* Cougar Point */ - TCO_CPT16, /* Cougar Point */ - TCO_CPT17, /* Cougar Point */ - TCO_CPT18, /* Cougar Point */ - TCO_CPT19, /* Cougar Point */ - TCO_CPT20, /* Cougar Point */ - TCO_CPT21, /* Cougar Point */ - TCO_CPT22, /* Cougar Point */ - TCO_CPT23, /* Cougar Point */ - TCO_CPT24, /* Cougar Point */ - TCO_CPT25, /* Cougar Point */ - TCO_CPT26, /* Cougar Point */ - TCO_CPT27, /* Cougar Point */ - TCO_CPT28, /* Cougar Point */ - TCO_CPT29, /* Cougar Point */ - TCO_CPT30, /* Cougar Point */ - TCO_CPT31, /* Cougar Point */ - TCO_PBG1, /* Patsburg */ - TCO_PBG2, /* Patsburg */ + TCO_CPT, /* Cougar Point */ + TCO_CPTD, /* Cougar Point Desktop */ + TCO_CPTM, /* Cougar Point Mobile */ + TCO_PBG, /* Patsburg */ TCO_DH89XXCC, /* DH89xxCC */ - TCO_PPT0, /* Panther Point */ - TCO_PPT1, /* Panther Point */ - TCO_PPT2, /* Panther Point */ - TCO_PPT3, /* Panther Point */ - TCO_PPT4, /* Panther Point */ - TCO_PPT5, /* Panther Point */ - TCO_PPT6, /* Panther Point */ - TCO_PPT7, /* Panther Point */ - TCO_PPT8, /* Panther Point */ - TCO_PPT9, /* Panther Point */ - TCO_PPT10, /* Panther Point */ - TCO_PPT11, /* Panther Point */ - TCO_PPT12, /* Panther Point */ - TCO_PPT13, /* Panther Point */ - TCO_PPT14, /* Panther Point */ - TCO_PPT15, /* Panther Point */ - TCO_PPT16, /* Panther Point */ - TCO_PPT17, /* Panther Point */ - TCO_PPT18, /* Panther Point */ - TCO_PPT19, /* Panther Point */ - TCO_PPT20, /* Panther Point */ - TCO_PPT21, /* Panther Point */ - TCO_PPT22, /* Panther Point */ - TCO_PPT23, /* Panther Point */ - TCO_PPT24, /* Panther Point */ - TCO_PPT25, /* Panther Point */ - TCO_PPT26, /* Panther Point */ - TCO_PPT27, /* Panther Point */ - TCO_PPT28, /* Panther Point */ - TCO_PPT29, /* Panther Point */ - TCO_PPT30, /* Panther Point */ - TCO_PPT31, /* Panther Point */ + TCO_PPT, /* Panther Point */ }; static struct { @@ -244,83 +184,14 @@ static struct { {"3450", 2}, {"EP80579", 2}, {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Cougar Point", 2}, - {"Patsburg", 2}, + {"Cougar Point Desktop", 
2}, + {"Cougar Point Mobile", 2}, {"Patsburg", 2}, {"DH89xxCC", 2}, {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, - {"Panther Point", 2}, {NULL, 0} }; -#define ITCO_PCI_DEVICE(dev, data) \ - .vendor = PCI_VENDOR_ID_INTEL, \ - .device = dev, \ - .subvendor = PCI_ANY_ID, \ - .subdevice = PCI_ANY_ID, \ - .class = 0, \ - .class_mask = 0, \ - .driver_data = data - /* * This data only exists for exporting the supported PCI ids * via MODULE_DEVICE_TABLE. We do not actually register a @@ -328,138 +199,138 @@ static struct { * functions that probably will be registered by other drivers. */ static DEFINE_PCI_DEVICE_TABLE(iTCO_wdt_pci_tbl) = { - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AA_0, TCO_ICH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801AB_0, TCO_ICH0)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_0, TCO_ICH2)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801BA_10, TCO_ICH2M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_0, TCO_ICH3)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801CA_12, TCO_ICH3M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_0, TCO_ICH4)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801DB_12, TCO_ICH4M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801E_0, TCO_CICH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_82801EB_0, TCO_ICH5)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB_1, TCO_6300ESB)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_0, TCO_ICH6)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_1, TCO_ICH6M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH6_2, TCO_ICH6W)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ESB2_0, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2671, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2672, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2673, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2674, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2675, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2676, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2677, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2678, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x2679, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267a, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267b, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267c, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267d, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267e, TCO_631XESB)}, - { ITCO_PCI_DEVICE(0x267f, TCO_631XESB)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_0, TCO_ICH7)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_30, TCO_ICH7DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_1, TCO_ICH7M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH7_31, TCO_ICH7MDH)}, - { ITCO_PCI_DEVICE(0x27bc, TCO_NM10)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_0, TCO_ICH8)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_2, TCO_ICH8DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_3, TCO_ICH8DO)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_4, TCO_ICH8M)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH8_1, TCO_ICH8ME)}, - { ITCO_PCI_DEVICE(0x2918, 
TCO_ICH9)}, - { ITCO_PCI_DEVICE(0x2916, TCO_ICH9R)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_2, TCO_ICH9DH)}, - { ITCO_PCI_DEVICE(PCI_DEVICE_ID_INTEL_ICH9_4, TCO_ICH9DO)}, - { ITCO_PCI_DEVICE(0x2919, TCO_ICH9M)}, - { ITCO_PCI_DEVICE(0x2917, TCO_ICH9ME)}, - { ITCO_PCI_DEVICE(0x3a18, TCO_ICH10)}, - { ITCO_PCI_DEVICE(0x3a16, TCO_ICH10R)}, - { ITCO_PCI_DEVICE(0x3a1a, TCO_ICH10D)}, - { ITCO_PCI_DEVICE(0x3a14, TCO_ICH10DO)}, - { ITCO_PCI_DEVICE(0x3b00, TCO_PCH)}, - { ITCO_PCI_DEVICE(0x3b01, TCO_PCHM)}, - { ITCO_PCI_DEVICE(0x3b02, TCO_P55)}, - { ITCO_PCI_DEVICE(0x3b03, TCO_PM55)}, - { ITCO_PCI_DEVICE(0x3b06, TCO_H55)}, - { ITCO_PCI_DEVICE(0x3b07, TCO_QM57)}, - { ITCO_PCI_DEVICE(0x3b08, TCO_H57)}, - { ITCO_PCI_DEVICE(0x3b09, TCO_HM55)}, - { ITCO_PCI_DEVICE(0x3b0a, TCO_Q57)}, - { ITCO_PCI_DEVICE(0x3b0b, TCO_HM57)}, - { ITCO_PCI_DEVICE(0x3b0d, TCO_PCHMSFF)}, - { ITCO_PCI_DEVICE(0x3b0f, TCO_QS57)}, - { ITCO_PCI_DEVICE(0x3b12, TCO_3400)}, - { ITCO_PCI_DEVICE(0x3b14, TCO_3420)}, - { ITCO_PCI_DEVICE(0x3b16, TCO_3450)}, - { ITCO_PCI_DEVICE(0x5031, TCO_EP80579)}, - { ITCO_PCI_DEVICE(0x1c41, TCO_CPT1)}, - { ITCO_PCI_DEVICE(0x1c42, TCO_CPT2)}, - { ITCO_PCI_DEVICE(0x1c43, TCO_CPT3)}, - { ITCO_PCI_DEVICE(0x1c44, TCO_CPT4)}, - { ITCO_PCI_DEVICE(0x1c45, TCO_CPT5)}, - { ITCO_PCI_DEVICE(0x1c46, TCO_CPT6)}, - { ITCO_PCI_DEVICE(0x1c47, TCO_CPT7)}, - { ITCO_PCI_DEVICE(0x1c48, TCO_CPT8)}, - { ITCO_PCI_DEVICE(0x1c49, TCO_CPT9)}, - { ITCO_PCI_DEVICE(0x1c4a, TCO_CPT10)}, - { ITCO_PCI_DEVICE(0x1c4b, TCO_CPT11)}, - { ITCO_PCI_DEVICE(0x1c4c, TCO_CPT12)}, - { ITCO_PCI_DEVICE(0x1c4d, TCO_CPT13)}, - { ITCO_PCI_DEVICE(0x1c4e, TCO_CPT14)}, - { ITCO_PCI_DEVICE(0x1c4f, TCO_CPT15)}, - { ITCO_PCI_DEVICE(0x1c50, TCO_CPT16)}, - { ITCO_PCI_DEVICE(0x1c51, TCO_CPT17)}, - { ITCO_PCI_DEVICE(0x1c52, TCO_CPT18)}, - { ITCO_PCI_DEVICE(0x1c53, TCO_CPT19)}, - { ITCO_PCI_DEVICE(0x1c54, TCO_CPT20)}, - { ITCO_PCI_DEVICE(0x1c55, TCO_CPT21)}, - { ITCO_PCI_DEVICE(0x1c56, TCO_CPT22)}, - { ITCO_PCI_DEVICE(0x1c57, TCO_CPT23)}, - { ITCO_PCI_DEVICE(0x1c58, TCO_CPT24)}, - { ITCO_PCI_DEVICE(0x1c59, TCO_CPT25)}, - { ITCO_PCI_DEVICE(0x1c5a, TCO_CPT26)}, - { ITCO_PCI_DEVICE(0x1c5b, TCO_CPT27)}, - { ITCO_PCI_DEVICE(0x1c5c, TCO_CPT28)}, - { ITCO_PCI_DEVICE(0x1c5d, TCO_CPT29)}, - { ITCO_PCI_DEVICE(0x1c5e, TCO_CPT30)}, - { ITCO_PCI_DEVICE(0x1c5f, TCO_CPT31)}, - { ITCO_PCI_DEVICE(0x1d40, TCO_PBG1)}, - { ITCO_PCI_DEVICE(0x1d41, TCO_PBG2)}, - { ITCO_PCI_DEVICE(0x2310, TCO_DH89XXCC)}, - { ITCO_PCI_DEVICE(0x1e40, TCO_PPT0)}, - { ITCO_PCI_DEVICE(0x1e41, TCO_PPT1)}, - { ITCO_PCI_DEVICE(0x1e42, TCO_PPT2)}, - { ITCO_PCI_DEVICE(0x1e43, TCO_PPT3)}, - { ITCO_PCI_DEVICE(0x1e44, TCO_PPT4)}, - { ITCO_PCI_DEVICE(0x1e45, TCO_PPT5)}, - { ITCO_PCI_DEVICE(0x1e46, TCO_PPT6)}, - { ITCO_PCI_DEVICE(0x1e47, TCO_PPT7)}, - { ITCO_PCI_DEVICE(0x1e48, TCO_PPT8)}, - { ITCO_PCI_DEVICE(0x1e49, TCO_PPT9)}, - { ITCO_PCI_DEVICE(0x1e4a, TCO_PPT10)}, - { ITCO_PCI_DEVICE(0x1e4b, TCO_PPT11)}, - { ITCO_PCI_DEVICE(0x1e4c, TCO_PPT12)}, - { ITCO_PCI_DEVICE(0x1e4d, TCO_PPT13)}, - { ITCO_PCI_DEVICE(0x1e4e, TCO_PPT14)}, - { ITCO_PCI_DEVICE(0x1e4f, TCO_PPT15)}, - { ITCO_PCI_DEVICE(0x1e50, TCO_PPT16)}, - { ITCO_PCI_DEVICE(0x1e51, TCO_PPT17)}, - { ITCO_PCI_DEVICE(0x1e52, TCO_PPT18)}, - { ITCO_PCI_DEVICE(0x1e53, TCO_PPT19)}, - { ITCO_PCI_DEVICE(0x1e54, TCO_PPT20)}, - { ITCO_PCI_DEVICE(0x1e55, TCO_PPT21)}, - { ITCO_PCI_DEVICE(0x1e56, TCO_PPT22)}, - { ITCO_PCI_DEVICE(0x1e57, TCO_PPT23)}, - { ITCO_PCI_DEVICE(0x1e58, TCO_PPT24)}, - { ITCO_PCI_DEVICE(0x1e59, TCO_PPT25)}, - { ITCO_PCI_DEVICE(0x1e5a, TCO_PPT26)}, - 
{ ITCO_PCI_DEVICE(0x1e5b, TCO_PPT27)}, - { ITCO_PCI_DEVICE(0x1e5c, TCO_PPT28)}, - { ITCO_PCI_DEVICE(0x1e5d, TCO_PPT29)}, - { ITCO_PCI_DEVICE(0x1e5e, TCO_PPT30)}, - { ITCO_PCI_DEVICE(0x1e5f, TCO_PPT31)}, + { PCI_VDEVICE(INTEL, 0x2410), TCO_ICH}, + { PCI_VDEVICE(INTEL, 0x2420), TCO_ICH0}, + { PCI_VDEVICE(INTEL, 0x2440), TCO_ICH2}, + { PCI_VDEVICE(INTEL, 0x244c), TCO_ICH2M}, + { PCI_VDEVICE(INTEL, 0x2480), TCO_ICH3}, + { PCI_VDEVICE(INTEL, 0x248c), TCO_ICH3M}, + { PCI_VDEVICE(INTEL, 0x24c0), TCO_ICH4}, + { PCI_VDEVICE(INTEL, 0x24cc), TCO_ICH4M}, + { PCI_VDEVICE(INTEL, 0x2450), TCO_CICH}, + { PCI_VDEVICE(INTEL, 0x24d0), TCO_ICH5}, + { PCI_VDEVICE(INTEL, 0x25a1), TCO_6300ESB}, + { PCI_VDEVICE(INTEL, 0x2640), TCO_ICH6}, + { PCI_VDEVICE(INTEL, 0x2641), TCO_ICH6M}, + { PCI_VDEVICE(INTEL, 0x2642), TCO_ICH6W}, + { PCI_VDEVICE(INTEL, 0x2670), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2671), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2672), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2673), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2674), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2675), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2676), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2677), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2678), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x2679), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267a), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267b), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267c), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267d), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267e), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x267f), TCO_631XESB}, + { PCI_VDEVICE(INTEL, 0x27b8), TCO_ICH7}, + { PCI_VDEVICE(INTEL, 0x27b0), TCO_ICH7DH}, + { PCI_VDEVICE(INTEL, 0x27b9), TCO_ICH7M}, + { PCI_VDEVICE(INTEL, 0x27bd), TCO_ICH7MDH}, + { PCI_VDEVICE(INTEL, 0x27bc), TCO_NM10}, + { PCI_VDEVICE(INTEL, 0x2810), TCO_ICH8}, + { PCI_VDEVICE(INTEL, 0x2812), TCO_ICH8DH}, + { PCI_VDEVICE(INTEL, 0x2814), TCO_ICH8DO}, + { PCI_VDEVICE(INTEL, 0x2815), TCO_ICH8M}, + { PCI_VDEVICE(INTEL, 0x2811), TCO_ICH8ME}, + { PCI_VDEVICE(INTEL, 0x2918), TCO_ICH9}, + { PCI_VDEVICE(INTEL, 0x2916), TCO_ICH9R}, + { PCI_VDEVICE(INTEL, 0x2912), TCO_ICH9DH}, + { PCI_VDEVICE(INTEL, 0x2914), TCO_ICH9DO}, + { PCI_VDEVICE(INTEL, 0x2919), TCO_ICH9M}, + { PCI_VDEVICE(INTEL, 0x2917), TCO_ICH9ME}, + { PCI_VDEVICE(INTEL, 0x3a18), TCO_ICH10}, + { PCI_VDEVICE(INTEL, 0x3a16), TCO_ICH10R}, + { PCI_VDEVICE(INTEL, 0x3a1a), TCO_ICH10D}, + { PCI_VDEVICE(INTEL, 0x3a14), TCO_ICH10DO}, + { PCI_VDEVICE(INTEL, 0x3b00), TCO_PCH}, + { PCI_VDEVICE(INTEL, 0x3b01), TCO_PCHM}, + { PCI_VDEVICE(INTEL, 0x3b02), TCO_P55}, + { PCI_VDEVICE(INTEL, 0x3b03), TCO_PM55}, + { PCI_VDEVICE(INTEL, 0x3b06), TCO_H55}, + { PCI_VDEVICE(INTEL, 0x3b07), TCO_QM57}, + { PCI_VDEVICE(INTEL, 0x3b08), TCO_H57}, + { PCI_VDEVICE(INTEL, 0x3b09), TCO_HM55}, + { PCI_VDEVICE(INTEL, 0x3b0a), TCO_Q57}, + { PCI_VDEVICE(INTEL, 0x3b0b), TCO_HM57}, + { PCI_VDEVICE(INTEL, 0x3b0d), TCO_PCHMSFF}, + { PCI_VDEVICE(INTEL, 0x3b0f), TCO_QS57}, + { PCI_VDEVICE(INTEL, 0x3b12), TCO_3400}, + { PCI_VDEVICE(INTEL, 0x3b14), TCO_3420}, + { PCI_VDEVICE(INTEL, 0x3b16), TCO_3450}, + { PCI_VDEVICE(INTEL, 0x5031), TCO_EP80579}, + { PCI_VDEVICE(INTEL, 0x1c41), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c42), TCO_CPTD}, + { PCI_VDEVICE(INTEL, 0x1c43), TCO_CPTM}, + { PCI_VDEVICE(INTEL, 0x1c44), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c45), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c46), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c47), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c48), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c49), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4a), TCO_CPT}, + { 
PCI_VDEVICE(INTEL, 0x1c4b), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4c), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4d), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4e), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c4f), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c50), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c51), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c52), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c53), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c54), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c55), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c56), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c57), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c58), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c59), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5a), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5b), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5c), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5d), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5e), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1c5f), TCO_CPT}, + { PCI_VDEVICE(INTEL, 0x1d40), TCO_PBG}, + { PCI_VDEVICE(INTEL, 0x1d41), TCO_PBG}, + { PCI_VDEVICE(INTEL, 0x2310), TCO_DH89XXCC}, + { PCI_VDEVICE(INTEL, 0x1e40), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e41), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e42), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e43), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e44), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e45), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e46), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e47), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e48), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e49), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4a), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4b), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4c), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4d), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4e), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e4f), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e50), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e51), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e52), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e53), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e54), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e55), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e56), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e57), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e58), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e59), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5a), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5b), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5c), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5d), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5e), TCO_PPT}, + { PCI_VDEVICE(INTEL, 0x1e5f), TCO_PPT}, { 0, }, /* End of list */ }; MODULE_DEVICE_TABLE(pci, iTCO_wdt_pci_tbl); @@ -1052,15 +923,10 @@ static void iTCO_wdt_shutdown(struct platform_device *dev) iTCO_wdt_stop(); } -#define iTCO_wdt_suspend NULL -#define iTCO_wdt_resume NULL - static struct platform_driver iTCO_wdt_driver = { .probe = iTCO_wdt_probe, .remove = __devexit_p(iTCO_wdt_remove), .shutdown = iTCO_wdt_shutdown, - .suspend = iTCO_wdt_suspend, - .resume = iTCO_wdt_resume, .driver = { .owner = THIS_MODULE, .name = DRV_NAME, diff --git a/drivers/watchdog/imx2_wdt.c b/drivers/watchdog/imx2_wdt.c index 86f7cac1026c..b8ef2c6dca7c 100644 --- a/drivers/watchdog/imx2_wdt.c +++ b/drivers/watchdog/imx2_wdt.c @@ -329,12 +329,18 @@ static void imx2_wdt_shutdown(struct platform_device *pdev) } } +static const struct of_device_id imx2_wdt_dt_ids[] = { + { .compatible = "fsl,imx21-wdt", }, + { /* sentinel */ } +}; + static struct platform_driver imx2_wdt_driver = { .remove = __exit_p(imx2_wdt_remove), .shutdown = imx2_wdt_shutdown, .driver = { .name = DRIVER_NAME, .owner = THIS_MODULE, + .of_match_table = imx2_wdt_dt_ids, }, }; diff --git a/drivers/watchdog/it8712f_wdt.c b/drivers/watchdog/it8712f_wdt.c index 
6143f52ba6b8..8d2d8502d3e8 100644 --- a/drivers/watchdog/it8712f_wdt.c +++ b/drivers/watchdog/it8712f_wdt.c @@ -28,10 +28,10 @@ #include <linux/notifier.h> #include <linux/reboot.h> #include <linux/fs.h> -#include <linux/pci.h> #include <linux/spinlock.h> #include <linux/uaccess.h> #include <linux/io.h> +#include <linux/ioport.h> #define NAME "it8712f_wdt" @@ -51,7 +51,6 @@ MODULE_PARM_DESC(nowayout, "Disable watchdog shutdown on close"); static unsigned long wdt_open; static unsigned expect_close; -static spinlock_t io_lock; static unsigned char revision; /* Dog Food address - We use the game port address */ @@ -121,20 +120,26 @@ static inline void superio_select(int ldn) outb(ldn, VAL); } -static inline void superio_enter(void) +static inline int superio_enter(void) { - spin_lock(&io_lock); + /* + * Try to reserve REG and REG + 1 for exclusive access. + */ + if (!request_muxed_region(REG, 2, NAME)) + return -EBUSY; + outb(0x87, REG); outb(0x01, REG); outb(0x55, REG); outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0x02, REG); outb(0x02, VAL); - spin_unlock(&io_lock); + release_region(REG, 2); } static inline void it8712f_wdt_ping(void) @@ -173,10 +178,13 @@ static int it8712f_wdt_get_status(void) return 0; } -static void it8712f_wdt_enable(void) +static int it8712f_wdt_enable(void) { + int ret = superio_enter(); + if (ret) + return ret; + printk(KERN_DEBUG NAME ": enabling watchdog timer\n"); - superio_enter(); superio_select(LDN_GPIO); superio_outb(wdt_control_reg, WDT_CONTROL); @@ -186,13 +194,17 @@ static void it8712f_wdt_enable(void) superio_exit(); it8712f_wdt_ping(); + + return 0; } -static void it8712f_wdt_disable(void) +static int it8712f_wdt_disable(void) { - printk(KERN_DEBUG NAME ": disabling watchdog timer\n"); + int ret = superio_enter(); + if (ret) + return ret; - superio_enter(); + printk(KERN_DEBUG NAME ": disabling watchdog timer\n"); superio_select(LDN_GPIO); superio_outb(0, WDT_CONFIG); @@ -202,6 +214,7 @@ static void it8712f_wdt_disable(void) superio_outb(0, WDT_TIMEOUT); superio_exit(); + return 0; } static int it8712f_wdt_notify(struct notifier_block *this, @@ -252,6 +265,7 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, WDIOF_MAGICCLOSE, }; int value; + int ret; switch (cmd) { case WDIOC_GETSUPPORT: @@ -259,7 +273,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, return -EFAULT; return 0; case WDIOC_GETSTATUS: - superio_enter(); + ret = superio_enter(); + if (ret) + return ret; superio_select(LDN_GPIO); value = it8712f_wdt_get_status(); @@ -280,7 +296,9 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, if (value > (max_units * 60)) return -EINVAL; margin = value; - superio_enter(); + ret = superio_enter(); + if (ret) + return ret; superio_select(LDN_GPIO); it8712f_wdt_update_margin(); @@ -299,10 +317,14 @@ static long it8712f_wdt_ioctl(struct file *file, unsigned int cmd, static int it8712f_wdt_open(struct inode *inode, struct file *file) { + int ret; /* only allow one at a time */ if (test_and_set_bit(0, &wdt_open)) return -EBUSY; - it8712f_wdt_enable(); + + ret = it8712f_wdt_enable(); + if (ret) + return ret; return nonseekable_open(inode, file); } @@ -313,7 +335,8 @@ static int it8712f_wdt_release(struct inode *inode, struct file *file) ": watchdog device closed unexpectedly, will not" " disable the watchdog timer\n"); } else if (!nowayout) { - it8712f_wdt_disable(); + if (it8712f_wdt_disable()) + printk(KERN_WARNING NAME "Watchdog disable failed\n"); } expect_close = 0; 
clear_bit(0, &wdt_open); @@ -340,8 +363,10 @@ static int __init it8712f_wdt_find(unsigned short *address) { int err = -ENODEV; int chip_type; + int ret = superio_enter(); + if (ret) + return ret; - superio_enter(); chip_type = superio_inw(DEVID); if (chip_type != IT8712F_DEVID) goto exit; @@ -382,8 +407,6 @@ static int __init it8712f_wdt_init(void) { int err = 0; - spin_lock_init(&io_lock); - if (it8712f_wdt_find(&address)) return -ENODEV; @@ -392,7 +415,11 @@ static int __init it8712f_wdt_init(void) return -EBUSY; } - it8712f_wdt_disable(); + err = it8712f_wdt_disable(); + if (err) { + printk(KERN_ERR NAME ": unable to disable watchdog timer.\n"); + goto out; + } err = register_reboot_notifier(&it8712f_wdt_notifier); if (err) { diff --git a/drivers/watchdog/it87_wdt.c b/drivers/watchdog/it87_wdt.c index b1bc72f9a209..a2d9a1266a23 100644 --- a/drivers/watchdog/it87_wdt.c +++ b/drivers/watchdog/it87_wdt.c @@ -137,7 +137,6 @@ static unsigned int base, gpact, ciract, max_units, chip_type; static unsigned long wdt_status; -static DEFINE_SPINLOCK(spinlock); static int nogameport = DEFAULT_NOGAMEPORT; static int exclusive = DEFAULT_EXCLUSIVE; @@ -163,18 +162,26 @@ MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started, default=" /* Superio Chip */ -static inline void superio_enter(void) +static inline int superio_enter(void) { + /* + * Try to reserve REG and REG + 1 for exclusive access. + */ + if (!request_muxed_region(REG, 2, WATCHDOG_NAME)) + return -EBUSY; + outb(0x87, REG); outb(0x01, REG); outb(0x55, REG); outb(0x55, REG); + return 0; } static inline void superio_exit(void) { outb(0x02, REG); outb(0x02, VAL); + release_region(REG, 2); } static inline void superio_select(int ldn) @@ -255,12 +262,11 @@ static void wdt_keepalive(void) set_bit(WDTS_KEEPALIVE, &wdt_status); } -static void wdt_start(void) +static int wdt_start(void) { - unsigned long flags; - - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; superio_select(GPIO); if (test_bit(WDTS_USE_GP, &wdt_status)) @@ -270,15 +276,15 @@ static void wdt_start(void) wdt_update_timeout(); superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + + return 0; } -static void wdt_stop(void) +static int wdt_stop(void) { - unsigned long flags; - - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; superio_select(GPIO); superio_outb(0x00, WDTCTRL); @@ -288,7 +294,7 @@ static void wdt_stop(void) superio_outb(0x00, WDTVALMSB); superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + return 0; } /** @@ -303,8 +309,6 @@ static void wdt_stop(void) static int wdt_set_timeout(int t) { - unsigned long flags; - if (t < 1 || t > max_units * 60) return -EINVAL; @@ -313,14 +317,15 @@ static int wdt_set_timeout(int t) else timeout = t; - spin_lock_irqsave(&spinlock, flags); if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; + superio_select(GPIO); wdt_update_timeout(); superio_exit(); } - spin_unlock_irqrestore(&spinlock, flags); return 0; } @@ -339,12 +344,12 @@ static int wdt_set_timeout(int t) static int wdt_get_status(int *status) { - unsigned long flags; - *status = 0; if (testmode) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + int ret = superio_enter(); + if (ret) + return ret; + superio_select(GPIO); if (superio_inb(WDTCTRL) & WDT_ZERO) { superio_outb(0x00, WDTCTRL); @@ -353,7 +358,6 @@ static int wdt_get_status(int *status) } 
superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } if (test_and_clear_bit(WDTS_KEEPALIVE, &wdt_status)) *status |= WDIOF_KEEPALIVEPING; @@ -379,9 +383,17 @@ static int wdt_open(struct inode *inode, struct file *file) if (exclusive && test_and_set_bit(WDTS_DEV_OPEN, &wdt_status)) return -EBUSY; if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) { + int ret; if (nowayout && !test_and_set_bit(WDTS_LOCKED, &wdt_status)) __module_get(THIS_MODULE); - wdt_start(); + + ret = wdt_start(); + if (ret) { + clear_bit(WDTS_LOCKED, &wdt_status); + clear_bit(WDTS_TIMER_RUN, &wdt_status); + clear_bit(WDTS_DEV_OPEN, &wdt_status); + return ret; + } } return nonseekable_open(inode, file); } @@ -403,7 +415,16 @@ static int wdt_release(struct inode *inode, struct file *file) { if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { if (test_and_clear_bit(WDTS_EXPECTED, &wdt_status)) { - wdt_stop(); + int ret = wdt_stop(); + if (ret) { + /* + * Stop failed. Just keep the watchdog alive + * and hope nothing bad happens. + */ + set_bit(WDTS_EXPECTED, &wdt_status); + wdt_keepalive(); + return ret; + } clear_bit(WDTS_TIMER_RUN, &wdt_status); } else { wdt_keepalive(); @@ -484,7 +505,9 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) &ident, sizeof(ident)) ? -EFAULT : 0; case WDIOC_GETSTATUS: - wdt_get_status(&status); + rc = wdt_get_status(&status); + if (rc) + return rc; return put_user(status, uarg.i); case WDIOC_GETBOOTSTATUS: @@ -500,14 +523,22 @@ static long wdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg) switch (new_options) { case WDIOS_DISABLECARD: - if (test_bit(WDTS_TIMER_RUN, &wdt_status)) - wdt_stop(); + if (test_bit(WDTS_TIMER_RUN, &wdt_status)) { + rc = wdt_stop(); + if (rc) + return rc; + } clear_bit(WDTS_TIMER_RUN, &wdt_status); return 0; case WDIOS_ENABLECARD: - if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) - wdt_start(); + if (!test_and_set_bit(WDTS_TIMER_RUN, &wdt_status)) { + rc = wdt_start(); + if (rc) { + clear_bit(WDTS_TIMER_RUN, &wdt_status); + return rc; + } + } return 0; default: @@ -560,16 +591,17 @@ static int __init it87_wdt_init(void) int rc = 0; int try_gameport = !nogameport; u8 chip_rev; - unsigned long flags; + int gp_rreq_fail = 0; wdt_status = 0; - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + rc = superio_enter(); + if (rc) + return rc; + chip_type = superio_inw(CHIPID); chip_rev = superio_inb(CHIPREV) & 0x0f; superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); switch (chip_type) { case IT8702_ID: @@ -603,8 +635,9 @@ static int __init it87_wdt_init(void) return -ENODEV; } - spin_lock_irqsave(&spinlock, flags); - superio_enter(); + rc = superio_enter(); + if (rc) + return rc; superio_select(GPIO); superio_outb(WDT_TOV1, WDTCFG); @@ -620,21 +653,16 @@ static int __init it87_wdt_init(void) } gpact = superio_inb(ACTREG); superio_outb(0x01, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); if (request_region(base, 1, WATCHDOG_NAME)) set_bit(WDTS_USE_GP, &wdt_status); else - rc = -EIO; - } else { - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); + gp_rreq_fail = 1; } /* If we haven't Gameport support, try to get CIR support */ if (!test_bit(WDTS_USE_GP, &wdt_status)) { if (!request_region(CIR_BASE, 8, WATCHDOG_NAME)) { - if (rc == -EIO) + if (gp_rreq_fail) printk(KERN_ERR PFX "I/O Address 0x%04x and 0x%04x" " already in use\n", base, CIR_BASE); @@ -646,21 +674,16 @@ static int __init it87_wdt_init(void) goto err_out; } base = CIR_BASE; - spin_lock_irqsave(&spinlock, flags); - 
superio_enter(); superio_select(CIR); superio_outw(base, BASEREG); superio_outb(0x00, CIR_ILS); ciract = superio_inb(ACTREG); superio_outb(0x01, ACTREG); - if (rc == -EIO) { + if (gp_rreq_fail) { superio_select(GAMEPORT); superio_outb(gpact, ACTREG); } - - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } if (timeout < 1 || timeout > max_units * 60) { @@ -704,6 +727,7 @@ static int __init it87_wdt_init(void) "nogameport=%d)\n", chip_type, chip_rev, timeout, nowayout, testmode, exclusive, nogameport); + superio_exit(); return 0; err_out_reboot: @@ -711,49 +735,37 @@ err_out_reboot: err_out_region: release_region(base, test_bit(WDTS_USE_GP, &wdt_status) ? 1 : 8); if (!test_bit(WDTS_USE_GP, &wdt_status)) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); superio_select(CIR); superio_outb(ciract, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } err_out: if (try_gameport) { - spin_lock_irqsave(&spinlock, flags); - superio_enter(); superio_select(GAMEPORT); superio_outb(gpact, ACTREG); - superio_exit(); - spin_unlock_irqrestore(&spinlock, flags); } + superio_exit(); return rc; } static void __exit it87_wdt_exit(void) { - unsigned long flags; - int nolock; - - nolock = !spin_trylock_irqsave(&spinlock, flags); - superio_enter(); - superio_select(GPIO); - superio_outb(0x00, WDTCTRL); - superio_outb(0x00, WDTCFG); - superio_outb(0x00, WDTVALLSB); - if (max_units > 255) - superio_outb(0x00, WDTVALMSB); - if (test_bit(WDTS_USE_GP, &wdt_status)) { - superio_select(GAMEPORT); - superio_outb(gpact, ACTREG); - } else { - superio_select(CIR); - superio_outb(ciract, ACTREG); + if (superio_enter() == 0) { + superio_select(GPIO); + superio_outb(0x00, WDTCTRL); + superio_outb(0x00, WDTCFG); + superio_outb(0x00, WDTVALLSB); + if (max_units > 255) + superio_outb(0x00, WDTVALMSB); + if (test_bit(WDTS_USE_GP, &wdt_status)) { + superio_select(GAMEPORT); + superio_outb(gpact, ACTREG); + } else { + superio_select(CIR); + superio_outb(ciract, ACTREG); + } + superio_exit(); } - superio_exit(); - if (!nolock) - spin_unlock_irqrestore(&spinlock, flags); misc_deregister(&wdt_miscdev); unregister_reboot_notifier(&wdt_notifier); diff --git a/drivers/watchdog/mpcore_wdt.c b/drivers/watchdog/mpcore_wdt.c index 2b4af222b5f2..4dc31024d26c 100644 --- a/drivers/watchdog/mpcore_wdt.c +++ b/drivers/watchdog/mpcore_wdt.c @@ -407,12 +407,35 @@ static int __devexit mpcore_wdt_remove(struct platform_device *dev) return 0; } +#ifdef CONFIG_PM +static int mpcore_wdt_suspend(struct platform_device *dev, pm_message_t msg) +{ + struct mpcore_wdt *wdt = platform_get_drvdata(dev); + mpcore_wdt_stop(wdt); /* Turn the WDT off */ + return 0; +} + +static int mpcore_wdt_resume(struct platform_device *dev) +{ + struct mpcore_wdt *wdt = platform_get_drvdata(dev); + /* re-activate timer */ + if (test_bit(0, &wdt->timer_alive)) + mpcore_wdt_start(wdt); + return 0; +} +#else +#define mpcore_wdt_suspend NULL +#define mpcore_wdt_resume NULL +#endif + /* work with hotplug and coldplug */ MODULE_ALIAS("platform:mpcore_wdt"); static struct platform_driver mpcore_wdt_driver = { .probe = mpcore_wdt_probe, .remove = __devexit_p(mpcore_wdt_remove), + .suspend = mpcore_wdt_suspend, + .resume = mpcore_wdt_resume, .shutdown = mpcore_wdt_shutdown, .driver = { .owner = THIS_MODULE, diff --git a/drivers/watchdog/mtx-1_wdt.c b/drivers/watchdog/mtx-1_wdt.c index 0430e093b1a0..ac37bb82392c 100644 --- a/drivers/watchdog/mtx-1_wdt.c +++ b/drivers/watchdog/mtx-1_wdt.c @@ -225,11 +225,11 @@ static int __devinit 
mtx1_wdt_probe(struct platform_device *pdev) ret = misc_register(&mtx1_wdt_misc); if (ret < 0) { - printk(KERN_ERR " mtx-1_wdt : failed to register\n"); + dev_err(&pdev->dev, "failed to register\n"); return ret; } mtx1_wdt_start(); - printk(KERN_INFO "MTX-1 Watchdog driver\n"); + dev_info(&pdev->dev, "MTX-1 Watchdog driver\n"); return 0; } diff --git a/drivers/watchdog/of_xilinx_wdt.c b/drivers/watchdog/of_xilinx_wdt.c new file mode 100644 index 000000000000..4ec741ac952c --- /dev/null +++ b/drivers/watchdog/of_xilinx_wdt.c @@ -0,0 +1,433 @@ +/*
+* of_xilinx_wdt.c 1.01 A Watchdog Device Driver for Xilinx xps_timebase_wdt
+*
+* (C) Copyright 2011 (Alejandro Cabrera <aldaya@gmail.com>)
+*
+* -----------------------
+*
+* This program is free software; you can redistribute it and/or
+* modify it under the terms of the GNU General Public License
+* as published by the Free Software Foundation; either version
+* 2 of the License, or (at your option) any later version.
+*
+* -----------------------
+* 30-May-2011 Alejandro Cabrera <aldaya@gmail.com>
+* - If "xlnx,wdt-enable-once" is not found in the device tree, the
+* module will use CONFIG_WATCHDOG_NOWAYOUT
+* - If the device tree parameters ("clock-frequency" and
+* "xlnx,wdt-interval") are not found, the driver won't
+* know the wdt reset interval
+*/
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/fs.h>
+#include <linux/miscdevice.h>
+#include <linux/init.h>
+#include <linux/ioport.h>
+#include <linux/watchdog.h>
+#include <linux/io.h>
+#include <linux/uaccess.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_address.h>
+
+/* Register offsets for the Wdt device */
+#define XWT_TWCSR0_OFFSET 0x0 /* Control/Status Register0 */
+#define XWT_TWCSR1_OFFSET 0x4 /* Control/Status Register1 */
+#define XWT_TBR_OFFSET 0x8 /* Timebase Register Offset */
+
+/* Control/Status Register Masks */
+#define XWT_CSR0_WRS_MASK 0x00000008 /* Reset status */
+#define XWT_CSR0_WDS_MASK 0x00000004 /* Timer state */
+#define XWT_CSR0_EWDT1_MASK 0x00000002 /* Enable bit 1 */
+
+/* Control/Status Register 0/1 bits */
+#define XWT_CSRX_EWDT2_MASK 0x00000001 /* Enable bit 2 */
+
+/* SelfTest constants */
+#define XWT_MAX_SELFTEST_LOOP_COUNT 0x00010000
+#define XWT_TIMER_FAILED 0xFFFFFFFF
+
+#define WATCHDOG_NAME "Xilinx Watchdog"
+#define PFX WATCHDOG_NAME ": "
+
+struct xwdt_device {
+ struct resource res;
+ void __iomem *base;
+ u32 nowayout;
+ u32 wdt_interval;
+ u32 boot_status;
+};
+
+static struct xwdt_device xdev;
+
+static u32 timeout;
+static u32 control_status_reg;
+static u8 expect_close;
+static u8 no_timeout;
+static unsigned long driver_open;
+
+static DEFINE_SPINLOCK(spinlock);
+
+static void xwdt_start(void)
+{
+ spin_lock(&spinlock);
+
+ /* Clean previous status and enable the watchdog timer */
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+
+ iowrite32((control_status_reg | XWT_CSR0_EWDT1_MASK),
+ xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32(XWT_CSRX_EWDT2_MASK, xdev.base + XWT_TWCSR1_OFFSET);
+
+ spin_unlock(&spinlock);
+}
+
+static void xwdt_stop(void)
+{
+ spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32((control_status_reg & ~XWT_CSR0_EWDT1_MASK),
+ xdev.base + XWT_TWCSR0_OFFSET);
+
+ iowrite32(0, xdev.base + XWT_TWCSR1_OFFSET);
+
+ spin_unlock(&spinlock);
+ printk(KERN_INFO PFX "Stopped!\n");
+}
+
+static void xwdt_keepalive(void)
+{
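+ /* Writing the WRS/WDS status bits back clears them (they are
+ * write-one-to-clear on this core) and restarts the timeout
+ * interval, which is what services the watchdog. */
+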
spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ control_status_reg |= (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK);
+ iowrite32(control_status_reg, xdev.base + XWT_TWCSR0_OFFSET);
+
+ spin_unlock(&spinlock);
+}
+
+static void xwdt_get_status(int *status)
+{
+ int new_status;
+
+ spin_lock(&spinlock);
+
+ control_status_reg = ioread32(xdev.base + XWT_TWCSR0_OFFSET);
+ new_status = ((control_status_reg &
+ (XWT_CSR0_WRS_MASK | XWT_CSR0_WDS_MASK)) != 0);
+ spin_unlock(&spinlock);
+
+ *status = 0;
+ if (new_status & 1)
+ *status |= WDIOF_CARDRESET;
+}
+
+static u32 xwdt_selftest(void)
+{
+ int i;
+ u32 timer_value1;
+ u32 timer_value2;
+
+ spin_lock(&spinlock);
+
+ timer_value1 = ioread32(xdev.base + XWT_TBR_OFFSET);
+ timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+
+ for (i = 0;
+ ((i <= XWT_MAX_SELFTEST_LOOP_COUNT) &&
+ (timer_value2 == timer_value1)); i++) {
+ timer_value2 = ioread32(xdev.base + XWT_TBR_OFFSET);
+ }
+
+ spin_unlock(&spinlock);
+
+ if (timer_value2 != timer_value1)
+ return ~XWT_TIMER_FAILED;
+ else
+ return XWT_TIMER_FAILED;
+}
+
+static int xwdt_open(struct inode *inode, struct file *file)
+{
+ /* Only one process can handle the wdt at a time */
+ if (test_and_set_bit(0, &driver_open))
+ return -EBUSY;
+
+ /* Make sure that the module is always loaded... */
+ if (xdev.nowayout)
+ __module_get(THIS_MODULE);
+
+ xwdt_start();
+ printk(KERN_INFO PFX "Started...\n");
+
+ return nonseekable_open(inode, file);
+}
+
+static int xwdt_release(struct inode *inode, struct file *file)
+{
+ if (expect_close == 42) {
+ xwdt_stop();
+ } else {
+ printk(KERN_CRIT PFX
+ "Unexpected close, not stopping watchdog!\n");
+ xwdt_keepalive();
+ }
+
+ clear_bit(0, &driver_open);
+ expect_close = 0;
+ return 0;
+}
+
+/*
+ * xwdt_write:
+ * @file: file handle to the watchdog
+ * @buf: buffer to write (unused, as the data content does not matter here)
+ * @count: count of bytes
+ * @ppos: pointer to the position to write. No seeks allowed
+ *
+ * A write to a watchdog device is defined as a keepalive signal. Any
+ * write of data will do, as we don't define content meaning.
+ */
+static ssize_t xwdt_write(struct file *file, const char __user *buf,
+ size_t len, loff_t *ppos)
+{
+ if (len) {
+ if (!xdev.nowayout) {
+ size_t i;
+
+ /* In case it was set long ago */
+ expect_close = 0;
+
+ for (i = 0; i != len; i++) {
+ char c;
+
+ if (get_user(c, buf + i))
+ return -EFAULT;
+ if (c == 'V')
+ expect_close = 42;
+ }
+ }
+ xwdt_keepalive();
+ }
+ return len;
+}
+
+static const struct watchdog_info ident = {
+ .options = WDIOF_MAGICCLOSE |
+ WDIOF_KEEPALIVEPING,
+ .firmware_version = 1,
+ .identity = WATCHDOG_NAME,
+};
+
+/*
+ * xwdt_ioctl:
+ * @file: file handle to the device
+ * @cmd: watchdog command
+ * @arg: argument pointer
+ *
+ * The watchdog API defines a common set of functions for all watchdogs
+ * according to their available features.
+ */
+static long xwdt_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int status;
+
+ union {
+ struct watchdog_info __user *ident;
+ int __user *i;
+ } uarg;
+
+ uarg.i = (int __user *)arg;
+
+ switch (cmd) {
+ case WDIOC_GETSUPPORT:
+ return copy_to_user(uarg.ident, &ident,
+ sizeof(ident)) ?
-EFAULT : 0;
+
+ case WDIOC_GETBOOTSTATUS:
+ return put_user(xdev.boot_status, uarg.i);
+
+ case WDIOC_GETSTATUS:
+ xwdt_get_status(&status);
+ return put_user(status, uarg.i);
+
+ case WDIOC_KEEPALIVE:
+ xwdt_keepalive();
+ return 0;
+
+ case WDIOC_GETTIMEOUT:
+ if (no_timeout)
+ return -ENOTTY;
+ else
+ return put_user(timeout, uarg.i);
+
+ default:
+ return -ENOTTY;
+ }
+}
+
+static const struct file_operations xwdt_fops = {
+ .owner = THIS_MODULE,
+ .llseek = no_llseek,
+ .write = xwdt_write,
+ .open = xwdt_open,
+ .release = xwdt_release,
+ .unlocked_ioctl = xwdt_ioctl,
+};
+
+static struct miscdevice xwdt_miscdev = {
+ .minor = WATCHDOG_MINOR,
+ .name = "watchdog",
+ .fops = &xwdt_fops,
+};
+
+static int __devinit xwdt_probe(struct platform_device *pdev)
+{
+ int rc;
+ u32 *tmptr;
+ u32 *pfreq;
+
+ no_timeout = 0;
+
+ pfreq = (u32 *)of_get_property(pdev->dev.of_node->parent,
+ "clock-frequency", NULL);
+
+ if (pfreq == NULL) {
+ printk(KERN_WARNING PFX
+ "The watchdog clock frequency cannot be obtained!\n");
+ no_timeout = 1;
+ }
+
+ rc = of_address_to_resource(pdev->dev.of_node, 0, &xdev.res);
+ if (rc) {
+ printk(KERN_WARNING PFX "invalid address!\n");
+ return rc;
+ }
+
+ tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+ "xlnx,wdt-interval", NULL);
+ if (tmptr == NULL) {
+ printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-interval\""
+ " not found in device tree!\n");
+ no_timeout = 1;
+ } else {
+ xdev.wdt_interval = *tmptr;
+ }
+
+ tmptr = (u32 *)of_get_property(pdev->dev.of_node,
+ "xlnx,wdt-enable-once", NULL);
+ if (tmptr == NULL) {
+ printk(KERN_WARNING PFX "Parameter \"xlnx,wdt-enable-once\""
+ " not found in device tree!\n");
+ xdev.nowayout = WATCHDOG_NOWAYOUT;
+ }
+
+/*
+ * The timeout is twice 2^wdt_interval / freq because the first wdt
+ * overflow only raises an interrupt and is ignored; a reset is generated
+ * only at the second wdt overflow.
+ */
+ if (!no_timeout)
+ timeout = 2 * ((1<<xdev.wdt_interval) / *pfreq);
+
+ if (!request_mem_region(xdev.res.start,
+ xdev.res.end - xdev.res.start + 1, WATCHDOG_NAME)) {
+ rc = -ENXIO;
+ printk(KERN_ERR PFX "memory request failure!\n");
+ goto err_out;
+ }
+
+ xdev.base = ioremap(xdev.res.start, xdev.res.end - xdev.res.start + 1);
+ if (xdev.base == NULL) {
+ rc = -ENOMEM;
+ printk(KERN_ERR PFX "ioremap failure!\n");
+ goto release_mem;
+ }
+
+ rc = xwdt_selftest();
+ if (rc == XWT_TIMER_FAILED) {
+ printk(KERN_ERR PFX "SelfTest routine error!\n");
+ goto unmap_io;
+ }
+
+ xwdt_get_status(&xdev.boot_status);
+
+ rc = misc_register(&xwdt_miscdev);
+ if (rc) {
+ printk(KERN_ERR PFX
+ "cannot register miscdev on minor=%d (err=%d)\n",
+ xwdt_miscdev.minor, rc);
+ goto unmap_io;
+ }
+
+ if (no_timeout)
+ printk(KERN_INFO PFX
+ "driver loaded (timeout=?
sec, nowayout=%d)\n", + xdev.nowayout); + else + printk(KERN_INFO PFX + "driver loaded (timeout=%d sec, nowayout=%d)\n", + timeout, xdev.nowayout); + + expect_close = 0; + clear_bit(0, &driver_open); + + return 0; + +unmap_io: + iounmap(xdev.base); +release_mem: + release_mem_region(xdev.res.start, resource_size(&xdev.res)); +err_out: + return rc; +} + +static int __devexit xwdt_remove(struct platform_device *dev) +{ + misc_deregister(&xwdt_miscdev); + iounmap(xdev.base); + release_mem_region(xdev.res.start, resource_size(&xdev.res)); + + return 0; +} + +/* Match table for of_platform binding */ +static struct of_device_id __devinitdata xwdt_of_match[] = { + { .compatible = "xlnx,xps-timebase-wdt-1.01.a", }, + {}, +}; +MODULE_DEVICE_TABLE(of, xwdt_of_match); + +static struct platform_driver xwdt_driver = { + .probe = xwdt_probe, + .remove = __devexit_p(xwdt_remove), + .driver = { + .owner = THIS_MODULE, + .name = WATCHDOG_NAME, + .of_match_table = xwdt_of_match, + }, +}; + +static int __init xwdt_init(void) +{ + return platform_driver_register(&xwdt_driver); +} + +static void __exit xwdt_exit(void) +{ + platform_driver_unregister(&xwdt_driver); +} + +module_init(xwdt_init); +module_exit(xwdt_exit); + +MODULE_AUTHOR("Alejandro Cabrera <aldaya@gmail.com>"); +MODULE_DESCRIPTION("Xilinx Watchdog driver"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_MISCDEV(WATCHDOG_MINOR); diff --git a/drivers/watchdog/pc87413_wdt.c b/drivers/watchdog/pc87413_wdt.c index b7c139051575..e78d89986768 100644 --- a/drivers/watchdog/pc87413_wdt.c +++ b/drivers/watchdog/pc87413_wdt.c @@ -56,6 +56,7 @@ #define IO_DEFAULT 0x2E /* Address used on Portwell Boards */ static int io = IO_DEFAULT; +static int swc_base_addr = -1; static int timeout = DEFAULT_TIMEOUT; /* timeout value */ static unsigned long timer_enabled; /* is the timer enabled? */ @@ -116,9 +117,8 @@ static inline void pc87413_enable_swc(void) /* Read SWC I/O base address */ -static inline unsigned int pc87413_get_swc_base(void) +static void pc87413_get_swc_base_addr(void) { - unsigned int swc_base_addr = 0; unsigned char addr_l, addr_h = 0; /* Step 3: Read SWC I/O Base Address */ @@ -136,12 +136,11 @@ static inline unsigned int pc87413_get_swc_base(void) "Read SWC I/O Base Address: low %d, high %d, res %d\n", addr_l, addr_h, swc_base_addr); #endif - return swc_base_addr; } /* Select Bank 3 of SWC */ -static inline void pc87413_swc_bank3(unsigned int swc_base_addr) +static inline void pc87413_swc_bank3(void) { /* Step 4: Select Bank3 of SWC */ outb_p(inb(swc_base_addr + 0x0f) | 0x03, swc_base_addr + 0x0f); @@ -152,8 +151,7 @@ static inline void pc87413_swc_bank3(unsigned int swc_base_addr) /* Set watchdog timeout to x minutes */ -static inline void pc87413_programm_wdto(unsigned int swc_base_addr, - char pc87413_time) +static inline void pc87413_programm_wdto(char pc87413_time) { /* Step 5: Programm WDTO, Twd. 
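(the watchdog timeout value, in minutes)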
*/ outb_p(pc87413_time, swc_base_addr + WDTO); @@ -164,7 +162,7 @@ static inline void pc87413_programm_wdto(unsigned int swc_base_addr, /* Enable WDEN */ -static inline void pc87413_enable_wden(unsigned int swc_base_addr) +static inline void pc87413_enable_wden(void) { /* Step 6: Enable WDEN */ outb_p(inb(swc_base_addr + WDCTL) | 0x01, swc_base_addr + WDCTL); @@ -174,7 +172,7 @@ static inline void pc87413_enable_wden(unsigned int swc_base_addr) } /* Enable SW_WD_TREN */ -static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr) +static inline void pc87413_enable_sw_wd_tren(void) { /* Enable SW_WD_TREN */ outb_p(inb(swc_base_addr + WDCFG) | 0x80, swc_base_addr + WDCFG); @@ -185,7 +183,7 @@ static inline void pc87413_enable_sw_wd_tren(unsigned int swc_base_addr) /* Disable SW_WD_TREN */ -static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr) +static inline void pc87413_disable_sw_wd_tren(void) { /* Disable SW_WD_TREN */ outb_p(inb(swc_base_addr + WDCFG) & 0x7f, swc_base_addr + WDCFG); @@ -196,7 +194,7 @@ static inline void pc87413_disable_sw_wd_tren(unsigned int swc_base_addr) /* Enable SW_WD_TRG */ -static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr) +static inline void pc87413_enable_sw_wd_trg(void) { /* Enable SW_WD_TRG */ outb_p(inb(swc_base_addr + WDCTL) | 0x80, swc_base_addr + WDCTL); @@ -207,7 +205,7 @@ static inline void pc87413_enable_sw_wd_trg(unsigned int swc_base_addr) /* Disable SW_WD_TRG */ -static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr) +static inline void pc87413_disable_sw_wd_trg(void) { /* Disable SW_WD_TRG */ outb_p(inb(swc_base_addr + WDCTL) & 0x7f, swc_base_addr + WDCTL); @@ -222,18 +220,13 @@ static inline void pc87413_disable_sw_wd_trg(unsigned int swc_base_addr) static void pc87413_enable(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - pc87413_swc_bank3(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, timeout); - pc87413_enable_wden(swc_base_addr); - pc87413_enable_sw_wd_tren(swc_base_addr); - pc87413_enable_sw_wd_trg(swc_base_addr); + pc87413_swc_bank3(); + pc87413_programm_wdto(timeout); + pc87413_enable_wden(); + pc87413_enable_sw_wd_tren(); + pc87413_enable_sw_wd_trg(); spin_unlock(&io_lock); } @@ -242,17 +235,12 @@ static void pc87413_enable(void) static void pc87413_disable(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - pc87413_swc_bank3(swc_base_addr); - pc87413_disable_sw_wd_tren(swc_base_addr); - pc87413_disable_sw_wd_trg(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, 0); + pc87413_swc_bank3(); + pc87413_disable_sw_wd_tren(); + pc87413_disable_sw_wd_trg(); + pc87413_programm_wdto(0); spin_unlock(&io_lock); } @@ -261,20 +249,15 @@ static void pc87413_disable(void) static void pc87413_refresh(void) { - unsigned int swc_base_addr; - spin_lock(&io_lock); - pc87413_select_wdt_out(); - pc87413_enable_swc(); - swc_base_addr = pc87413_get_swc_base(); - pc87413_swc_bank3(swc_base_addr); - pc87413_disable_sw_wd_tren(swc_base_addr); - pc87413_disable_sw_wd_trg(swc_base_addr); - pc87413_programm_wdto(swc_base_addr, timeout); - pc87413_enable_wden(swc_base_addr); - pc87413_enable_sw_wd_tren(swc_base_addr); - pc87413_enable_sw_wd_trg(swc_base_addr); + pc87413_swc_bank3(); + pc87413_disable_sw_wd_tren(); + pc87413_disable_sw_wd_trg(); + 
pc87413_programm_wdto(timeout); + pc87413_enable_wden(); + pc87413_enable_sw_wd_tren(); + pc87413_enable_sw_wd_trg(); spin_unlock(&io_lock); } @@ -528,7 +511,8 @@ static int __init pc87413_init(void) printk(KERN_INFO PFX "Version " VERSION " at io 0x%X\n", WDT_INDEX_IO_PORT); - /* request_region(io, 2, "pc87413"); */ + if (!request_muxed_region(io, 2, MODNAME)) + return -EBUSY; ret = register_reboot_notifier(&pc87413_notifier); if (ret != 0) { @@ -541,12 +525,32 @@ static int __init pc87413_init(void) printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n", WATCHDOG_MINOR, ret); - unregister_reboot_notifier(&pc87413_notifier); - return ret; + goto reboot_unreg; } printk(KERN_INFO PFX "initialized. timeout=%d min \n", timeout); + + pc87413_select_wdt_out(); + pc87413_enable_swc(); + pc87413_get_swc_base_addr(); + + if (!request_region(swc_base_addr, 0x20, MODNAME)) { + printk(KERN_ERR PFX + "cannot request SWC region at 0x%x\n", swc_base_addr); + ret = -EBUSY; + goto misc_unreg; + } + pc87413_enable(); + + release_region(io, 2); return 0; + +misc_unreg: + misc_deregister(&pc87413_miscdev); +reboot_unreg: + unregister_reboot_notifier(&pc87413_notifier); + release_region(io, 2); + return ret; } /** @@ -569,7 +573,7 @@ static void __exit pc87413_exit(void) misc_deregister(&pc87413_miscdev); unregister_reboot_notifier(&pc87413_notifier); - /* release_region(io, 2); */ + release_region(swc_base_addr, 0x20); printk(KERN_INFO MODNAME " watchdog component driver removed.\n"); } diff --git a/drivers/watchdog/s3c2410_wdt.c b/drivers/watchdog/s3c2410_wdt.c index f7f5aa00df60..30da88f47cd3 100644 --- a/drivers/watchdog/s3c2410_wdt.c +++ b/drivers/watchdog/s3c2410_wdt.c @@ -589,6 +589,15 @@ static int s3c2410wdt_resume(struct platform_device *dev) #define s3c2410wdt_resume NULL #endif /* CONFIG_PM */ +#ifdef CONFIG_OF +static const struct of_device_id s3c2410_wdt_match[] = { + { .compatible = "samsung,s3c2410-wdt" }, + {}, +}; +MODULE_DEVICE_TABLE(of, s3c2410_wdt_match); +#else +#define s3c2410_wdt_match NULL +#endif static struct platform_driver s3c2410wdt_driver = { .probe = s3c2410wdt_probe, @@ -599,6 +608,7 @@ static struct platform_driver s3c2410wdt_driver = { .driver = { .owner = THIS_MODULE, .name = "s3c2410-wdt", + .of_match_table = s3c2410_wdt_match, }, }; diff --git a/drivers/watchdog/sch311x_wdt.c b/drivers/watchdog/sch311x_wdt.c index c7cf4b01f58d..029467e34636 100644 --- a/drivers/watchdog/sch311x_wdt.c +++ b/drivers/watchdog/sch311x_wdt.c @@ -472,15 +472,10 @@ static void sch311x_wdt_shutdown(struct platform_device *dev) sch311x_wdt_stop(); } -#define sch311x_wdt_suspend NULL -#define sch311x_wdt_resume NULL - static struct platform_driver sch311x_wdt_driver = { .probe = sch311x_wdt_probe, .remove = __devexit_p(sch311x_wdt_remove), .shutdown = sch311x_wdt_shutdown, - .suspend = sch311x_wdt_suspend, - .resume = sch311x_wdt_resume, .driver = { .owner = THIS_MODULE, .name = DRV_NAME, diff --git a/drivers/watchdog/sp805_wdt.c b/drivers/watchdog/sp805_wdt.c index 0d80e08b6439..cc2cfbe33b30 100644 --- a/drivers/watchdog/sp805_wdt.c +++ b/drivers/watchdog/sp805_wdt.c @@ -134,6 +134,8 @@ static void wdt_enable(void) writel(INT_ENABLE | RESET_ENABLE, wdt->base + WDTCONTROL); writel(LOCK, wdt->base + WDTLOCK); + /* Flush posted writes. 
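Reading WDTLOCK back ensures the control writes above have reached the device before the spinlock is released.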
*/ + readl(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); } @@ -144,9 +146,10 @@ static void wdt_disable(void) writel(UNLOCK, wdt->base + WDTLOCK); writel(0, wdt->base + WDTCONTROL); - writel(0, wdt->base + WDTLOAD); writel(LOCK, wdt->base + WDTLOCK); + /* Flush posted writes. */ + readl(wdt->base + WDTLOCK); spin_unlock(&wdt->lock); } diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c new file mode 100644 index 000000000000..cfa1a1518aad --- /dev/null +++ b/drivers/watchdog/watchdog_core.c @@ -0,0 +1,111 @@ +/* + * watchdog_core.c + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. + * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> /* For EXPORT_SYMBOL/module stuff/... */ +#include <linux/types.h> /* For standard types */ +#include <linux/errno.h> /* For the -ENODEV/... values */ +#include <linux/kernel.h> /* For printk/panic/... */ +#include <linux/watchdog.h> /* For watchdog specific items */ +#include <linux/init.h> /* For __init/__exit/... */ + +#include "watchdog_dev.h" /* For watchdog_dev_register/... */ + +/** + * watchdog_register_device() - register a watchdog device + * @wdd: watchdog device + * + * Register a watchdog device with the kernel so that the + * watchdog timer can be accessed from userspace. + * + * A zero is returned on success and a negative errno code for + * failure. + */ +int watchdog_register_device(struct watchdog_device *wdd) +{ + int ret; + + if (wdd == NULL || wdd->info == NULL || wdd->ops == NULL) + return -EINVAL; + + /* Mandatory operations need to be supported */ + if (wdd->ops->start == NULL || wdd->ops->stop == NULL) + return -EINVAL; + + /* + * Check that we have valid min and max timeout values, if + * not reset them both to 0 (=not used or unknown) + */ + if (wdd->min_timeout > wdd->max_timeout) { + pr_info("Invalid min and max timeout values, resetting to 0!\n"); + wdd->min_timeout = 0; + wdd->max_timeout = 0; + } + + /* + * Note: now that all watchdog_device data has been verified, we + * will not check this anymore in other functions. If data gets + * corrupted in a later stage then we expect a kernel panic! 
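+ *
+ * To illustrate the checks above: a (hypothetical) driver must supply
+ * at least
+ *
+ *	static struct watchdog_device my_wdd = {
+ *		.info = &my_ident,
+ *		.ops = &my_ops,
+ *	};
+ *
+ * where my_ops has .start and .stop set; min_timeout and max_timeout
+ * may both stay 0 (not used or unknown).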
+ */ + + /* We only support 1 watchdog device via the /dev/watchdog interface */ + ret = watchdog_dev_register(wdd); + if (ret) { + pr_err("error registering /dev/watchdog (err=%d).\n", ret); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(watchdog_register_device); + +/** + * watchdog_unregister_device() - unregister a watchdog device + * @wdd: watchdog device to unregister + * + * Unregister a watchdog device that was previously successfully + * registered with watchdog_register_device(). + */ +void watchdog_unregister_device(struct watchdog_device *wdd) +{ + int ret; + + if (wdd == NULL) + return; + + ret = watchdog_dev_unregister(wdd); + if (ret) + pr_err("error unregistering /dev/watchdog (err=%d).\n", ret); +} +EXPORT_SYMBOL_GPL(watchdog_unregister_device); + +MODULE_AUTHOR("Alan Cox <alan@lxorguk.ukuu.org.uk>"); +MODULE_AUTHOR("Wim Van Sebroeck <wim@iguana.be>"); +MODULE_DESCRIPTION("WatchDog Timer Driver Core"); +MODULE_LICENSE("GPL"); diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c new file mode 100644 index 000000000000..d33520d0b4c9 --- /dev/null +++ b/drivers/watchdog/watchdog_dev.c @@ -0,0 +1,395 @@ +/* + * watchdog_dev.c + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. + * + * This part of the generic code takes care of the following + * misc device: /dev/watchdog. + * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> /* For module stuff/... */ +#include <linux/types.h> /* For standard types (like size_t) */ +#include <linux/errno.h> /* For the -ENODEV/... values */ +#include <linux/kernel.h> /* For printk/panic/... */ +#include <linux/fs.h> /* For file operations */ +#include <linux/watchdog.h> /* For watchdog specific items */ +#include <linux/miscdevice.h> /* For handling misc devices */ +#include <linux/init.h> /* For __init/__exit/... */ +#include <linux/uaccess.h> /* For copy_to_user/put_user/... */ + +/* make sure we only register one /dev/watchdog device */ +static unsigned long watchdog_dev_busy; +/* the watchdog device behind /dev/watchdog */ +static struct watchdog_device *wdd; + +/* + * watchdog_ping: ping the watchdog. + * @wddev: the watchdog device to ping + * + * If the watchdog has no own ping operation then it needs to be + * restarted via the start operation. This wrapper function does + * exactly that. + * We only ping when the watchdog device is running. 
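+ *
+ * For example, a (hypothetical) driver that only fills in the
+ * mandatory operations
+ *
+ *	static const struct watchdog_ops my_ops = {
+ *		.owner = THIS_MODULE,
+ *		.start = my_start,
+ *		.stop = my_stop,
+ *	};
+ *
+ * still gets keepalive support: with .ping left NULL, this wrapper
+ * falls back to the start operation.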
+ */ + +static int watchdog_ping(struct watchdog_device *wddev) +{ + if (test_bit(WDOG_ACTIVE, &wdd->status)) { + if (wddev->ops->ping) + return wddev->ops->ping(wddev); /* ping the watchdog */ + else + return wddev->ops->start(wddev); /* restart watchdog */ + } + return 0; +} + +/* + * watchdog_start: wrapper to start the watchdog. + * @wddev: the watchdog device to start + * + * Start the watchdog if it is not active and mark it active. + * This function returns zero on success or a negative errno code for + * failure. + */ + +static int watchdog_start(struct watchdog_device *wddev) +{ + int err; + + if (!test_bit(WDOG_ACTIVE, &wdd->status)) { + err = wddev->ops->start(wddev); + if (err < 0) + return err; + + set_bit(WDOG_ACTIVE, &wdd->status); + } + return 0; +} + +/* + * watchdog_stop: wrapper to stop the watchdog. + * @wddev: the watchdog device to stop + * + * Stop the watchdog if it is still active and unmark it active. + * This function returns zero on success or a negative errno code for + * failure. + * If the 'nowayout' feature was set, the watchdog cannot be stopped. + */ + +static int watchdog_stop(struct watchdog_device *wddev) +{ + int err = -EBUSY; + + if (test_bit(WDOG_NO_WAY_OUT, &wdd->status)) { + pr_info("%s: nowayout prevents the watchdog from being stopped!\n", + wdd->info->identity); + return err; + } + + if (test_bit(WDOG_ACTIVE, &wdd->status)) { + err = wddev->ops->stop(wddev); + if (err < 0) + return err; + + clear_bit(WDOG_ACTIVE, &wdd->status); + } + return 0; +} + +/* + * watchdog_write: writes to the watchdog. + * @file: file from VFS + * @data: user address of data + * @len: length of data + * @ppos: pointer to the file offset + * + * A write to a watchdog device is defined as a keepalive ping. + * Writing the magic 'V' sequence allows the next close to turn + * off the watchdog (if 'nowayout' is not set). + */ + +static ssize_t watchdog_write(struct file *file, const char __user *data, + size_t len, loff_t *ppos) +{ + size_t i; + char c; + + if (len == 0) + return 0; + + /* + * Note: just in case someone wrote the magic character + * five months ago... + */ + clear_bit(WDOG_ALLOW_RELEASE, &wdd->status); + + /* scan to see whether or not we got the magic character */ + for (i = 0; i != len; i++) { + if (get_user(c, data + i)) + return -EFAULT; + if (c == 'V') + set_bit(WDOG_ALLOW_RELEASE, &wdd->status); + } + + /* someone wrote to us, so we send the watchdog a keepalive ping */ + watchdog_ping(wdd); + + return len; +} + +/* + * watchdog_ioctl: handle the different ioctls for the watchdog device. + * @file: file handle to the device + * @cmd: watchdog command + * @arg: argument pointer + * + * The watchdog API defines a common set of functions for all watchdogs + * according to their available features. + */ + +static long watchdog_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + int __user *p = argp; + unsigned int val; + int err; + + if (wdd->ops->ioctl) { + err = wdd->ops->ioctl(wdd, cmd, arg); + if (err != -ENOIOCTLCMD) + return err; + } + + switch (cmd) { + case WDIOC_GETSUPPORT: + return copy_to_user(argp, wdd->info, + sizeof(struct watchdog_info)) ? -EFAULT : 0; + case WDIOC_GETSTATUS: + val = wdd->ops->status ?
wdd->ops->status(wdd) : 0; + return put_user(val, p); + case WDIOC_GETBOOTSTATUS: + return put_user(wdd->bootstatus, p); + case WDIOC_SETOPTIONS: + if (get_user(val, p)) + return -EFAULT; + if (val & WDIOS_DISABLECARD) { + err = watchdog_stop(wdd); + if (err < 0) + return err; + } + if (val & WDIOS_ENABLECARD) { + err = watchdog_start(wdd); + if (err < 0) + return err; + } + return 0; + case WDIOC_KEEPALIVE: + if (!(wdd->info->options & WDIOF_KEEPALIVEPING)) + return -EOPNOTSUPP; + watchdog_ping(wdd); + return 0; + case WDIOC_SETTIMEOUT: + if ((wdd->ops->set_timeout == NULL) || + !(wdd->info->options & WDIOF_SETTIMEOUT)) + return -EOPNOTSUPP; + if (get_user(val, p)) + return -EFAULT; + if ((wdd->max_timeout != 0) && + (val < wdd->min_timeout || val > wdd->max_timeout)) + return -EINVAL; + err = wdd->ops->set_timeout(wdd, val); + if (err < 0) + return err; + wdd->timeout = val; + /* If the watchdog is active then we send a keepalive ping + * to make sure that the watchdog keeps running (and if + * possible that it takes the new timeout) */ + watchdog_ping(wdd); + /* Fall through */ + case WDIOC_GETTIMEOUT: + /* timeout == 0 means that we don't know the timeout */ + if (wdd->timeout == 0) + return -EOPNOTSUPP; + return put_user(wdd->timeout, p); + default: + return -ENOTTY; + } +} + +/* + * watchdog_open: open the /dev/watchdog device. + * @inode: inode of device + * @file: file handle to device + * + * When the /dev/watchdog device gets opened, we start the watchdog. + * Watch out: the /dev/watchdog device is single open, so we make sure + * it can only be opened once. + */ + +static int watchdog_open(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + + /* the watchdog is single open! */ + if (test_and_set_bit(WDOG_DEV_OPEN, &wdd->status)) + return -EBUSY; + + /* + * If the /dev/watchdog device is open, we don't want the module + * to be unloaded. + */ + if (!try_module_get(wdd->ops->owner)) + goto out; + + err = watchdog_start(wdd); + if (err < 0) + goto out_mod; + + /* /dev/watchdog is a virtual (and thus non-seekable) device */ + return nonseekable_open(inode, file); + +out_mod: + module_put(wdd->ops->owner); +out: + clear_bit(WDOG_DEV_OPEN, &wdd->status); + return err; +} + +/* + * watchdog_release: release the /dev/watchdog device. + * @inode: inode of device + * @file: file handle to device + * + * This is the code for when /dev/watchdog gets closed. We will only + * stop the watchdog when we have received the magic char (and nowayout + * was not set), else the watchdog will keep running. + */ + +static int watchdog_release(struct inode *inode, struct file *file) +{ + int err = -EBUSY; + + /* + * We only stop the watchdog if we received the magic character + * or if WDIOF_MAGICCLOSE is not set. If nowayout was set then + * watchdog_stop will fail.
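+ *
+ * An orderly userspace shutdown thus looks like this (illustrative):
+ *
+ *	write(fd, "V", 1);
+ *	close(fd);
+ *
+ * Without the magic 'V' (and with WDIOF_MAGICCLOSE set) the watchdog
+ * is kept alive and we complain loudly below.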
+ */ + if (test_and_clear_bit(WDOG_ALLOW_RELEASE, &wdd->status) || + !(wdd->info->options & WDIOF_MAGICCLOSE)) + err = watchdog_stop(wdd); + + /* If the watchdog was not stopped, send a keepalive ping */ + if (err < 0) { + pr_crit("%s: watchdog did not stop!\n", wdd->info->identity); + watchdog_ping(wdd); + } + + /* Allow the owner module to be unloaded again */ + module_put(wdd->ops->owner); + + /* make sure that /dev/watchdog can be re-opened */ + clear_bit(WDOG_DEV_OPEN, &wdd->status); + + return 0; +} + +static const struct file_operations watchdog_fops = { + .owner = THIS_MODULE, + .write = watchdog_write, + .unlocked_ioctl = watchdog_ioctl, + .open = watchdog_open, + .release = watchdog_release, +}; + +static struct miscdevice watchdog_miscdev = { + .minor = WATCHDOG_MINOR, + .name = "watchdog", + .fops = &watchdog_fops, +}; + +/* + * watchdog_dev_register: + * @watchdog: watchdog device + * + * Register a watchdog device as /dev/watchdog. /dev/watchdog + * is actually a miscdevice and thus we set it up like that. + */ + +int watchdog_dev_register(struct watchdog_device *watchdog) +{ + int err; + + /* Only one device can register for /dev/watchdog */ + if (test_and_set_bit(0, &watchdog_dev_busy)) { + pr_err("only one watchdog can use /dev/watchdog.\n"); + return -EBUSY; + } + + wdd = watchdog; + + err = misc_register(&watchdog_miscdev); + if (err != 0) { + pr_err("%s: cannot register miscdev on minor=%d (err=%d).\n", + watchdog->info->identity, WATCHDOG_MINOR, err); + goto out; + } + + return 0; + +out: + wdd = NULL; + clear_bit(0, &watchdog_dev_busy); + return err; +} + +/* + * watchdog_dev_unregister: + * @watchdog: watchdog device + * + * Deregister the /dev/watchdog device. + */ + +int watchdog_dev_unregister(struct watchdog_device *watchdog) +{ + /* Check that a watchdog device was registered in the past */ + if (!test_bit(0, &watchdog_dev_busy) || !wdd) + return -ENODEV; + + /* We can only unregister the watchdog device that was registered */ + if (watchdog != wdd) { + pr_err("%s: watchdog was not registered as /dev/watchdog.\n", + watchdog->info->identity); + return -ENODEV; + } + + misc_deregister(&watchdog_miscdev); + wdd = NULL; + clear_bit(0, &watchdog_dev_busy); + return 0; +} diff --git a/drivers/watchdog/watchdog_dev.h b/drivers/watchdog/watchdog_dev.h new file mode 100644 index 000000000000..bc7612be25ce --- /dev/null +++ b/drivers/watchdog/watchdog_dev.h @@ -0,0 +1,33 @@ +/* + * watchdog_core.h + * + * (c) Copyright 2008-2011 Alan Cox <alan@lxorguk.ukuu.org.uk>, + * All Rights Reserved. + * + * (c) Copyright 2008-2011 Wim Van Sebroeck <wim@iguana.be>. + * + * This source code is part of the generic code that can be used + * by all the watchdog timer drivers. + * + * Based on source code of the following authors: + * Matt Domsch <Matt_Domsch@dell.com>, + * Rob Radez <rob@osinvestor.com>, + * Rusty Lynch <rusty@linux.co.intel.com> + * Satyam Sharma <satyam@infradead.org> + * Randy Dunlap <randy.dunlap@oracle.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Neither Alan Cox, CymruNet Ltd., Wim Van Sebroeck nor Iguana vzw. + * admit liability nor provide warranty for any of this software. + * This material is provided "AS-IS" and at no charge. 
+ */ + +/* + * Functions/procedures to be called by the core + */ +int watchdog_dev_register(struct watchdog_device *); +int watchdog_dev_unregister(struct watchdog_device *); diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 52d7eca8c7bf..502b9e988679 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h @@ -34,6 +34,9 @@ struct btrfs_inode { */ struct btrfs_key location; + /* Lock for counters */ + spinlock_t lock; + /* the extent_tree has caches of all the extent mappings to disk */ struct extent_map_tree extent_tree; @@ -134,8 +137,8 @@ struct btrfs_inode { * items we think we'll end up using, and reserved_extents is the number * of extent items we've reserved metadata for. */ - atomic_t outstanding_extents; - atomic_t reserved_extents; + unsigned outstanding_extents; + unsigned reserved_extents; /* * ordered_data_close is set by truncate when a file that used @@ -184,4 +187,13 @@ static inline void btrfs_i_size_write(struct inode *inode, u64 size) BTRFS_I(inode)->disk_i_size = size; } +static inline bool btrfs_is_free_space_inode(struct btrfs_root *root, + struct inode *inode) +{ + if (root == root->fs_info->tree_root || + BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) + return true; + return false; +} + #endif diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 2e667868e0d2..011cab3aca8d 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -54,8 +54,13 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) { int i; for (i = 0; i < BTRFS_MAX_LEVEL; i++) { - if (p->nodes[i] && p->locks[i]) - btrfs_set_lock_blocking(p->nodes[i]); + if (!p->nodes[i] || !p->locks[i]) + continue; + btrfs_set_lock_blocking_rw(p->nodes[i], p->locks[i]); + if (p->locks[i] == BTRFS_READ_LOCK) + p->locks[i] = BTRFS_READ_LOCK_BLOCKING; + else if (p->locks[i] == BTRFS_WRITE_LOCK) + p->locks[i] = BTRFS_WRITE_LOCK_BLOCKING; } } @@ -68,7 +73,7 @@ noinline void btrfs_set_path_blocking(struct btrfs_path *p) * for held */ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, - struct extent_buffer *held) + struct extent_buffer *held, int held_rw) { int i; @@ -79,19 +84,29 @@ noinline void btrfs_clear_path_blocking(struct btrfs_path *p, * really sure by forcing the path to blocking before we clear * the path blocking. 
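 *
 * held_rw tells us how 'held' is currently locked (BTRFS_READ_LOCK or
 * BTRFS_WRITE_LOCK) so that the matching blocking variant can be set
 * and cleared again afterwards.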
*/ - if (held) - btrfs_set_lock_blocking(held); + if (held) { + btrfs_set_lock_blocking_rw(held, held_rw); + if (held_rw == BTRFS_WRITE_LOCK) + held_rw = BTRFS_WRITE_LOCK_BLOCKING; + else if (held_rw == BTRFS_READ_LOCK) + held_rw = BTRFS_READ_LOCK_BLOCKING; + } btrfs_set_path_blocking(p); #endif for (i = BTRFS_MAX_LEVEL - 1; i >= 0; i--) { - if (p->nodes[i] && p->locks[i]) - btrfs_clear_lock_blocking(p->nodes[i]); + if (p->nodes[i] && p->locks[i]) { + btrfs_clear_lock_blocking_rw(p->nodes[i], p->locks[i]); + if (p->locks[i] == BTRFS_WRITE_LOCK_BLOCKING) + p->locks[i] = BTRFS_WRITE_LOCK; + else if (p->locks[i] == BTRFS_READ_LOCK_BLOCKING) + p->locks[i] = BTRFS_READ_LOCK; + } } #ifdef CONFIG_DEBUG_LOCK_ALLOC if (held) - btrfs_clear_lock_blocking(held); + btrfs_clear_lock_blocking_rw(held, held_rw); #endif } @@ -119,7 +134,7 @@ noinline void btrfs_release_path(struct btrfs_path *p) if (!p->nodes[i]) continue; if (p->locks[i]) { - btrfs_tree_unlock(p->nodes[i]); + btrfs_tree_unlock_rw(p->nodes[i], p->locks[i]); p->locks[i] = 0; } free_extent_buffer(p->nodes[i]); @@ -167,6 +182,25 @@ struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root) return eb; } +/* loop around taking references on and locking the root node of the + * tree until you end up with a lock on the root. A locked buffer + * is returned, with a reference held. + */ +struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) +{ + struct extent_buffer *eb; + + while (1) { + eb = btrfs_root_node(root); + btrfs_tree_read_lock(eb); + if (eb == root->node) + break; + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + } + return eb; +} + /* cowonly root (everything not a reference counted cow subvolume), just get * put onto a simple dirty list. transaction.c walks this to make sure they * get properly updated on disk. 
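 * Cowonly roots are still COWed on modification; they simply skip the
 * reference counting done for subvolume trees.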
@@ -626,14 +660,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, for (i = start_slot; i < end_slot; i++) { int close = 1; - if (!parent->map_token) { - map_extent_buffer(parent, - btrfs_node_key_ptr_offset(i), - sizeof(struct btrfs_key_ptr), - &parent->map_token, &parent->kaddr, - &parent->map_start, &parent->map_len, - KM_USER1); - } btrfs_node_key(parent, &disk_key, i); if (!progress_passed && comp_keys(&disk_key, progress) < 0) continue; @@ -656,11 +682,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, last_block = blocknr; continue; } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) @@ -701,11 +722,6 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, btrfs_tree_unlock(cur); free_extent_buffer(cur); } - if (parent->map_token) { - unmap_extent_buffer(parent, parent->map_token, - KM_USER1); - parent->map_token = NULL; - } return err; } @@ -746,7 +762,6 @@ static noinline int generic_bin_search(struct extent_buffer *eb, struct btrfs_disk_key *tmp = NULL; struct btrfs_disk_key unaligned; unsigned long offset; - char *map_token = NULL; char *kaddr = NULL; unsigned long map_start = 0; unsigned long map_len = 0; @@ -756,18 +771,13 @@ static noinline int generic_bin_search(struct extent_buffer *eb, mid = (low + high) / 2; offset = p + mid * item_size; - if (!map_token || offset < map_start || + if (!kaddr || offset < map_start || (offset + sizeof(struct btrfs_disk_key)) > map_start + map_len) { - if (map_token) { - unmap_extent_buffer(eb, map_token, KM_USER0); - map_token = NULL; - } err = map_private_extent_buffer(eb, offset, sizeof(struct btrfs_disk_key), - &map_token, &kaddr, - &map_start, &map_len, KM_USER0); + &kaddr, &map_start, &map_len); if (!err) { tmp = (struct btrfs_disk_key *)(kaddr + offset - @@ -790,14 +800,10 @@ static noinline int generic_bin_search(struct extent_buffer *eb, high = mid; else { *slot = mid; - if (map_token) - unmap_extent_buffer(eb, map_token, KM_USER0); return 0; } } *slot = low; - if (map_token) - unmap_extent_buffer(eb, map_token, KM_USER0); return 1; } @@ -890,7 +896,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, mid = path->nodes[level]; - WARN_ON(!path->locks[level]); + WARN_ON(path->locks[level] != BTRFS_WRITE_LOCK && + path->locks[level] != BTRFS_WRITE_LOCK_BLOCKING); WARN_ON(btrfs_header_generation(mid) != trans->transid); orig_ptr = btrfs_node_blockptr(mid, orig_slot); @@ -1228,7 +1235,6 @@ static void reada_for_search(struct btrfs_root *root, u32 nr; u32 blocksize; u32 nscan = 0; - bool map = true; if (level != 1) return; @@ -1250,19 +1256,8 @@ static void reada_for_search(struct btrfs_root *root, nritems = btrfs_header_nritems(node); nr = slot; - if (node->map_token || path->skip_locking) - map = false; while (1) { - if (map && !node->map_token) { - unsigned long offset = btrfs_node_key_ptr_offset(nr); - map_private_extent_buffer(node, offset, - sizeof(struct btrfs_key_ptr), - &node->map_token, - &node->kaddr, - &node->map_start, - &node->map_len, KM_USER1); - } if (direction < 0) { if (nr == 0) break; @@ -1281,11 +1276,6 @@ static void reada_for_search(struct btrfs_root *root, if ((search <= target && target - search <= 65536) || (search > target && search - target <= 65536)) { gen = btrfs_node_ptr_generation(node, nr); - if (map && node->map_token) { - unmap_extent_buffer(node, node->map_token, - KM_USER1); - node->map_token = NULL; - } 
readahead_tree_block(root, search, blocksize, gen); nread += blocksize; } @@ -1293,10 +1283,6 @@ static void reada_for_search(struct btrfs_root *root, if ((nread > 65536 || nscan > 32)) break; } - if (map && node->map_token) { - unmap_extent_buffer(node, node->map_token, KM_USER1); - node->map_token = NULL; - } } /* @@ -1409,7 +1395,7 @@ static noinline void unlock_up(struct btrfs_path *path, int level, t = path->nodes[i]; if (i >= lowest_unlock && i > skip_level && path->locks[i]) { - btrfs_tree_unlock(t); + btrfs_tree_unlock_rw(t, path->locks[i]); path->locks[i] = 0; } } @@ -1436,7 +1422,7 @@ noinline void btrfs_unlock_up_safe(struct btrfs_path *path, int level) continue; if (!path->locks[i]) continue; - btrfs_tree_unlock(path->nodes[i]); + btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]); path->locks[i] = 0; } } @@ -1485,6 +1471,8 @@ read_block_for_search(struct btrfs_trans_handle *trans, * we can trust our generation number */ free_extent_buffer(tmp); + btrfs_set_path_blocking(p); + tmp = read_tree_block(root, blocknr, blocksize, gen); if (tmp && btrfs_buffer_uptodate(tmp, gen)) { *eb_ret = tmp; @@ -1540,20 +1528,27 @@ read_block_for_search(struct btrfs_trans_handle *trans, static int setup_nodes_for_search(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_path *p, - struct extent_buffer *b, int level, int ins_len) + struct extent_buffer *b, int level, int ins_len, + int *write_lock_level) { int ret; if ((p->search_for_split || ins_len > 0) && btrfs_header_nritems(b) >= BTRFS_NODEPTRS_PER_BLOCK(root) - 3) { int sret; + if (*write_lock_level < level + 1) { + *write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + sret = reada_for_balance(root, p, level); if (sret) goto again; btrfs_set_path_blocking(p); sret = split_node(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); BUG_ON(sret > 0); if (sret) { @@ -1565,13 +1560,19 @@ setup_nodes_for_search(struct btrfs_trans_handle *trans, BTRFS_NODEPTRS_PER_BLOCK(root) / 2) { int sret; + if (*write_lock_level < level + 1) { + *write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + sret = reada_for_balance(root, p, level); if (sret) goto again; btrfs_set_path_blocking(p); sret = balance_level(trans, root, p, level); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); if (sret) { ret = sret; @@ -1615,27 +1616,78 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root int err; int level; int lowest_unlock = 1; + int root_lock; + /* everything at write_lock_level or lower must be write locked */ + int write_lock_level = 0; u8 lowest_level = 0; lowest_level = p->lowest_level; WARN_ON(lowest_level && ins_len > 0); WARN_ON(p->nodes[0] != NULL); - if (ins_len < 0) + if (ins_len < 0) { lowest_unlock = 2; + /* when we are removing items, we might have to go up to level + * two as we update tree pointers Make sure we keep write + * for those levels as well + */ + write_lock_level = 2; + } else if (ins_len > 0) { + /* + * for inserting items, make sure we have a write lock on + * level 1 so we can update keys + */ + write_lock_level = 1; + } + + if (!cow) + write_lock_level = -1; + + if (cow && (p->keep_locks || p->lowest_level)) + write_lock_level = BTRFS_MAX_LEVEL; + again: + /* + * we try very hard to do read locks on the root + */ + root_lock = BTRFS_READ_LOCK; + level = 0; if (p->search_commit_root) { + /* + * the commit roots are read only + * so we always do read locks + */ b = 
root->commit_root; extent_buffer_get(b); + level = btrfs_header_level(b); if (!p->skip_locking) - btrfs_tree_lock(b); + btrfs_tree_read_lock(b); } else { - if (p->skip_locking) + if (p->skip_locking) { b = btrfs_root_node(root); - else - b = btrfs_lock_root_node(root); + level = btrfs_header_level(b); + } else { + /* we don't know the level of the root node + * until we actually have it read locked + */ + b = btrfs_read_lock_root_node(root); + level = btrfs_header_level(b); + if (level <= write_lock_level) { + /* whoops, must trade for write lock */ + btrfs_tree_read_unlock(b); + free_extent_buffer(b); + b = btrfs_lock_root_node(root); + root_lock = BTRFS_WRITE_LOCK; + + /* the level might have changed, check again */ + level = btrfs_header_level(b); + } + } } + p->nodes[level] = b; + if (!p->skip_locking) + p->locks[level] = root_lock; while (b) { level = btrfs_header_level(b); @@ -1644,10 +1696,6 @@ again: * setup the path here so we can release it under lock * contention with the cow code */ - p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; - if (cow) { /* * if we don't really need to cow this block @@ -1659,6 +1707,16 @@ again: btrfs_set_path_blocking(p); + /* + * must have write locks on this node and the + * parent + */ + if (level + 1 > write_lock_level) { + write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + err = btrfs_cow_block(trans, root, b, p->nodes[level + 1], p->slots[level + 1], &b); @@ -1671,10 +1729,7 @@ cow_done: BUG_ON(!cow && ins_len); p->nodes[level] = b; - if (!p->skip_locking) - p->locks[level] = 1; - - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); /* * we have a lock on b and as long as we aren't changing @@ -1700,7 +1755,7 @@ cow_done: } p->slots[level] = slot; err = setup_nodes_for_search(trans, root, p, b, level, - ins_len); + ins_len, &write_lock_level); if (err == -EAGAIN) goto again; if (err) { @@ -1710,6 +1765,19 @@ cow_done: b = p->nodes[level]; slot = p->slots[level]; + /* + * slot 0 is special, if we change the key + * we have to update the parent pointer + * which means we must have a write lock + * on the parent + */ + if (slot == 0 && cow && + write_lock_level < level + 1) { + write_lock_level = level + 1; + btrfs_release_path(p); + goto again; + } + unlock_up(p, level, lowest_unlock); if (level == lowest_level) { @@ -1728,23 +1796,42 @@ cow_done: } if (!p->skip_locking) { - btrfs_clear_path_blocking(p, NULL); - err = btrfs_try_spin_lock(b); - - if (!err) { - btrfs_set_path_blocking(p); - btrfs_tree_lock(b); - btrfs_clear_path_blocking(p, b); + level = btrfs_header_level(b); + if (level <= write_lock_level) { + err = btrfs_try_tree_write_lock(b); + if (!err) { + btrfs_set_path_blocking(p); + btrfs_tree_lock(b); + btrfs_clear_path_blocking(p, b, + BTRFS_WRITE_LOCK); + } + p->locks[level] = BTRFS_WRITE_LOCK; + } else { + err = btrfs_try_tree_read_lock(b); + if (!err) { + btrfs_set_path_blocking(p); + btrfs_tree_read_lock(b); + btrfs_clear_path_blocking(p, b, + BTRFS_READ_LOCK); + } + p->locks[level] = BTRFS_READ_LOCK; } + p->nodes[level] = b; } } else { p->slots[level] = slot; if (ins_len > 0 && btrfs_leaf_free_space(root, b) < ins_len) { + if (write_lock_level < 1) { + write_lock_level = 1; + btrfs_release_path(p); + goto again; + } + btrfs_set_path_blocking(p); err = split_leaf(trans, root, key, p, ins_len, ret == 0); - btrfs_clear_path_blocking(p, NULL); + btrfs_clear_path_blocking(p, NULL, 0); BUG_ON(err > 0); if (err) { @@ -2025,7 +2112,7 @@ static noinline int 
insert_new_root(struct btrfs_trans_handle *trans, add_root_to_dirty_list(root); extent_buffer_get(c); path->nodes[level] = c; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK; path->slots[level] = 0; return 0; } @@ -2253,14 +2340,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, if (path->slots[0] == i) push_space += data_size; - if (!left->map_token) { - map_extent_buffer(left, (unsigned long)item, - sizeof(struct btrfs_item), - &left->map_token, &left->kaddr, - &left->map_start, &left->map_len, - KM_USER1); - } - this_item_size = btrfs_item_size(left, item); if (this_item_size + sizeof(*item) + push_space > free_space) break; @@ -2271,10 +2350,6 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, break; i--; } - if (left->map_token) { - unmap_extent_buffer(left, left->map_token, KM_USER1); - left->map_token = NULL; - } if (push_items == 0) goto out_unlock; @@ -2316,21 +2391,10 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, push_space = BTRFS_LEAF_DATA_SIZE(root); for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } push_space -= btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } left_nritems -= push_items; btrfs_set_header_nritems(left, left_nritems); @@ -2467,13 +2531,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = 0; i < nr; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } if (!empty && push_items > 0) { if (path->slots[0] < i) @@ -2496,11 +2553,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, push_space += this_item_size + sizeof(*item); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - if (push_items == 0) { ret = 1; goto out; @@ -2530,23 +2582,12 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(left, i); - if (!left->map_token) { - map_extent_buffer(left, (unsigned long)item, - sizeof(struct btrfs_item), - &left->map_token, &left->kaddr, - &left->map_start, &left->map_len, - KM_USER1); - } ioff = btrfs_item_offset(left, item); btrfs_set_item_offset(left, item, ioff - (BTRFS_LEAF_DATA_SIZE(root) - old_left_item_size)); } btrfs_set_header_nritems(left, old_left_nritems + push_items); - if (left->map_token) { - unmap_extent_buffer(left, left->map_token, KM_USER1); - left->map_token = NULL; - } /* fixup right node */ if (push_items > right_nritems) { @@ -2574,21 +2615,9 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, for (i = 0; i < right_nritems; i++) { item = btrfs_item_nr(right, i); - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - push_space = push_space - btrfs_item_size(right, item); btrfs_set_item_offset(right, item, push_space); } - if (right->map_token) { - unmap_extent_buffer(right, 
right->map_token, KM_USER1); - right->map_token = NULL; - } btrfs_mark_buffer_dirty(left); if (right_nritems) @@ -2729,23 +2758,10 @@ static noinline int copy_for_split(struct btrfs_trans_handle *trans, struct btrfs_item *item = btrfs_item_nr(right, i); u32 ioff; - if (!right->map_token) { - map_extent_buffer(right, (unsigned long)item, - sizeof(struct btrfs_item), - &right->map_token, &right->kaddr, - &right->map_start, &right->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(right, item); btrfs_set_item_offset(right, item, ioff + rt_data_off); } - if (right->map_token) { - unmap_extent_buffer(right, right->map_token, KM_USER1); - right->map_token = NULL; - } - btrfs_set_header_nritems(l, mid); ret = 0; btrfs_item_key(right, &disk_key, 0); @@ -3264,23 +3280,10 @@ int btrfs_truncate_item(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + size_diff); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the data */ if (from_end) { memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + @@ -3377,22 +3380,10 @@ int btrfs_extend_item(struct btrfs_trans_handle *trans, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - data_size); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the data */ memmove_extent_buffer(leaf, btrfs_leaf_data(leaf) + data_end - data_size, btrfs_leaf_data(leaf) + @@ -3494,27 +3485,13 @@ int btrfs_insert_some_items(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. data0.size */ /* first correct the data pointers */ - WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - total_data); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), @@ -3608,27 +3585,13 @@ int setup_items_for_insert(struct btrfs_trans_handle *trans, * item0..itemN ... dataN.offset..dataN.size .. 
data0.size */ /* first correct the data pointers */ - WARN_ON(leaf->map_token); for (i = slot; i < nritems; i++) { u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } - ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff - total_data); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - /* shift the items */ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + nr), btrfs_item_nr_offset(slot), @@ -3840,22 +3803,10 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, u32 ioff; item = btrfs_item_nr(leaf, i); - if (!leaf->map_token) { - map_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - } ioff = btrfs_item_offset(leaf, item); btrfs_set_item_offset(leaf, item, ioff + dsize); } - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot), btrfs_item_nr_offset(slot + nr), sizeof(struct btrfs_item) * @@ -4004,11 +3955,11 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, WARN_ON(!path->keep_locks); again: - cur = btrfs_lock_root_node(root); + cur = btrfs_read_lock_root_node(root); level = btrfs_header_level(cur); WARN_ON(path->nodes[level]); path->nodes[level] = cur; - path->locks[level] = 1; + path->locks[level] = BTRFS_READ_LOCK; if (btrfs_header_generation(cur) < min_trans) { ret = 1; @@ -4098,12 +4049,12 @@ find_next_key: cur = read_node_slot(root, cur, slot); BUG_ON(!cur); - btrfs_tree_lock(cur); + btrfs_tree_read_lock(cur); - path->locks[level - 1] = 1; + path->locks[level - 1] = BTRFS_READ_LOCK; path->nodes[level - 1] = cur; unlock_up(path, level, 1); - btrfs_clear_path_blocking(path, NULL); + btrfs_clear_path_blocking(path, NULL, 0); } out: if (ret == 0) @@ -4218,30 +4169,21 @@ int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path) u32 nritems; int ret; int old_spinning = path->leave_spinning; - int force_blocking = 0; + int next_rw_lock = 0; nritems = btrfs_header_nritems(path->nodes[0]); if (nritems == 0) return 1; - /* - * we take the blocks in an order that upsets lockdep. Using - * blocking mode is the only way around it. 
- */ -#ifdef CONFIG_DEBUG_LOCK_ALLOC - force_blocking = 1; -#endif - btrfs_item_key_to_cpu(path->nodes[0], &key, nritems - 1); again: level = 1; next = NULL; + next_rw_lock = 0; btrfs_release_path(path); path->keep_locks = 1; - - if (!force_blocking) - path->leave_spinning = 1; + path->leave_spinning = 1; ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); path->keep_locks = 0; @@ -4281,11 +4223,12 @@ again: } if (next) { - btrfs_tree_unlock(next); + btrfs_tree_unlock_rw(next, next_rw_lock); free_extent_buffer(next); } next = c; + next_rw_lock = path->locks[level]; ret = read_block_for_search(NULL, root, path, &next, level, slot, &key); if (ret == -EAGAIN) @@ -4297,15 +4240,14 @@ again: } if (!path->skip_locking) { - ret = btrfs_try_spin_lock(next); + ret = btrfs_try_tree_read_lock(next); if (!ret) { btrfs_set_path_blocking(path); - btrfs_tree_lock(next); - if (!force_blocking) - btrfs_clear_path_blocking(path, next); + btrfs_tree_read_lock(next); + btrfs_clear_path_blocking(path, next, + BTRFS_READ_LOCK); } - if (force_blocking) - btrfs_set_lock_blocking(next); + next_rw_lock = BTRFS_READ_LOCK; } break; } @@ -4314,14 +4256,13 @@ again: level--; c = path->nodes[level]; if (path->locks[level]) - btrfs_tree_unlock(c); + btrfs_tree_unlock_rw(c, path->locks[level]); free_extent_buffer(c); path->nodes[level] = next; path->slots[level] = 0; if (!path->skip_locking) - path->locks[level] = 1; - + path->locks[level] = next_rw_lock; if (!level) break; @@ -4336,16 +4277,14 @@ again: } if (!path->skip_locking) { - btrfs_assert_tree_locked(path->nodes[level]); - ret = btrfs_try_spin_lock(next); + ret = btrfs_try_tree_read_lock(next); if (!ret) { btrfs_set_path_blocking(path); - btrfs_tree_lock(next); - if (!force_blocking) - btrfs_clear_path_blocking(path, next); + btrfs_tree_read_lock(next); + btrfs_clear_path_blocking(path, next, + BTRFS_READ_LOCK); } - if (force_blocking) - btrfs_set_lock_blocking(next); + next_rw_lock = BTRFS_READ_LOCK; } } ret = 0; diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index fe9287b06496..365c4e1dde04 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -755,6 +755,8 @@ struct btrfs_space_info { chunks for this space */ unsigned int chunk_alloc:1; /* set if we are allocating a chunk */ + unsigned int flush:1; /* set if we are trying to make space */ + unsigned int force_alloc; /* set if we need to force a chunk alloc for this space */ @@ -764,7 +766,7 @@ struct btrfs_space_info { struct list_head block_groups[BTRFS_NR_RAID_TYPES]; spinlock_t lock; struct rw_semaphore groups_sem; - atomic_t caching_threads; + wait_queue_head_t wait; }; struct btrfs_block_rsv { @@ -824,6 +826,7 @@ struct btrfs_caching_control { struct list_head list; struct mutex mutex; wait_queue_head_t wait; + struct btrfs_work work; struct btrfs_block_group_cache *block_group; u64 progress; atomic_t count; @@ -1032,6 +1035,8 @@ struct btrfs_fs_info { struct btrfs_workers endio_write_workers; struct btrfs_workers endio_freespace_worker; struct btrfs_workers submit_workers; + struct btrfs_workers caching_workers; + /* * fixup workers take dirty pages that didn't properly go through * the cow mechanism and make them safe to write. 
It happens @@ -2128,7 +2133,7 @@ static inline bool btrfs_mixed_space_info(struct btrfs_space_info *space_info) /* extent-tree.c */ static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root, - int num_items) + unsigned num_items) { return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * 3 * num_items; @@ -2222,9 +2227,6 @@ void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); void btrfs_clear_space_info_full(struct btrfs_fs_info *info); int btrfs_check_data_free_space(struct inode *inode, u64 bytes); void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); -int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - int num_items); void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root); int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, @@ -2330,7 +2332,7 @@ struct btrfs_path *btrfs_alloc_path(void); void btrfs_free_path(struct btrfs_path *p); void btrfs_set_path_blocking(struct btrfs_path *p); void btrfs_clear_path_blocking(struct btrfs_path *p, - struct extent_buffer *held); + struct extent_buffer *held, int held_rw); void btrfs_unlock_up_safe(struct btrfs_path *p, int level); int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c index 98c68e658a9b..b52c672f4c18 100644 --- a/fs/btrfs/delayed-inode.c +++ b/fs/btrfs/delayed-inode.c @@ -735,7 +735,7 @@ static int btrfs_batch_insert_items(struct btrfs_trans_handle *trans, } /* reset all the locked nodes in the patch to spinning locks. */ - btrfs_clear_path_blocking(path, NULL); + btrfs_clear_path_blocking(path, NULL, 0); /* insert the keys of the items */ ret = setup_items_for_insert(trans, root, path, keys, data_size, diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 685f2593c4f0..c360a848d97f 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c @@ -89,13 +89,8 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans, data_size = sizeof(*dir_item) + name_len + data_len; dir_item = insert_with_overflow(trans, root, path, &key, data_size, name, name_len); - /* - * FIXME: at some point we should handle xattr's that are larger than - * what we can fit in our leaf. We set location to NULL b/c we arent - * pointing at anything else, that will change if we store the xattr - * data in a separate inode. - */ - BUG_ON(IS_ERR(dir_item)); + if (IS_ERR(dir_item)) + return PTR_ERR(dir_item); memset(&location, 0, sizeof(location)); leaf = path->nodes[0]; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index b231ae13b269..07b3ac662e19 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -100,38 +100,83 @@ struct async_submit_bio { struct btrfs_work work; }; -/* These are used to set the lockdep class on the extent buffer locks. - * The class is set by the readpage_end_io_hook after the buffer has - * passed csum validation but before the pages are unlocked. +/* + * Lockdep class keys for extent_buffer->lock's in this root. For a given + * eb, the lockdep key is determined by the btrfs_root it belongs to and + * the level the eb occupies in the tree. + * + * Different roots are used for different purposes and may nest inside each + * other and they require separate keysets. As lockdep keys should be + * static, assign keysets according to the purpose of the root as indicated + * by btrfs_root->objectid. This ensures that all special purpose roots + * have separate keysets. 
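+ *
+ * With the "btrfs-%s-%02d" naming used below this means, for example,
+ * that a level-1 node of the extent tree gets the lock class
+ * "btrfs-extent-01", while roots without their own entry fall back to
+ * the id-0 default, e.g. "btrfs-tree-01".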
* - * The lockdep class is also set by btrfs_init_new_buffer on freshly - * allocated blocks. + * Lock-nesting across peer nodes is always done with the immediate parent + * node locked thus preventing deadlock. As lockdep doesn't know this, use + * subclass to avoid triggering lockdep warning in such cases. * - * The class is based on the level in the tree block, which allows lockdep - * to know that lower nodes nest inside the locks of higher nodes. + * The key is set by the readpage_end_io_hook after the buffer has passed + * csum validation but before the pages are unlocked. It is also set by + * btrfs_init_new_buffer on freshly allocated blocks. * - * We also add a check to make sure the highest level of the tree is - * the same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this - * code needs update as well. + * We also add a check to make sure the highest level of the tree is the + * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code + * needs update as well. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC # if BTRFS_MAX_LEVEL != 8 # error # endif -static struct lock_class_key btrfs_eb_class[BTRFS_MAX_LEVEL + 1]; -static const char *btrfs_eb_name[BTRFS_MAX_LEVEL + 1] = { - /* leaf */ - "btrfs-extent-00", - "btrfs-extent-01", - "btrfs-extent-02", - "btrfs-extent-03", - "btrfs-extent-04", - "btrfs-extent-05", - "btrfs-extent-06", - "btrfs-extent-07", - /* highest possible level */ - "btrfs-extent-08", + +static struct btrfs_lockdep_keyset { + u64 id; /* root objectid */ + const char *name_stem; /* lock name stem */ + char names[BTRFS_MAX_LEVEL + 1][20]; + struct lock_class_key keys[BTRFS_MAX_LEVEL + 1]; +} btrfs_lockdep_keysets[] = { + { .id = BTRFS_ROOT_TREE_OBJECTID, .name_stem = "root" }, + { .id = BTRFS_EXTENT_TREE_OBJECTID, .name_stem = "extent" }, + { .id = BTRFS_CHUNK_TREE_OBJECTID, .name_stem = "chunk" }, + { .id = BTRFS_DEV_TREE_OBJECTID, .name_stem = "dev" }, + { .id = BTRFS_FS_TREE_OBJECTID, .name_stem = "fs" }, + { .id = BTRFS_CSUM_TREE_OBJECTID, .name_stem = "csum" }, + { .id = BTRFS_ORPHAN_OBJECTID, .name_stem = "orphan" }, + { .id = BTRFS_TREE_LOG_OBJECTID, .name_stem = "log" }, + { .id = BTRFS_TREE_RELOC_OBJECTID, .name_stem = "treloc" }, + { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, .name_stem = "dreloc" }, + { .id = 0, .name_stem = "tree" }, }; + +void __init btrfs_init_lockdep(void) +{ + int i, j; + + /* initialize lockdep class names */ + for (i = 0; i < ARRAY_SIZE(btrfs_lockdep_keysets); i++) { + struct btrfs_lockdep_keyset *ks = &btrfs_lockdep_keysets[i]; + + for (j = 0; j < ARRAY_SIZE(ks->names); j++) + snprintf(ks->names[j], sizeof(ks->names[j]), + "btrfs-%s-%02d", ks->name_stem, j); + } +} + +void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, + int level) +{ + struct btrfs_lockdep_keyset *ks; + + BUG_ON(level >= ARRAY_SIZE(ks->keys)); + + /* find the matching keyset, id 0 is the default entry */ + for (ks = btrfs_lockdep_keysets; ks->id; ks++) + if (ks->id == objectid) + break; + + lockdep_set_class_and_name(&eb->lock, + &ks->keys[level], ks->names[level]); +} + #endif /* @@ -217,7 +262,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, unsigned long len; unsigned long cur_len; unsigned long offset = BTRFS_CSUM_SIZE; - char *map_token = NULL; char *kaddr; unsigned long map_start; unsigned long map_len; @@ -228,8 +272,7 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, len = buf->len - offset; while (len > 0) { err = map_private_extent_buffer(buf, offset, 32, - 
&map_token, &kaddr, - &map_start, &map_len, KM_USER0); + &kaddr, &map_start, &map_len); if (err) return 1; cur_len = min(len, map_len - (offset - map_start)); @@ -237,7 +280,6 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, crc, cur_len); len -= cur_len; offset += cur_len; - unmap_extent_buffer(buf, map_token, KM_USER0); } if (csum_size > sizeof(inline_result)) { result = kzalloc(csum_size * sizeof(char), GFP_NOFS); @@ -494,15 +536,6 @@ static noinline int check_leaf(struct btrfs_root *root, return 0; } -#ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level) -{ - lockdep_set_class_and_name(&eb->lock, - &btrfs_eb_class[level], - btrfs_eb_name[level]); -} -#endif - static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, struct extent_state *state) { @@ -553,7 +586,8 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, } found_level = btrfs_header_level(eb); - btrfs_set_buffer_lockdep_class(eb, found_level); + btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb), + eb, found_level); ret = csum_tree_block(root, eb, 1); if (ret) { @@ -1598,7 +1632,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, goto fail_bdi; } - fs_info->btree_inode->i_mapping->flags &= ~__GFP_FS; + mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS); INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC); INIT_LIST_HEAD(&fs_info->trans_list); @@ -1802,6 +1836,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, fs_info->thread_pool_size), &fs_info->generic_worker); + btrfs_init_workers(&fs_info->caching_workers, "cache", + 2, &fs_info->generic_worker); + /* a higher idle thresh on the submit workers makes it much more * likely that bios will be send down in a sane order to the * devices @@ -1855,6 +1892,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, btrfs_start_workers(&fs_info->endio_write_workers, 1); btrfs_start_workers(&fs_info->endio_freespace_worker, 1); btrfs_start_workers(&fs_info->delayed_workers, 1); + btrfs_start_workers(&fs_info->caching_workers, 1); fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, @@ -2112,6 +2150,7 @@ fail_sb_buffer: btrfs_stop_workers(&fs_info->endio_freespace_worker); btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_stop_workers(&fs_info->caching_workers); fail_alloc: kfree(fs_info->delayed_root); fail_iput: @@ -2577,6 +2616,7 @@ int close_ctree(struct btrfs_root *root) btrfs_stop_workers(&fs_info->endio_freespace_worker); btrfs_stop_workers(&fs_info->submit_workers); btrfs_stop_workers(&fs_info->delayed_workers); + btrfs_stop_workers(&fs_info->caching_workers); btrfs_close_devices(fs_info->fs_devices); btrfs_mapping_tree_free(&fs_info->mapping_tree); diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a0b610a67aae..bec3ea4bd67f 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -87,10 +87,14 @@ int btree_lock_page_hook(struct page *page); #ifdef CONFIG_DEBUG_LOCK_ALLOC -void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, int level); +void btrfs_init_lockdep(void); +void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer *eb, int level); #else -static inline void btrfs_set_buffer_lockdep_class(struct extent_buffer *eb, - int level) +static inline void btrfs_init_lockdep(void) +{ } +static inline void btrfs_set_buffer_lockdep_class(u64 objectid, + struct extent_buffer 
*eb, int level) { } #endif diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 71cd456fdb60..4d08ed79405d 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -320,12 +320,12 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, return total_added; } -static int caching_kthread(void *data) +static noinline void caching_thread(struct btrfs_work *work) { - struct btrfs_block_group_cache *block_group = data; - struct btrfs_fs_info *fs_info = block_group->fs_info; - struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; - struct btrfs_root *extent_root = fs_info->extent_root; + struct btrfs_block_group_cache *block_group; + struct btrfs_fs_info *fs_info; + struct btrfs_caching_control *caching_ctl; + struct btrfs_root *extent_root; struct btrfs_path *path; struct extent_buffer *leaf; struct btrfs_key key; @@ -334,9 +334,14 @@ static int caching_kthread(void *data) u32 nritems; int ret = 0; + caching_ctl = container_of(work, struct btrfs_caching_control, work); + block_group = caching_ctl->block_group; + fs_info = block_group->fs_info; + extent_root = fs_info->extent_root; + path = btrfs_alloc_path(); if (!path) - return -ENOMEM; + goto out; last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); @@ -433,13 +438,11 @@ err: free_excluded_extents(extent_root, block_group); mutex_unlock(&caching_ctl->mutex); +out: wake_up(&caching_ctl->wait); put_caching_control(caching_ctl); - atomic_dec(&block_group->space_info->caching_threads); btrfs_put_block_group(block_group); - - return 0; } static int cache_block_group(struct btrfs_block_group_cache *cache, @@ -449,7 +452,6 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, { struct btrfs_fs_info *fs_info = cache->fs_info; struct btrfs_caching_control *caching_ctl; - struct task_struct *tsk; int ret = 0; smp_mb(); @@ -501,6 +503,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, caching_ctl->progress = cache->key.objectid; /* one for caching kthread, one for caching block group list */ atomic_set(&caching_ctl->count, 2); + caching_ctl->work.func = caching_thread; spin_lock(&cache->lock); if (cache->cached != BTRFS_CACHE_NO) { @@ -516,16 +519,9 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); up_write(&fs_info->extent_commit_sem); - atomic_inc(&cache->space_info->caching_threads); btrfs_get_block_group(cache); - tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", - cache->key.objectid); - if (IS_ERR(tsk)) { - ret = PTR_ERR(tsk); - printk(KERN_ERR "error running thread %d\n", ret); - BUG(); - } + btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); return ret; } @@ -2932,9 +2928,10 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, found->full = 0; found->force_alloc = CHUNK_ALLOC_NO_FORCE; found->chunk_alloc = 0; + found->flush = 0; + init_waitqueue_head(&found->wait); *space_info = found; list_add_rcu(&found->list, &info->space_info); - atomic_set(&found->caching_threads, 0); return 0; } @@ -3314,6 +3311,14 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, if (reserved == 0) return 0; + smp_mb(); + if (root->fs_info->delalloc_bytes == 0) { + if (trans) + return 0; + btrfs_wait_ordered_extents(root, 0, 0); + return 0; + } + max_reclaim = min(reserved, to_reclaim); while (loops < 1024) { @@ -3356,6 +3361,8 @@ static int shrink_delalloc(struct btrfs_trans_handle *trans, } } + if 
(reclaimed >= to_reclaim && !trans) + btrfs_wait_ordered_extents(root, 0, 0); return reclaimed >= to_reclaim; } @@ -3380,15 +3387,36 @@ static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, u64 num_bytes = orig_bytes; int retries = 0; int ret = 0; - bool reserved = false; bool committed = false; + bool flushing = false; again: - ret = -ENOSPC; - if (reserved) - num_bytes = 0; - + ret = 0; spin_lock(&space_info->lock); + /* + * We only want to wait if somebody other than us is flushing and we are + * actually allowed to flush. + */ + while (flush && !flushing && space_info->flush) { + spin_unlock(&space_info->lock); + /* + * If we have a trans handle we can't wait because the flusher + * may have to commit the transaction, which would mean we would + * deadlock since we are waiting for the flusher to finish, but + * hold the current transaction open. + */ + if (trans) + return -EAGAIN; + ret = wait_event_interruptible(space_info->wait, + !space_info->flush); + /* Must have been interrupted, return */ + if (ret) + return -EINTR; + + spin_lock(&space_info->lock); + } + + ret = -ENOSPC; unused = space_info->bytes_used + space_info->bytes_reserved + space_info->bytes_pinned + space_info->bytes_readonly + space_info->bytes_may_use; @@ -3403,8 +3431,7 @@ again: if (unused <= space_info->total_bytes) { unused = space_info->total_bytes - unused; if (unused >= num_bytes) { - if (!reserved) - space_info->bytes_reserved += orig_bytes; + space_info->bytes_reserved += orig_bytes; ret = 0; } else { /* @@ -3429,17 +3456,14 @@ again: * to reclaim space we can actually use it instead of somebody else * stealing it from us. */ - if (ret && !reserved) { - space_info->bytes_reserved += orig_bytes; - reserved = true; + if (ret && flush) { + flushing = true; + space_info->flush = 1; } spin_unlock(&space_info->lock); - if (!ret) - return 0; - - if (!flush) + if (!ret || !flush) goto out; /* * We do synchronous shrinking since we don't actually unreserve * metadata until after the IO is completed. */ ret = shrink_delalloc(trans, root, num_bytes, 1); - if (ret > 0) - return 0; - else if (ret < 0) + if (ret < 0) goto out; + ret = 0; + /* * So if we were overcommitted it's possible that somebody else flushed * out enough space and we simply didn't have enough space to reclaim, * so go back and try again. */ if (retries < 2) { retries++; goto again; } - spin_lock(&space_info->lock); /* * Not enough space to be reclaimed, don't bother committing the * transaction.
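
The single-flusher discipline above deserves a plain statement: only one task may flush a given space_info at a time; latecomers sleep on space_info->wait, except that a task holding a transaction handle must return -EAGAIN instead, since the flusher may need to commit that very transaction. A userspace sketch of the same shape, with a mutex and condition variable standing in for the spinlock and waitqueue (all names below are illustrative stand-ins, not code from this patch):

    #include <errno.h>
    #include <pthread.h>
    #include <stdbool.h>

    struct space_info_sketch {
            pthread_mutex_t lock;
            pthread_cond_t wait;
            bool flush;                     /* somebody is already flushing */
    };

    /* Wait until nobody else is flushing.  Callers holding a transaction
     * must not sleep here, so they bail out with -EAGAIN instead. */
    static int wait_for_other_flusher(struct space_info_sketch *si,
                                      bool have_trans)
    {
            pthread_mutex_lock(&si->lock);
            while (si->flush) {
                    if (have_trans) {
                            pthread_mutex_unlock(&si->lock);
                            return -EAGAIN;
                    }
                    pthread_cond_wait(&si->wait, &si->lock);
            }
            pthread_mutex_unlock(&si->lock);
            return 0;
    }

    /* On a failed reservation the caller becomes the one flusher ... */
    static void become_flusher(struct space_info_sketch *si)
    {
            pthread_mutex_lock(&si->lock);
            si->flush = true;
            pthread_mutex_unlock(&si->lock);
    }

    /* ... and on the way out it clears the flag and wakes everyone. */
    static void finish_flushing(struct space_info_sketch *si)
    {
            pthread_mutex_lock(&si->lock);
            si->flush = false;
            pthread_cond_broadcast(&si->wait);      /* wake_up_all() analogue */
            pthread_mutex_unlock(&si->lock);
    }

pthread_cond_broadcast() plays the role of wake_up_all(): every sleeper rechecks the flag and then retries its own reservation rather than assuming the flusher freed enough space for it.
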
*/ + spin_lock(&space_info->lock); if (space_info->bytes_pinned < orig_bytes) ret = -ENOSPC; spin_unlock(&space_info->lock); @@ -3474,10 +3498,13 @@ again: goto out; ret = -EAGAIN; - if (trans || committed) + if (trans) goto out; ret = -ENOSPC; + if (committed) + goto out; + trans = btrfs_join_transaction(root); if (IS_ERR(trans)) goto out; @@ -3489,12 +3516,12 @@ again: } out: - if (reserved) { + if (flushing) { spin_lock(&space_info->lock); - space_info->bytes_reserved -= orig_bytes; + space_info->flush = 0; + wake_up_all(&space_info->wait); spin_unlock(&space_info->lock); } - return ret; } @@ -3704,7 +3731,6 @@ int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, if (commit_trans) { if (trans) return -EAGAIN; - trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); ret = btrfs_commit_transaction(trans, root); @@ -3874,26 +3900,6 @@ int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, return 0; } -int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, - struct btrfs_root *root, - int num_items) -{ - u64 num_bytes; - int ret; - - if (num_items == 0 || root->fs_info->chunk_root == root) - return 0; - - num_bytes = btrfs_calc_trans_metadata_size(root, num_items); - ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, - num_bytes); - if (!ret) { - trans->bytes_reserved += num_bytes; - trans->block_rsv = &root->fs_info->trans_block_rsv; - } - return ret; -} - void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, struct btrfs_root *root) { @@ -3944,6 +3950,30 @@ int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); } +static unsigned drop_outstanding_extent(struct inode *inode) +{ + unsigned dropped_extents = 0; + + spin_lock(&BTRFS_I(inode)->lock); + BUG_ON(!BTRFS_I(inode)->outstanding_extents); + BTRFS_I(inode)->outstanding_extents--; + + /* + * If we have at least as many outstanding extents as we have + * reserved extents, we need to leave the reserved extents count alone.
+ */ + if (BTRFS_I(inode)->outstanding_extents >= + BTRFS_I(inode)->reserved_extents) + goto out; + + dropped_extents = BTRFS_I(inode)->reserved_extents - + BTRFS_I(inode)->outstanding_extents; + BTRFS_I(inode)->reserved_extents -= dropped_extents; +out: + spin_unlock(&BTRFS_I(inode)->lock); + return dropped_extents; +} + static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) { return num_bytes >>= 3; @@ -3953,9 +3983,8 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) { struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; - u64 to_reserve; - int nr_extents; - int reserved_extents; + u64 to_reserve = 0; + unsigned nr_extents = 0; int ret; if (btrfs_transaction_in_commit(root->fs_info)) @@ -3963,66 +3992,49 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) num_bytes = ALIGN(num_bytes, root->sectorsize); - nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; - reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + + if (BTRFS_I(inode)->outstanding_extents > + BTRFS_I(inode)->reserved_extents) { + nr_extents = BTRFS_I(inode)->outstanding_extents - + BTRFS_I(inode)->reserved_extents; + BTRFS_I(inode)->reserved_extents += nr_extents; - if (nr_extents > reserved_extents) { - nr_extents -= reserved_extents; to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); - } else { - nr_extents = 0; - to_reserve = 0; } + spin_unlock(&BTRFS_I(inode)->lock); to_reserve += calc_csum_metadata_size(inode, num_bytes); ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); - if (ret) + if (ret) { + unsigned dropped; + /* + * We don't need the return value since our reservation failed, + * we just need to clean up our counter. 
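
The rule encoded by drop_outstanding_extent() and the reservation path above is worth stating plainly: outstanding_extents counts extents that in-flight delalloc will eventually produce, reserved_extents counts extents we actually hold metadata reservations for, and only the surplus of reservations over outstanding work may be handed back. A minimal sketch of that arithmetic, with simplified types and no locking (the real counters sit behind the new per-inode spinlock; names here are illustrative):

    #include <assert.h>

    /* Simplified stand-in for the btrfs inode counters. */
    struct inode_sketch {
            unsigned outstanding_extents;   /* extents delalloc will produce */
            unsigned reserved_extents;      /* extents we hold metadata for */
    };

    /* Drop one outstanding extent and return how many reservations became
     * surplus and may be given back to the block reserve. */
    static unsigned drop_outstanding(struct inode_sketch *i)
    {
            unsigned dropped = 0;

            assert(i->outstanding_extents > 0);
            i->outstanding_extents--;

            if (i->outstanding_extents < i->reserved_extents) {
                    dropped = i->reserved_extents - i->outstanding_extents;
                    i->reserved_extents -= dropped;
            }
            return dropped;
    }
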
+ */ + dropped = drop_outstanding_extent(inode); + WARN_ON(dropped > 1); return ret; - - atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + } block_rsv_add_bytes(block_rsv, to_reserve, 1); - if (block_rsv->size > 512 * 1024 * 1024) - shrink_delalloc(NULL, root, to_reserve, 0); - return 0; } void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) { struct btrfs_root *root = BTRFS_I(inode)->root; - u64 to_free; - int nr_extents; - int reserved_extents; + u64 to_free = 0; + unsigned dropped; num_bytes = ALIGN(num_bytes, root->sectorsize); - atomic_dec(&BTRFS_I(inode)->outstanding_extents); - WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); - - reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); - do { - int old, new; - - nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); - if (nr_extents >= reserved_extents) { - nr_extents = 0; - break; - } - old = reserved_extents; - nr_extents = reserved_extents - nr_extents; - new = reserved_extents - nr_extents; - old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, - reserved_extents, new); - if (likely(old == reserved_extents)) - break; - reserved_extents = old; - } while (1); + dropped = drop_outstanding_extent(inode); to_free = calc_csum_metadata_size(inode, num_bytes); - if (nr_extents > 0) - to_free += btrfs_calc_trans_metadata_size(root, nr_extents); + if (dropped > 0) + to_free += btrfs_calc_trans_metadata_size(root, dropped); btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, to_free); @@ -4990,14 +5002,10 @@ have_block_group: } /* - * We only want to start kthread caching if we are at - * the point where we will wait for caching to make - * progress, or if our ideal search is over and we've - * found somebody to start caching. + * The caching workers are limited to 2 threads, so we + * can queue as much work as we care to. 
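
The kthread-to-worker conversion referenced by that comment hinges on a standard kernel idiom: embed the work item in the owning structure and recover the owner with container_of() when the pool runs the work function. A freestanding illustration with hypothetical toy types (this is not the btrfs_work API itself):

    #include <stddef.h>
    #include <stdio.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct work {
            void (*func)(struct work *);
    };

    struct caching_control_sketch {
            const char *block_group_name;
            struct work work;               /* embedded work item */
    };

    static void caching_thread(struct work *w)
    {
            struct caching_control_sketch *ctl =
                    container_of(w, struct caching_control_sketch, work);
            printf("caching %s\n", ctl->block_group_name);
    }

    int main(void)
    {
            struct caching_control_sketch ctl = {
                    "block group at 1M", { caching_thread }
            };
            ctl.work.func(&ctl.work);       /* what a pool thread would do */
            return 0;
    }

This is why caching_thread() no longer takes a void * cookie: the caching control is recovered from the embedded work item itself, and the two-thread pool bounds concurrency without any per-block-group kthread bookkeeping.
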
*/ - if (loop > LOOP_CACHING_NOWAIT || - (loop > LOOP_FIND_IDEAL && - atomic_read(&space_info->caching_threads) < 2)) { + if (loop > LOOP_FIND_IDEAL) { ret = cache_block_group(block_group, trans, orig_root, 0); BUG_ON(ret); @@ -5219,8 +5227,7 @@ loop: if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { found_uncached_bg = false; loop++; - if (!ideal_cache_percent && - atomic_read(&space_info->caching_threads)) + if (!ideal_cache_percent) goto search; /* @@ -5623,7 +5630,7 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, if (!buf) return ERR_PTR(-ENOMEM); btrfs_set_header_generation(buf, trans->transid); - btrfs_set_buffer_lockdep_class(buf, level); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); btrfs_tree_lock(buf); clean_tree_block(trans, root, buf); @@ -5910,7 +5917,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, return 1; if (path->locks[level] && !wc->keep_locks) { - btrfs_tree_unlock(eb); + btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; } return 0; @@ -5934,7 +5941,7 @@ static noinline int walk_down_proc(struct btrfs_trans_handle *trans, * keep the tree lock */ if (path->locks[level] && level > 0) { - btrfs_tree_unlock(eb); + btrfs_tree_unlock_rw(eb, path->locks[level]); path->locks[level] = 0; } return 0; @@ -6047,7 +6054,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, BUG_ON(level != btrfs_header_level(next)); path->nodes[level] = next; path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; wc->level = level; if (wc->level == 1) wc->reada_slot = 0; @@ -6118,7 +6125,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, BUG_ON(level == 0); btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; ret = btrfs_lookup_extent_info(trans, root, eb->start, eb->len, @@ -6127,8 +6134,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, BUG_ON(ret); BUG_ON(wc->refs[level] == 0); if (wc->refs[level] == 1) { - btrfs_tree_unlock(eb); - path->locks[level] = 0; + btrfs_tree_unlock_rw(eb, path->locks[level]); return 1; } } @@ -6150,7 +6156,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, btrfs_header_generation(eb) == trans->transid) { btrfs_tree_lock(eb); btrfs_set_lock_blocking(eb); - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; } clean_tree_block(trans, root, eb); } @@ -6229,7 +6235,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, return 0; if (path->locks[level]) { - btrfs_tree_unlock(path->nodes[level]); + btrfs_tree_unlock_rw(path->nodes[level], + path->locks[level]); path->locks[level] = 0; } free_extent_buffer(path->nodes[level]); @@ -6281,7 +6288,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, path->nodes[level] = btrfs_lock_root_node(root); btrfs_set_lock_blocking(path->nodes[level]); path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; memset(&wc->update_progress, 0, sizeof(wc->update_progress)); } else { @@ -6449,7 +6456,7 @@ int btrfs_drop_subtree(struct btrfs_trans_handle *trans, level = btrfs_header_level(node); path->nodes[level] = node; path->slots[level] = 0; - path->locks[level] = 1; + path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; wc->refs[parent_level] = 1; wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; @@ -6524,15 +6531,28 @@ static u64 update_block_group_flags(struct 
btrfs_root *root, u64 flags) return flags; } -static int set_block_group_ro(struct btrfs_block_group_cache *cache) +static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) { struct btrfs_space_info *sinfo = cache->space_info; u64 num_bytes; + u64 min_allocable_bytes; int ret = -ENOSPC; if (cache->ro) return 0; + /* + * We need some metadata space and system metadata space for + * allocating chunks in some corner cases, unless we are forced + * to set it read-only. + */ + if ((sinfo->flags & + (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && + !force) + min_allocable_bytes = 1 * 1024 * 1024; + else + min_allocable_bytes = 0; + spin_lock(&sinfo->lock); spin_lock(&cache->lock); num_bytes = cache->key.offset - cache->reserved - cache->pinned - @@ -6540,7 +6560,8 @@ static int set_block_group_ro(struct btrfs_block_group_cache *cache) if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + sinfo->bytes_may_use + sinfo->bytes_readonly + - cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { + cache->reserved_pinned + num_bytes + min_allocable_bytes <= + sinfo->total_bytes) { sinfo->bytes_readonly += num_bytes; sinfo->bytes_reserved += cache->reserved_pinned; cache->reserved_pinned = 0; @@ -6571,7 +6592,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, CHUNK_ALLOC_FORCE); - ret = set_block_group_ro(cache); + ret = set_block_group_ro(cache, 0); if (!ret) goto out; alloc_flags = get_alloc_profile(root, cache->space_info->flags); @@ -6579,7 +6600,7 @@ int btrfs_set_block_group_ro(struct btrfs_root *root, CHUNK_ALLOC_FORCE); if (ret < 0) goto out; - ret = set_block_group_ro(cache); + ret = set_block_group_ro(cache, 0); out: btrfs_end_transaction(trans, root); return ret; @@ -7016,7 +7037,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) set_avail_alloc_bits(root->fs_info, cache->flags); if (btrfs_chunk_readonly(root, cache->key.objectid)) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); } list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { @@ -7030,9 +7051,9 @@ int btrfs_read_block_groups(struct btrfs_root *root) * mirrored block groups.
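
Roughly, the new check refuses the read-only transition unless the space_info would still have min_allocable_bytes (1MiB for metadata/system groups, zero when forced) of allocatable headroom after the group's free bytes become read-only. A simplified predicate capturing that inequality (field names are shortened stand-ins for the sinfo/cache members above; illustrative only):

    #include <stdbool.h>
    #include <stdint.h>

    /* Would marking this block group read-only still leave the space_info
     * with min_allocable bytes of headroom? */
    static bool can_set_ro(uint64_t used, uint64_t reserved, uint64_t pinned,
                           uint64_t may_use, uint64_t readonly,
                           uint64_t reserved_pinned, uint64_t bg_free,
                           uint64_t min_allocable, uint64_t total)
    {
            return used + reserved + pinned + may_use + readonly +
                   reserved_pinned + bg_free + min_allocable <= total;
    }
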
*/ list_for_each_entry(cache, &space_info->block_groups[3], list) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); list_for_each_entry(cache, &space_info->block_groups[4], list) - set_block_group_ro(cache); + set_block_group_ro(cache, 1); } init_global_block_rsv(info); diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 561262d35689..067b1747421b 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -281,11 +281,10 @@ static int merge_state(struct extent_io_tree *tree, if (other->start == state->end + 1 && other->state == state->state) { merge_cb(tree, state, other); - other->start = state->start; - state->tree = NULL; - rb_erase(&state->rb_node, &tree->state); - free_extent_state(state); - state = NULL; + state->end = other->end; + other->tree = NULL; + rb_erase(&other->rb_node, &tree->state); + free_extent_state(other); } } @@ -351,7 +350,6 @@ static int insert_state(struct extent_io_tree *tree, "%llu %llu\n", (unsigned long long)found->start, (unsigned long long)found->end, (unsigned long long)start, (unsigned long long)end); - free_extent_state(state); return -EEXIST; } state->tree = tree; @@ -500,7 +498,8 @@ again: cached_state = NULL; } - if (cached && cached->tree && cached->start == start) { + if (cached && cached->tree && cached->start <= start && + cached->end > start) { if (clear) atomic_dec(&cached->refs); state = cached; @@ -742,7 +741,8 @@ again: spin_lock(&tree->lock); if (cached_state && *cached_state) { state = *cached_state; - if (state->start == start && state->tree) { + if (state->start <= start && state->end > start && + state->tree) { node = &state->rb_node; goto hit_next; } @@ -783,13 +783,13 @@ hit_next: if (err) goto out; - next_node = rb_next(node); cache_state(state, cached_state); merge_state(tree, state); if (last_end == (u64)-1) goto out; start = last_end + 1; + next_node = rb_next(&state->rb_node); if (next_node && start < end && prealloc && !need_resched()) { state = rb_entry(next_node, struct extent_state, rb_node); @@ -862,7 +862,6 @@ hit_next: * Avoid freeing 'prealloc' if it can be merged with * the later extent.
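
The merge_state() change above is subtle: previously the current state was absorbed into its right-hand neighbour and freed, which could leave a caller's cached_state pointer dangling; now the surviving node is state itself, extended over other. The same idea on a toy singly-linked interval list (illustrative, simplified):

    #include <stdint.h>
    #include <stdlib.h>

    struct range {
            uint64_t start, end;
            struct range *next;
    };

    /* Merge an adjacent right-hand neighbour into 'state' so that any
     * pointer the caller has cached to 'state' survives the merge. */
    static void merge_right(struct range *state)
    {
            struct range *other = state->next;

            if (other && other->start == state->end + 1) {
                    state->end = other->end;        /* grow the survivor */
                    state->next = other->next;      /* unlink ... */
                    free(other);                    /* ... and free the other */
            }
    }
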
*/ - atomic_inc(&prealloc->refs); err = insert_state(tree, prealloc, start, this_end, &bits); BUG_ON(err == -EEXIST); @@ -872,7 +871,6 @@ hit_next: goto out; } cache_state(prealloc, cached_state); - free_extent_state(prealloc); prealloc = NULL; start = this_end + 1; goto search_again; @@ -1564,7 +1562,8 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, int bitset = 0; spin_lock(&tree->lock); - if (cached && cached->tree && cached->start == start) + if (cached && cached->tree && cached->start <= start && + cached->end > start) node = &cached->rb_node; else node = tree_search(tree, start); @@ -2432,6 +2431,7 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, pgoff_t index; pgoff_t end; /* Inclusive */ int scanned = 0; + int tag; pagevec_init(&pvec, 0); if (wbc->range_cyclic) { @@ -2442,11 +2442,16 @@ static int extent_write_cache_pages(struct extent_io_tree *tree, end = wbc->range_end >> PAGE_CACHE_SHIFT; scanned = 1; } + if (wbc->sync_mode == WB_SYNC_ALL) + tag = PAGECACHE_TAG_TOWRITE; + else + tag = PAGECACHE_TAG_DIRTY; retry: + if (wbc->sync_mode == WB_SYNC_ALL) + tag_pages_for_writeback(mapping, index, end); while (!done && !nr_to_write_done && (index <= end) && - (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, - PAGECACHE_TAG_DIRTY, min(end - index, - (pgoff_t)PAGEVEC_SIZE-1) + 1))) { + (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag, + min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1))) { unsigned i; scanned = 1; @@ -3020,8 +3025,15 @@ static struct extent_buffer *__alloc_extent_buffer(struct extent_io_tree *tree, return NULL; eb->start = start; eb->len = len; - spin_lock_init(&eb->lock); - init_waitqueue_head(&eb->lock_wq); + rwlock_init(&eb->lock); + atomic_set(&eb->write_locks, 0); + atomic_set(&eb->read_locks, 0); + atomic_set(&eb->blocking_readers, 0); + atomic_set(&eb->blocking_writers, 0); + atomic_set(&eb->spinning_readers, 0); + atomic_set(&eb->spinning_writers, 0); + init_waitqueue_head(&eb->write_lock_wq); + init_waitqueue_head(&eb->read_lock_wq); #if LEAK_DEBUG spin_lock_irqsave(&leak_lock, flags); @@ -3117,7 +3129,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, i = 0; } for (; i < num_pages; i++, index++) { - p = find_or_create_page(mapping, index, GFP_NOFS | __GFP_HIGHMEM); + p = find_or_create_page(mapping, index, GFP_NOFS); if (!p) { WARN_ON(1); goto free_eb; @@ -3264,6 +3276,22 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, return was_dirty; } +static int __eb_straddles_pages(u64 start, u64 len) +{ + if (len < PAGE_CACHE_SIZE) + return 1; + if (start & (PAGE_CACHE_SIZE - 1)) + return 1; + if ((start + len) & (PAGE_CACHE_SIZE - 1)) + return 1; + return 0; +} + +static int eb_straddles_pages(struct extent_buffer *eb) +{ + return __eb_straddles_pages(eb->start, eb->len); +} + int clear_extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb, struct extent_state **cached_state) @@ -3275,8 +3303,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = num_extent_pages(eb->start, eb->len); clear_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); - clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - cached_state, GFP_NOFS); + if (eb_straddles_pages(eb)) { + clear_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + cached_state, GFP_NOFS); + } for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if (page) @@ -3294,8 +3324,10 @@ int set_extent_buffer_uptodate(struct extent_io_tree *tree, num_pages = 
num_extent_pages(eb->start, eb->len); - set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, - NULL, GFP_NOFS); + if (eb_straddles_pages(eb)) { + set_extent_uptodate(tree, eb->start, eb->start + eb->len - 1, + NULL, GFP_NOFS); + } for (i = 0; i < num_pages; i++) { page = extent_buffer_page(eb, i); if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || @@ -3318,9 +3350,12 @@ int extent_range_uptodate(struct extent_io_tree *tree, int uptodate; unsigned long index; - ret = test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL); - if (ret) - return 1; + if (__eb_straddles_pages(start, end - start + 1)) { + ret = test_range_bit(tree, start, end, + EXTENT_UPTODATE, 1, NULL); + if (ret) + return 1; + } while (start <= end) { index = start >> PAGE_CACHE_SHIFT; page = find_get_page(tree->mapping, index); @@ -3348,10 +3383,12 @@ int extent_buffer_uptodate(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 1; - ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1, cached_state); - if (ret) - return ret; + if (eb_straddles_pages(eb)) { + ret = test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, cached_state); + if (ret) + return ret; + } num_pages = num_extent_pages(eb->start, eb->len); for (i = 0; i < num_pages; i++) { @@ -3384,9 +3421,11 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, if (test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags)) return 0; - if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, - EXTENT_UPTODATE, 1, NULL)) { - return 0; + if (eb_straddles_pages(eb)) { + if (test_range_bit(tree, eb->start, eb->start + eb->len - 1, + EXTENT_UPTODATE, 1, NULL)) { + return 0; + } } if (start) { @@ -3490,9 +3529,8 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, page = extent_buffer_page(eb, i); cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); memcpy(dst, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER1); dst += cur; len -= cur; @@ -3502,9 +3540,9 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv, } int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, char **token, char **map, + unsigned long min_len, char **map, unsigned long *map_start, - unsigned long *map_len, int km) + unsigned long *map_len) { size_t offset = start & (PAGE_CACHE_SIZE - 1); char *kaddr; @@ -3534,42 +3572,12 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start, } p = extent_buffer_page(eb, i); - kaddr = kmap_atomic(p, km); - *token = kaddr; + kaddr = page_address(p); *map = kaddr + offset; *map_len = PAGE_CACHE_SIZE - offset; return 0; } -int map_extent_buffer(struct extent_buffer *eb, unsigned long start, - unsigned long min_len, - char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km) -{ - int err; - int save = 0; - if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, km); - eb->map_token = NULL; - save = 1; - } - err = map_private_extent_buffer(eb, start, min_len, token, map, - map_start, map_len, km); - if (!err && save) { - eb->map_token = *token; - eb->kaddr = *map; - eb->map_start = *map_start; - eb->map_len = *map_len; - } - return err; -} - -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km) -{ - kunmap_atomic(token, km); -} - int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, unsigned long start, unsigned long len) @@ -3593,9 +3601,8 @@ int 
memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv, cur = min(len, (PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); ret = memcmp(ptr, kaddr + offset, cur); - kunmap_atomic(kaddr, KM_USER0); if (ret) break; @@ -3628,9 +3635,8 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER1); + kaddr = page_address(page); memcpy(kaddr + offset, src, cur); - kunmap_atomic(kaddr, KM_USER1); src += cur; len -= cur; @@ -3659,9 +3665,8 @@ void memset_extent_buffer(struct extent_buffer *eb, char c, WARN_ON(!PageUptodate(page)); cur = min(len, PAGE_CACHE_SIZE - offset); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); memset(kaddr + offset, c, cur); - kunmap_atomic(kaddr, KM_USER0); len -= cur; offset = 0; @@ -3692,9 +3697,8 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src, cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset)); - kaddr = kmap_atomic(page, KM_USER0); + kaddr = page_address(page); read_extent_buffer(src, kaddr + offset, src_offset, cur); - kunmap_atomic(kaddr, KM_USER0); src_offset += cur; len -= cur; @@ -3707,20 +3711,17 @@ static void move_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); if (dst_page == src_page) { memmove(dst_kaddr + dst_off, dst_kaddr + src_off, len); } else { - char *src_kaddr = kmap_atomic(src_page, KM_USER1); + char *src_kaddr = page_address(src_page); char *p = dst_kaddr + dst_off + len; char *s = src_kaddr + src_off + len; while (len--) *--p = *--s; - - kunmap_atomic(src_kaddr, KM_USER1); } - kunmap_atomic(dst_kaddr, KM_USER0); } static inline bool areas_overlap(unsigned long src, unsigned long dst, unsigned long len) @@ -3733,20 +3734,17 @@ static void copy_pages(struct page *dst_page, struct page *src_page, unsigned long dst_off, unsigned long src_off, unsigned long len) { - char *dst_kaddr = kmap_atomic(dst_page, KM_USER0); + char *dst_kaddr = page_address(dst_page); char *src_kaddr; if (dst_page != src_page) { - src_kaddr = kmap_atomic(src_page, KM_USER1); + src_kaddr = page_address(src_page); } else { src_kaddr = dst_kaddr; BUG_ON(areas_overlap(src_off, dst_off, len)); } memcpy(dst_kaddr + dst_off, src_kaddr + src_off, len); - kunmap_atomic(dst_kaddr, KM_USER0); - if (dst_page != src_page) - kunmap_atomic(src_kaddr, KM_USER1); } void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset, diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index a11a92ee2d30..21a7ca9e7282 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -120,8 +120,6 @@ struct extent_state { struct extent_buffer { u64 start; unsigned long len; - char *map_token; - char *kaddr; unsigned long map_start; unsigned long map_len; struct page *first_page; @@ -130,14 +128,26 @@ struct extent_buffer { struct rcu_head rcu_head; atomic_t refs; - /* the spinlock is used to protect most operations */ - spinlock_t lock; + /* count of read lock holders on the extent buffer */ + atomic_t write_locks; + atomic_t read_locks; + atomic_t blocking_writers; + atomic_t blocking_readers; + atomic_t spinning_readers; + atomic_t spinning_writers; + + /* protects write locks */ + rwlock_t lock; - /* - * when we keep the lock held while blocking, waiters go onto - * the wq + /* 
readers use lock_wq while they wait for the write + * lock holders to unlock */ - wait_queue_head_t lock_wq; + wait_queue_head_t write_lock_wq; + + /* writers use read_lock_wq while they wait for readers + * to unlock + */ + wait_queue_head_t read_lock_wq; }; static inline void extent_set_compress_type(unsigned long *bio_flags, @@ -279,15 +289,10 @@ int clear_extent_buffer_uptodate(struct extent_io_tree *tree, int extent_buffer_uptodate(struct extent_io_tree *tree, struct extent_buffer *eb, struct extent_state *cached_state); -int map_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, - unsigned long *map_start, - unsigned long *map_len, int km); int map_private_extent_buffer(struct extent_buffer *eb, unsigned long offset, - unsigned long min_len, char **token, char **map, + unsigned long min_len, char **map, unsigned long *map_start, - unsigned long *map_len, int km); -void unmap_extent_buffer(struct extent_buffer *eb, char *token, int km); + unsigned long *map_len); int extent_range_uptodate(struct extent_io_tree *tree, u64 start, u64 end); int extent_clear_unlock_delalloc(struct inode *inode, diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 90d4ee52cd45..08bcfa92a222 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c @@ -177,6 +177,15 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root, WARN_ON(bio->bi_vcnt <= 0); + /* + * the free space stuff is only read when it hasn't been + * updated in the current transaction. So, we can safely + * read from the commit root and sidestep a nasty deadlock + * between reading the free space cache and updating the csum tree. + */ + if (btrfs_is_free_space_inode(root, inode)) + path->search_commit_root = 1; + disk_bytenr = (u64)bio->bi_sector << 9; if (dio) offset = logical_offset; @@ -664,10 +673,6 @@ int btrfs_csum_file_blocks(struct btrfs_trans_handle *trans, struct btrfs_sector_sum *sector_sum; u32 nritems; u32 ins_size; - char *eb_map; - char *eb_token; - unsigned long map_len; - unsigned long map_start; u16 csum_size = btrfs_super_csum_size(&root->fs_info->super_copy); @@ -814,30 +819,9 @@ found: item_end = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_csum_item); item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + btrfs_item_size_nr(leaf, path->slots[0])); - eb_token = NULL; next_sector: - if (!eb_token || - (unsigned long)item + csum_size >= map_start + map_len) { - int err; - - if (eb_token) - unmap_extent_buffer(leaf, eb_token, KM_USER1); - eb_token = NULL; - err = map_private_extent_buffer(leaf, (unsigned long)item, - csum_size, - &eb_token, &eb_map, - &map_start, &map_len, KM_USER1); - if (err) - eb_token = NULL; - } - if (eb_token) { - memcpy(eb_token + ((unsigned long)item & (PAGE_CACHE_SIZE - 1)), - §or_sum->sum, csum_size); - } else { - write_extent_buffer(leaf, §or_sum->sum, - (unsigned long)item, csum_size); - } + write_extent_buffer(leaf, §or_sum->sum, (unsigned long)item, csum_size); total_bytes += root->sectorsize; sector_sum++; @@ -850,10 +834,7 @@ next_sector: goto next_sector; } } - if (eb_token) { - unmap_extent_buffer(leaf, eb_token, KM_USER1); - eb_token = NULL; - } + btrfs_mark_buffer_dirty(path->nodes[0]); if (total_bytes < sums->len) { btrfs_release_path(path); diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 59cbdb120ad0..a35e51c9f235 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1081,7 +1081,8 @@ static noinline int prepare_pages(struct btrfs_root *root, struct file *file, again: for (i = 0; i < 
num_pages; i++) { - pages[i] = grab_cache_page(inode->i_mapping, index + i); + pages[i] = find_or_create_page(inode->i_mapping, index + i, + GFP_NOFS); if (!pages[i]) { faili = i - 1; err = -ENOMEM; @@ -1238,9 +1239,11 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file, * managed to copy. */ if (num_pages > dirty_pages) { - if (copied > 0) - atomic_inc( - &BTRFS_I(inode)->outstanding_extents); + if (copied > 0) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } btrfs_delalloc_release_space(inode, (num_pages - dirty_pages) << PAGE_CACHE_SHIFT); diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index bf0d61567f3d..6377713f639c 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -98,6 +98,12 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root, return inode; spin_lock(&block_group->lock); + if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM) { + printk(KERN_INFO "Old style space inode found, converting.\n"); + BTRFS_I(inode)->flags &= ~BTRFS_INODE_NODATASUM; + block_group->disk_cache_state = BTRFS_DC_CLEAR; + } + if (!btrfs_fs_closing(root->fs_info)) { block_group->inode = igrab(inode); block_group->iref = 1; @@ -135,7 +141,7 @@ int __create_free_space_inode(struct btrfs_root *root, btrfs_set_inode_gid(leaf, inode_item, 0); btrfs_set_inode_mode(leaf, inode_item, S_IFREG | 0600); btrfs_set_inode_flags(leaf, inode_item, BTRFS_INODE_NOCOMPRESS | - BTRFS_INODE_PREALLOC | BTRFS_INODE_NODATASUM); + BTRFS_INODE_PREALLOC); btrfs_set_inode_nlink(leaf, inode_item, 1); btrfs_set_inode_transid(leaf, inode_item, trans->transid); btrfs_set_inode_block_group(leaf, inode_item, offset); @@ -239,17 +245,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space_header *header; struct extent_buffer *leaf; struct page *page; - u32 *checksums = NULL, *crc; - char *disk_crcs = NULL; struct btrfs_key key; struct list_head bitmaps; u64 num_entries; u64 num_bitmaps; u64 generation; - u32 cur_crc = ~(u32)0; pgoff_t index = 0; - unsigned long first_page_offset; - int num_checksums; int ret = 0; INIT_LIST_HEAD(&bitmaps); @@ -292,16 +293,6 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (!num_entries) goto out; - /* Setup everything for doing checksumming */ - num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; - checksums = crc = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS); - if (!checksums) - goto out; - first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); - disk_crcs = kzalloc(first_page_offset, GFP_NOFS); - if (!disk_crcs) - goto out; - ret = readahead_cache(inode); if (ret) goto out; @@ -311,18 +302,12 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_free_space *e; void *addr; unsigned long offset = 0; - unsigned long start_offset = 0; int need_loop = 0; if (!num_entries && !num_bitmaps) break; - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } - - page = grab_cache_page(inode->i_mapping, index); + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) goto free_cache; @@ -342,8 +327,15 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (index == 0) { u64 *gen; - memcpy(disk_crcs, addr, first_page_offset); - gen = addr + (sizeof(u32) * num_checksums); + /* + * We put a bogus crc in the front of the first page in + * case old kernels try to mount a fs with the 
new + * format to make sure they discard the cache. + */ + addr += sizeof(u64); + offset += sizeof(u64); + + gen = addr; if (*gen != BTRFS_I(inode)->generation) { printk(KERN_ERR "btrfs: space cache generation" " (%llu) does not match inode (%llu)\n", @@ -355,24 +347,10 @@ int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, page_cache_release(page); goto free_cache; } - crc = (u32 *)disk_crcs; - } - entry = addr + start_offset; - - /* First lets check our crc before we do anything fun */ - cur_crc = ~(u32)0; - cur_crc = btrfs_csum_data(root, addr + start_offset, cur_crc, - PAGE_CACHE_SIZE - start_offset); - btrfs_csum_final(cur_crc, (char *)&cur_crc); - if (cur_crc != *crc) { - printk(KERN_ERR "btrfs: crc mismatch for page %lu\n", - index); - kunmap(page); - unlock_page(page); - page_cache_release(page); - goto free_cache; + addr += sizeof(u64); + offset += sizeof(u64); } - crc++; + entry = addr; while (1) { if (!num_entries) @@ -470,8 +448,6 @@ next: ret = 1; out: - kfree(checksums); - kfree(disk_crcs); return ret; free_cache: __btrfs_remove_free_space_cache(ctl); @@ -569,8 +545,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, struct btrfs_key key; u64 start, end, len; u64 bytes = 0; - u32 *crc, *checksums; - unsigned long first_page_offset; + u32 crc = ~(u32)0; int index = 0, num_pages = 0; int entries = 0; int bitmaps = 0; @@ -590,34 +565,13 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; - /* Since the first page has all of our checksums and our generation we - * need to calculate the offset into the page that we can start writing - * our entries. - */ - first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64); - filemap_write_and_wait(inode->i_mapping); btrfs_wait_ordered_range(inode, inode->i_size & ~(root->sectorsize - 1), (u64)-1); - /* make sure we don't overflow that first page */ - if (first_page_offset + sizeof(struct btrfs_free_space_entry) >= PAGE_CACHE_SIZE) { - /* this is really the same as running out of space, where we also return 0 */ - printk(KERN_CRIT "Btrfs: free space cache was too big for the crc page\n"); - ret = 0; - goto out_update; - } - - /* We need a checksum per page. */ - crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS); - if (!crc) - return -1; - pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); - if (!pages) { - kfree(crc); + if (!pages) return -1; - } /* Get the cluster for this block_group if it exists */ if (block_group && !list_empty(&block_group->cluster_list)) @@ -640,7 +594,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, * know and don't freak out. 
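
With the per-page checksum array removed, the first page of the space cache now begins with a u64-sized slot holding a deliberately mismatching checksum (so pre-change kernels fail their crc check and discard the cache), followed by the u64 transaction generation; entries start immediately after. A hypothetical reader for that header (the struct is an assumption for illustration, not an on-disk definition from the patch):

    #include <stdbool.h>
    #include <stdint.h>
    #include <string.h>

    /* Assumed view of the first cache page:
     *   [u64 slot with a deliberately bogus crc][u64 generation][entries] */
    struct cache_page_header {
            uint64_t bogus_crc;     /* never matches: old kernels bail out */
            uint64_t generation;    /* must equal the inode's generation */
    };

    static bool cache_generation_ok(const void *page, uint64_t inode_gen)
    {
            struct cache_page_header h;

            memcpy(&h, page, sizeof(h));
            return h.generation == inode_gen;
    }
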
*/ while (index < num_pages) { - page = grab_cache_page(inode->i_mapping, index); + page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); if (!page) { int i; @@ -648,7 +602,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, unlock_page(pages[i]); page_cache_release(pages[i]); } - goto out_free; + goto out; } pages[index] = page; index++; @@ -668,17 +622,11 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, /* Write out the extent entries */ do { struct btrfs_free_space_entry *entry; - void *addr; + void *addr, *orig; unsigned long offset = 0; - unsigned long start_offset = 0; next_page = false; - if (index == 0) { - start_offset = first_page_offset; - offset = start_offset; - } - if (index >= num_pages) { out_of_space = true; break; @@ -686,10 +634,26 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, page = pages[index]; - addr = kmap(page); - entry = addr + start_offset; + orig = addr = kmap(page); + if (index == 0) { + u64 *gen; - memset(addr, 0, PAGE_CACHE_SIZE); + /* + * We're going to put in a bogus crc for this page to + * make sure that old kernels who aren't aware of this + * format will be sure to discard the cache. + */ + addr += sizeof(u64); + offset += sizeof(u64); + + gen = addr; + *gen = trans->transid; + addr += sizeof(u64); + offset += sizeof(u64); + } + entry = addr; + + memset(addr, 0, PAGE_CACHE_SIZE - offset); while (node && !next_page) { struct btrfs_free_space *e; @@ -752,13 +716,19 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, next_page = true; entry++; } - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr + start_offset, *crc, - PAGE_CACHE_SIZE - start_offset); - kunmap(page); - btrfs_csum_final(*crc, (char *)crc); - crc++; + /* Generate bogus crc value */ + if (index == 0) { + u32 *tmp; + crc = btrfs_csum_data(root, orig + sizeof(u64), crc, + PAGE_CACHE_SIZE - sizeof(u64)); + btrfs_csum_final(crc, (char *)&crc); + crc++; + tmp = orig; + *tmp = crc; + } + + kunmap(page); bytes += PAGE_CACHE_SIZE; @@ -779,11 +749,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, addr = kmap(page); memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); - *crc = ~(u32)0; - *crc = btrfs_csum_data(root, addr, *crc, PAGE_CACHE_SIZE); kunmap(page); - btrfs_csum_final(*crc, (char *)crc); - crc++; bytes += PAGE_CACHE_SIZE; list_del_init(&entry->list); @@ -796,7 +762,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, i_size_read(inode) - 1, &cached_state, GFP_NOFS); ret = 0; - goto out_free; + goto out; } /* Zero out the rest of the pages just to make sure */ @@ -811,20 +777,6 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, index++; } - /* Write the checksums and trans id to the first page */ - { - void *addr; - u64 *gen; - - page = pages[0]; - - addr = kmap(page); - memcpy(addr, checksums, sizeof(u32) * num_pages); - gen = addr + (sizeof(u32) * num_pages); - *gen = trans->transid; - kunmap(page); - } - ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0, bytes, &cached_state); btrfs_drop_pages(pages, num_pages); @@ -833,7 +785,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (ret) { ret = 0; - goto out_free; + goto out; } BTRFS_I(inode)->generation = trans->transid; @@ -850,7 +802,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, bytes - 1, EXTENT_DIRTY | EXTENT_DELALLOC | 
EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); - goto out_free; + goto out; } leaf = path->nodes[0]; if (ret > 0) { @@ -866,7 +818,7 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, EXTENT_DO_ACCOUNTING, 0, 0, NULL, GFP_NOFS); btrfs_release_path(path); - goto out_free; + goto out; } } header = btrfs_item_ptr(leaf, path->slots[0], @@ -879,11 +831,8 @@ int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, ret = 1; -out_free: - kfree(checksums); +out: kfree(pages); - -out_update: if (ret != 1) { invalidate_inode_pages2_range(inode->i_mapping, 0, index); BTRFS_I(inode)->generation = 0; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index caa26ab5ed68..13e6255182e3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -750,15 +750,6 @@ static u64 get_extent_allocation_hint(struct inode *inode, u64 start, return alloc_hint; } -static inline bool is_free_space_inode(struct btrfs_root *root, - struct inode *inode) -{ - if (root == root->fs_info->tree_root || - BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) - return true; - return false; -} - /* * when extent_io.c finds a delayed allocation range in the file, * the call backs end up in this code. The basic idea is to @@ -791,7 +782,7 @@ static noinline int cow_file_range(struct inode *inode, struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; int ret = 0; - BUG_ON(is_free_space_inode(root, inode)); + BUG_ON(btrfs_is_free_space_inode(root, inode)); trans = btrfs_join_transaction(root); BUG_ON(IS_ERR(trans)); trans->block_rsv = &root->fs_info->delalloc_block_rsv; @@ -1072,7 +1063,7 @@ static noinline int run_delalloc_nocow(struct inode *inode, path = btrfs_alloc_path(); BUG_ON(!path); - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (nolock) trans = btrfs_join_transaction_nolock(root); @@ -1298,7 +1289,9 @@ static int btrfs_split_extent_hook(struct inode *inode, if (!(orig->state & EXTENT_DELALLOC)) return 0; - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); return 0; } @@ -1316,7 +1309,9 @@ static int btrfs_merge_extent_hook(struct inode *inode, if (!(other->state & EXTENT_DELALLOC)) return 0; - atomic_dec(&BTRFS_I(inode)->outstanding_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); return 0; } @@ -1337,12 +1332,15 @@ static int btrfs_set_bit_hook(struct inode *inode, if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + } else { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); + } spin_lock(&root->fs_info->delalloc_lock); BTRFS_I(inode)->delalloc_bytes += len; @@ -1370,12 +1368,15 @@ static int btrfs_clear_bit_hook(struct inode *inode, if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { struct btrfs_root *root = BTRFS_I(inode)->root; u64 len = state->end + 1 - state->start; - bool do_list = !is_free_space_inode(root, inode); + bool do_list = !btrfs_is_free_space_inode(root, 
inode); - if (*bits & EXTENT_FIRST_DELALLOC) + if (*bits & EXTENT_FIRST_DELALLOC) { *bits &= ~EXTENT_FIRST_DELALLOC; - else if (!(*bits & EXTENT_DO_ACCOUNTING)) - atomic_dec(&BTRFS_I(inode)->outstanding_extents); + } else if (!(*bits & EXTENT_DO_ACCOUNTING)) { + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents--; + spin_unlock(&BTRFS_I(inode)->lock); + } if (*bits & EXTENT_DO_ACCOUNTING) btrfs_delalloc_release_metadata(inode, len); @@ -1477,7 +1478,7 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; - if (is_free_space_inode(root, inode)) + if (btrfs_is_free_space_inode(root, inode)) ret = btrfs_bio_wq_end_io(root->fs_info, bio, 2); else ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); @@ -1726,7 +1727,7 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) return 0; BUG_ON(!ordered_extent); - nolock = is_free_space_inode(root, inode); + nolock = btrfs_is_free_space_inode(root, inode); if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) { BUG_ON(!list_empty(&ordered_extent->list)); @@ -2531,13 +2532,6 @@ static void btrfs_read_locked_inode(struct inode *inode) inode_item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)inode_item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - inode->i_mode = btrfs_inode_mode(leaf, inode_item); inode->i_nlink = btrfs_inode_nlink(leaf, inode_item); inode->i_uid = btrfs_inode_uid(leaf, inode_item); @@ -2575,11 +2569,6 @@ cache_acl: if (!maybe_acls) cache_no_acl(inode); - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } - btrfs_free_path(path); switch (inode->i_mode & S_IFMT) { @@ -2624,13 +2613,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, struct btrfs_inode_item *item, struct inode *inode) { - if (!leaf->map_token) - map_private_extent_buffer(leaf, (unsigned long)item, - sizeof(struct btrfs_inode_item), - &leaf->map_token, &leaf->kaddr, - &leaf->map_start, &leaf->map_len, - KM_USER1); - btrfs_set_inode_uid(leaf, item, inode->i_uid); btrfs_set_inode_gid(leaf, item, inode->i_gid); btrfs_set_inode_size(leaf, item, BTRFS_I(inode)->disk_i_size); @@ -2659,11 +2641,6 @@ static void fill_inode_item(struct btrfs_trans_handle *trans, btrfs_set_inode_rdev(leaf, item, inode->i_rdev); btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags); btrfs_set_inode_block_group(leaf, item, 0); - - if (leaf->map_token) { - unmap_extent_buffer(leaf, leaf->map_token, KM_USER1); - leaf->map_token = NULL; - } } /* @@ -2684,7 +2661,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, * The data relocation inode should also be directly updated * without delay */ - if (!is_free_space_inode(root, inode) + if (!btrfs_is_free_space_inode(root, inode) && root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) { ret = btrfs_delayed_update_inode(trans, root, inode); if (!ret) @@ -3398,7 +3375,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) ret = -ENOMEM; again: - page = grab_cache_page(mapping, index); + page = find_or_create_page(mapping, index, GFP_NOFS); if (!page) { btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); goto out; @@ -3634,7 +3611,7 @@ void btrfs_evict_inode(struct inode *inode) truncate_inode_pages(&inode->i_data, 0); if (inode->i_nlink && 
(btrfs_root_refs(&root->root_item) != 0 || - is_free_space_inode(root, inode))) + btrfs_is_free_space_inode(root, inode))) goto no_delete; if (is_bad_inode(inode)) { @@ -4271,7 +4248,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (BTRFS_I(inode)->dummy_inode) return 0; - if (btrfs_fs_closing(root->fs_info) && is_free_space_inode(root, inode)) + if (btrfs_fs_closing(root->fs_info) && btrfs_is_free_space_inode(root, inode)) nolock = true; if (wbc->sync_mode == WB_SYNC_ALL) { @@ -6728,8 +6705,9 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) ei->index_cnt = (u64)-1; ei->last_unlink_trans = 0; - atomic_set(&ei->outstanding_extents, 0); - atomic_set(&ei->reserved_extents, 0); + spin_lock_init(&ei->lock); + ei->outstanding_extents = 0; + ei->reserved_extents = 0; ei->ordered_data_close = 0; ei->orphan_meta_reserved = 0; @@ -6767,8 +6745,8 @@ void btrfs_destroy_inode(struct inode *inode) WARN_ON(!list_empty(&inode->i_dentry)); WARN_ON(inode->i_data.nrpages); - WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); - WARN_ON(atomic_read(&BTRFS_I(inode)->reserved_extents)); + WARN_ON(BTRFS_I(inode)->outstanding_extents); + WARN_ON(BTRFS_I(inode)->reserved_extents); /* * This can happen where we create an inode, but somebody else also @@ -6823,7 +6801,7 @@ int btrfs_drop_inode(struct inode *inode) struct btrfs_root *root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0 && - !is_free_space_inode(root, inode)) + !btrfs_is_free_space_inode(root, inode)) return 1; else return generic_drop_inode(inode); diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 622543309eb2..0b980afc5edd 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -859,8 +859,8 @@ again: /* step one, lock all the pages */ for (i = 0; i < num_pages; i++) { struct page *page; - page = grab_cache_page(inode->i_mapping, - start_index + i); + page = find_or_create_page(inode->i_mapping, + start_index + i, GFP_NOFS); if (!page) break; @@ -930,7 +930,9 @@ again: GFP_NOFS); if (i_done != num_pages) { - atomic_inc(&BTRFS_I(inode)->outstanding_extents); + spin_lock(&BTRFS_I(inode)->lock); + BTRFS_I(inode)->outstanding_extents++; + spin_unlock(&BTRFS_I(inode)->lock); btrfs_delalloc_release_space(inode, (num_pages - i_done) << PAGE_CACHE_SHIFT); } diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 66fa43dc3f0f..d77b67c4b275 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c @@ -24,185 +24,197 @@ #include "extent_io.h" #include "locking.h" -static inline void spin_nested(struct extent_buffer *eb) -{ - spin_lock(&eb->lock); -} +void btrfs_assert_tree_read_locked(struct extent_buffer *eb); /* - * Setting a lock to blocking will drop the spinlock and set the - * flag that forces other procs who want the lock to wait. After - * this you can safely schedule with the lock held. + * if we currently have a spinning reader or writer lock + * (indicated by the rw flag) this will bump the count + * of blocking holders and drop the spinlock. 
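
The locking rework that follows replaces the single spinlock-plus-blocking-bit with four lock states — BTRFS_WRITE_LOCK, BTRFS_READ_LOCK, and their _BLOCKING variants — where "going blocking" means recording the holder in an atomic counter and releasing the underlying rwlock so the holder may sleep. A compact userspace sketch of that transition, using a pthread rwlock and C11 atomics as stand-ins for the kernel primitives (illustrative only, not the kernel API):

    #include <pthread.h>
    #include <stdatomic.h>

    enum {
            WRITE_LOCK = 1,
            READ_LOCK,
            WRITE_LOCK_BLOCKING,
            READ_LOCK_BLOCKING,
    };

    struct eb_sketch {
            pthread_rwlock_t lock;          /* stands in for the rwlock_t */
            atomic_int blocking_writers;    /* write holders gone blocking */
            atomic_int blocking_readers;    /* read holders gone blocking */
    };

    /* Convert a held spinning lock into a blocking one: record the holder
     * in a counter, then release the rwlock so the holder may sleep while
     * would-be takers see the nonzero counter and wait. */
    static int set_lock_blocking_rw(struct eb_sketch *eb, int rw)
    {
            if (rw == WRITE_LOCK)
                    atomic_fetch_add(&eb->blocking_writers, 1);
            else if (rw == READ_LOCK)
                    atomic_fetch_add(&eb->blocking_readers, 1);
            else
                    return rw;              /* already blocking: nothing to do */
            pthread_rwlock_unlock(&eb->lock);
            return rw + 2;                  /* now the *_BLOCKING variant */
    }

Would-be lockers first wait for the blocking counters to drain and only then take the rwlock, mirroring the wait_event()/write_lock() retry loop in btrfs_tree_lock() below.
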
*/ -void btrfs_set_lock_blocking(struct extent_buffer *eb) +void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - set_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - spin_unlock(&eb->lock); + if (rw == BTRFS_WRITE_LOCK) { + if (atomic_read(&eb->blocking_writers) == 0) { + WARN_ON(atomic_read(&eb->spinning_writers) != 1); + atomic_dec(&eb->spinning_writers); + btrfs_assert_tree_locked(eb); + atomic_inc(&eb->blocking_writers); + write_unlock(&eb->lock); + } + } else if (rw == BTRFS_READ_LOCK) { + btrfs_assert_tree_read_locked(eb); + atomic_inc(&eb->blocking_readers); + WARN_ON(atomic_read(&eb->spinning_readers) == 0); + atomic_dec(&eb->spinning_readers); + read_unlock(&eb->lock); } - /* exit with the spin lock released and the bit set */ + return; } /* - * clearing the blocking flag will take the spinlock again. - * After this you can't safely schedule + * if we currently have a blocking lock, take the spinlock + * and drop our blocking count */ -void btrfs_clear_lock_blocking(struct extent_buffer *eb) +void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw) { - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) { - spin_nested(eb); - clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags); - smp_mb__after_clear_bit(); + if (rw == BTRFS_WRITE_LOCK_BLOCKING) { + BUG_ON(atomic_read(&eb->blocking_writers) != 1); + write_lock(&eb->lock); + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_inc(&eb->spinning_writers); + if (atomic_dec_and_test(&eb->blocking_writers)) + wake_up(&eb->write_lock_wq); + } else if (rw == BTRFS_READ_LOCK_BLOCKING) { + BUG_ON(atomic_read(&eb->blocking_readers) == 0); + read_lock(&eb->lock); + atomic_inc(&eb->spinning_readers); + if (atomic_dec_and_test(&eb->blocking_readers)) + wake_up(&eb->read_lock_wq); } - /* exit with the spin lock held */ + return; } /* - * unfortunately, many of the places that currently set a lock to blocking - * don't end up blocking for very long, and often they don't block - * at all. For a dbench 50 run, if we don't spin on the blocking bit - * at all, the context switch rate can jump up to 400,000/sec or more. - * - * So, we're still stuck with this crummy spin on the blocking bit, - * at least until the most common causes of the short blocks - * can be dealt with. + * take a spinning read lock. This will wait for any blocking + * writers */ -static int btrfs_spin_on_block(struct extent_buffer *eb) +void btrfs_tree_read_lock(struct extent_buffer *eb) { - int i; - - for (i = 0; i < 512; i++) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - if (need_resched()) - break; - cpu_relax(); +again: + wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + wait_event(eb->write_lock_wq, + atomic_read(&eb->blocking_writers) == 0); + goto again; } - return 0; + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); } /* - * This is somewhat different from trylock. It will take the - * spinlock but if it finds the lock is set to blocking, it will - * return without the lock held. 
- * - * returns 1 if it was able to take the lock and zero otherwise - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers */ -int btrfs_try_spin_lock(struct extent_buffer *eb) +int btrfs_try_tree_read_lock(struct extent_buffer *eb) { - int i; + if (atomic_read(&eb->blocking_writers)) + return 0; - if (btrfs_spin_on_block(eb)) { - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); + read_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers)) { + read_unlock(&eb->lock); + return 0; } - /* spin for a bit on the BLOCKING flag */ - for (i = 0; i < 2; i++) { - cpu_relax(); - if (!btrfs_spin_on_block(eb)) - break; - - spin_nested(eb); - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 1; - spin_unlock(&eb->lock); - } - return 0; + atomic_inc(&eb->read_locks); + atomic_inc(&eb->spinning_readers); + return 1; } /* - * the autoremove wake function will return 0 if it tried to wake up - * a process that was already awake, which means that process won't - * count as an exclusive wakeup. The waitq code will continue waking - * procs until it finds one that was actually sleeping. - * - * For btrfs, this isn't quite what we want. We want a single proc - * to be notified that the lock is ready for taking. If that proc - * already happen to be awake, great, it will loop around and try for - * the lock. - * - * So, btrfs_wake_function always returns 1, even when the proc that we - * tried to wake up was already awake. + * returns 1 if we get the read lock and 0 if we don't + * this won't wait for blocking writers or readers */ -static int btrfs_wake_function(wait_queue_t *wait, unsigned mode, - int sync, void *key) +int btrfs_try_tree_write_lock(struct extent_buffer *eb) { - autoremove_wake_function(wait, mode, sync, key); + if (atomic_read(&eb->blocking_writers) || + atomic_read(&eb->blocking_readers)) + return 0; + write_lock(&eb->lock); + if (atomic_read(&eb->blocking_writers) || + atomic_read(&eb->blocking_readers)) { + write_unlock(&eb->lock); + return 0; + } + atomic_inc(&eb->write_locks); + atomic_inc(&eb->spinning_writers); return 1; } /* - * returns with the extent buffer spinlocked. - * - * This will spin and/or wait as required to take the lock, and then - * return with the spinlock held. - * - * After this call, scheduling is not safe without first calling - * btrfs_set_lock_blocking() + * drop a spinning read lock + */ +void btrfs_tree_read_unlock(struct extent_buffer *eb) +{ + btrfs_assert_tree_read_locked(eb); + WARN_ON(atomic_read(&eb->spinning_readers) == 0); + atomic_dec(&eb->spinning_readers); + atomic_dec(&eb->read_locks); + read_unlock(&eb->lock); +} + +/* + * drop a blocking read lock + */ +void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb) +{ + btrfs_assert_tree_read_locked(eb); + WARN_ON(atomic_read(&eb->blocking_readers) == 0); + if (atomic_dec_and_test(&eb->blocking_readers)) + wake_up(&eb->read_lock_wq); + atomic_dec(&eb->read_locks); +} + +/* + * take a spinning write lock. 
This will wait for both + * blocking readers and writers */ int btrfs_tree_lock(struct extent_buffer *eb) { - DEFINE_WAIT(wait); - wait.func = btrfs_wake_function; - - if (!btrfs_spin_on_block(eb)) - goto sleep; - - while(1) { - spin_nested(eb); - - /* nobody is blocking, exit with the spinlock held */ - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - return 0; - - /* - * we have the spinlock, but the real owner is blocking. - * wait for them - */ - spin_unlock(&eb->lock); - - /* - * spin for a bit, and if the blocking flag goes away, - * loop around - */ - cpu_relax(); - if (btrfs_spin_on_block(eb)) - continue; -sleep: - prepare_to_wait_exclusive(&eb->lock_wq, &wait, - TASK_UNINTERRUPTIBLE); - - if (test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - schedule(); - - finish_wait(&eb->lock_wq, &wait); +again: + wait_event(eb->read_lock_wq, atomic_read(&eb->blocking_readers) == 0); + wait_event(eb->write_lock_wq, atomic_read(&eb->blocking_writers) == 0); + write_lock(&eb->lock); + if (atomic_read(&eb->blocking_readers)) { + write_unlock(&eb->lock); + wait_event(eb->read_lock_wq, + atomic_read(&eb->blocking_readers) == 0); + goto again; } + if (atomic_read(&eb->blocking_writers)) { + write_unlock(&eb->lock); + wait_event(eb->write_lock_wq, + atomic_read(&eb->blocking_writers) == 0); + goto again; + } + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_inc(&eb->spinning_writers); + atomic_inc(&eb->write_locks); return 0; } +/* + * drop a spinning or a blocking write lock. + */ int btrfs_tree_unlock(struct extent_buffer *eb) { - /* - * if we were a blocking owner, we don't have the spinlock held - * just clear the bit and look for waiters - */ - if (test_and_clear_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - smp_mb__after_clear_bit(); - else - spin_unlock(&eb->lock); - - if (waitqueue_active(&eb->lock_wq)) - wake_up(&eb->lock_wq); + int blockers = atomic_read(&eb->blocking_writers); + + BUG_ON(blockers > 1); + + btrfs_assert_tree_locked(eb); + atomic_dec(&eb->write_locks); + + if (blockers) { + WARN_ON(atomic_read(&eb->spinning_writers)); + atomic_dec(&eb->blocking_writers); + smp_wmb(); + wake_up(&eb->write_lock_wq); + } else { + WARN_ON(atomic_read(&eb->spinning_writers) != 1); + atomic_dec(&eb->spinning_writers); + write_unlock(&eb->lock); + } return 0; } void btrfs_assert_tree_locked(struct extent_buffer *eb) { - if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) - assert_spin_locked(&eb->lock); + BUG_ON(!atomic_read(&eb->write_locks)); +} + +void btrfs_assert_tree_read_locked(struct extent_buffer *eb) +{ + BUG_ON(!atomic_read(&eb->read_locks)); } diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 5c33a560a2f1..17247ddb81a0 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -19,11 +19,43 @@ #ifndef __BTRFS_LOCKING_ #define __BTRFS_LOCKING_ +#define BTRFS_WRITE_LOCK 1 +#define BTRFS_READ_LOCK 2 +#define BTRFS_WRITE_LOCK_BLOCKING 3 +#define BTRFS_READ_LOCK_BLOCKING 4 + int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); -void btrfs_set_lock_blocking(struct extent_buffer *eb); -void btrfs_clear_lock_blocking(struct extent_buffer *eb); +void btrfs_tree_read_lock(struct extent_buffer *eb); +void btrfs_tree_read_unlock(struct extent_buffer *eb); +void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); +void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); +void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct
diff --git a/fs/btrfs/locking.h b/fs/btrfs/locking.h index 5c33a560a2f1..17247ddb81a0 100644 --- a/fs/btrfs/locking.h +++ b/fs/btrfs/locking.h @@ -19,11 +19,43 @@ #ifndef __BTRFS_LOCKING_ #define __BTRFS_LOCKING_ +#define BTRFS_WRITE_LOCK 1 +#define BTRFS_READ_LOCK 2 +#define BTRFS_WRITE_LOCK_BLOCKING 3 +#define BTRFS_READ_LOCK_BLOCKING 4 + int btrfs_tree_lock(struct extent_buffer *eb); int btrfs_tree_unlock(struct extent_buffer *eb); int btrfs_try_spin_lock(struct extent_buffer *eb); -void btrfs_set_lock_blocking(struct extent_buffer *eb); -void btrfs_clear_lock_blocking(struct extent_buffer *eb); +void btrfs_tree_read_lock(struct extent_buffer *eb); +void btrfs_tree_read_unlock(struct extent_buffer *eb); +void btrfs_tree_read_unlock_blocking(struct extent_buffer *eb); +void btrfs_set_lock_blocking_rw(struct extent_buffer *eb, int rw); +void btrfs_clear_lock_blocking_rw(struct extent_buffer *eb, int rw); void btrfs_assert_tree_locked(struct extent_buffer *eb); +int btrfs_try_tree_read_lock(struct extent_buffer *eb); +int btrfs_try_tree_write_lock(struct extent_buffer *eb); + +static inline void btrfs_tree_unlock_rw(struct extent_buffer *eb, int rw) +{ + if (rw == BTRFS_WRITE_LOCK || rw == BTRFS_WRITE_LOCK_BLOCKING) + btrfs_tree_unlock(eb); + else if (rw == BTRFS_READ_LOCK_BLOCKING) + btrfs_tree_read_unlock_blocking(eb); + else if (rw == BTRFS_READ_LOCK) + btrfs_tree_read_unlock(eb); + else + BUG(); +} + +static inline void btrfs_set_lock_blocking(struct extent_buffer *eb) +{ + btrfs_set_lock_blocking_rw(eb, BTRFS_WRITE_LOCK); +} + +static inline void btrfs_clear_lock_blocking(struct extent_buffer *eb) +{ + btrfs_clear_lock_blocking_rw(eb, BTRFS_WRITE_LOCK_BLOCKING); +} #endif diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 5e0a3dc79a45..59bb1764273d 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -2955,7 +2955,8 @@ static int relocate_file_extent_cluster(struct inode *inode, page_cache_sync_readahead(inode->i_mapping, ra, NULL, index, last_index + 1 - index); - page = grab_cache_page(inode->i_mapping, index); + page = find_or_create_page(inode->i_mapping, index, + GFP_NOFS); if (!page) { btrfs_delalloc_release_metadata(inode, PAGE_CACHE_SIZE); diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c index c0f7ecaf1e79..bc1f6ad18442 100644 --- a/fs/btrfs/struct-funcs.c +++ b/fs/btrfs/struct-funcs.c @@ -50,36 +50,22 @@ u##bits btrfs_##name(struct extent_buffer *eb, \ unsigned long part_offset = (unsigned long)s; \ unsigned long offset = part_offset + offsetof(type, member); \ type *p; \ - /* ugly, but we want the fast path here */ \ - if (eb->map_token && offset >= eb->map_start && \ - offset + sizeof(((type *)0)->member) <= eb->map_start + \ - eb->map_len) { \ - p = (type *)(eb->kaddr + part_offset - eb->map_start); \ - return le##bits##_to_cpu(p->member); \ - } \ - { \ -
int err; \ - char *map_token; \ - char *kaddr; \ - int unmap_on_exit = (eb->map_token == NULL); \ - unsigned long map_start; \ - unsigned long map_len; \ - err = map_extent_buffer(eb, offset, \ - sizeof(((type *)0)->member), \ - &map_token, &kaddr, \ - &map_start, &map_len, KM_USER1); \ - if (err) { \ - __le##bits val2; \ - val2 = cpu_to_le##bits(val); \ - write_eb_member(eb, s, type, member, &val2); \ - return; \ - } \ - p = (type *)(kaddr + part_offset - map_start); \ - p->member = cpu_to_le##bits(val); \ - if (unmap_on_exit) \ - unmap_extent_buffer(eb, map_token, KM_USER1); \ - } \ + int err; \ + char *kaddr; \ + unsigned long map_start; \ + unsigned long map_len; \ + err = map_private_extent_buffer(eb, offset, \ + sizeof(((type *)0)->member), \ + &kaddr, &map_start, &map_len); \ + if (err) { \ + __le##bits val2; \ + val2 = cpu_to_le##bits(val); \ + write_eb_member(eb, s, type, member, &val2); \ + return; \ + } \ + p = (type *)(kaddr + part_offset - map_start); \ + p->member = cpu_to_le##bits(val); \ } #include "ctree.h" @@ -125,15 +96,6 @@ void btrfs_node_key(struct extent_buffer *eb, struct btrfs_disk_key *disk_key, int nr) { unsigned long ptr = btrfs_node_key_ptr_offset(nr); - if (eb->map_token && ptr >= eb->map_start && - ptr + sizeof(*disk_key) <= eb->map_start + eb->map_len) { - memcpy(disk_key, eb->kaddr + ptr - eb->map_start, - sizeof(*disk_key)); - return; - } else if (eb->map_token) { - unmap_extent_buffer(eb, eb->map_token, KM_USER1); - eb->map_token = NULL; - } read_eb_member(eb, (struct btrfs_key_ptr *)ptr, struct btrfs_key_ptr, key, disk_key); } diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 51dcec86757f..eb55863bb4ae 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -260,7 +260,7 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, { struct btrfs_trans_handle *h; struct btrfs_transaction *cur_trans; - int retries = 0; + u64 num_bytes = 0; int ret; if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) @@ -274,6 +274,19 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, h->block_rsv = NULL; goto got_it; } + + /* + * Do the reservation before we join the transaction so we can do all + * the appropriate flushing if need be. + */ + if (num_items > 0 && root != root->fs_info->chunk_root) { + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_block_rsv_add(NULL, root, + &root->fs_info->trans_block_rsv, + num_bytes); + if (ret) + return ERR_PTR(ret); + } again: h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); if (!h) @@ -310,24 +323,9 @@ again: goto again; } - if (num_items > 0) { - ret = btrfs_trans_reserve_metadata(h, root, num_items); - if (ret == -EAGAIN && !retries) { - retries++; - btrfs_commit_transaction(h, root); - goto again; - } else if (ret == -EAGAIN) { - /* - * We have already retried and got EAGAIN, so really we - * don't have space, so set ret to -ENOSPC. - */ - ret = -ENOSPC; - } - - if (ret < 0) { - btrfs_end_transaction(h, root); - return ERR_PTR(ret); - } + if (num_bytes) { + h->block_rsv = &root->fs_info->trans_block_rsv; + h->bytes_reserved = num_bytes; } got_it: @@ -499,10 +497,17 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, } if (lock && cur_trans->blocked && !cur_trans->in_commit) { - if (throttle) + if (throttle) { + /* + * We may race with somebody else here so end up having + * to call end_transaction on ourselves again, so inc + * our use_count. 
+ */ + trans->use_count++; return btrfs_commit_transaction(trans, root); - else + } else { wake_up_process(info->transaction_kthread); + } } WARN_ON(cur_trans != info->running_transaction); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 4ce8a9f41d1e..ac278dd83175 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -1730,8 +1730,8 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, btrfs_read_buffer(next, ptr_gen); btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1796,8 +1796,8 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[*level]; btrfs_tree_lock(next); - clean_tree_block(trans, root, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, root, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); @@ -1864,8 +1864,8 @@ static int walk_log_tree(struct btrfs_trans_handle *trans, next = path->nodes[orig_level]; btrfs_tree_lock(next); - clean_tree_block(trans, log, next); btrfs_set_lock_blocking(next); + clean_tree_block(trans, log, next); btrfs_wait_tree_block_writeback(next); btrfs_tree_unlock(next); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 19450bc53632..b89e372c7544 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3595,7 +3595,7 @@ int btrfs_read_sys_array(struct btrfs_root *root) if (!sb) return -ENOMEM; btrfs_set_buffer_uptodate(sb); - btrfs_set_buffer_lockdep_class(sb, 0); + btrfs_set_buffer_lockdep_class(root->root_key.objectid, sb, 0); write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE); array_size = btrfs_super_sys_array_size(super_copy); diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 5366fe452ab0..d733b9cfea34 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c @@ -102,43 +102,57 @@ static int do_setxattr(struct btrfs_trans_handle *trans, if (!path) return -ENOMEM; - /* first lets see if we already have this xattr */ - di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, - strlen(name), -1); - if (IS_ERR(di)) { - ret = PTR_ERR(di); - goto out; - } - - /* ok we already have this xattr, lets remove it */ - if (di) { - /* if we want create only exit */ - if (flags & XATTR_CREATE) { - ret = -EEXIST; + if (flags & XATTR_REPLACE) { + di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), name, + name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } else if (!di) { + ret = -ENODATA; goto out; } - ret = btrfs_delete_one_dir_name(trans, root, path, di); - BUG_ON(ret); + if (ret) + goto out; btrfs_release_path(path); + } - /* if we don't have a value then we are removing the xattr */ - if (!value) +again: + ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), + name, name_len, value, size); + if (ret == -EEXIST) { + if (flags & XATTR_CREATE) goto out; - } else { + /* + * We can't use the path we already have since we won't have the + * proper locking for a delete, so release the path and + * re-lookup to delete the thing. + */ btrfs_release_path(path); + di = btrfs_lookup_xattr(trans, root, path, btrfs_ino(inode), + name, name_len, -1); + if (IS_ERR(di)) { + ret = PTR_ERR(di); + goto out; + } else if (!di) { + /* Shouldn't happen but just in case... 
*/ + btrfs_release_path(path); + goto again; + } - if (flags & XATTR_REPLACE) { - /* we couldn't find the attr to replace */ - ret = -ENODATA; + ret = btrfs_delete_one_dir_name(trans, root, path, di); + if (ret) goto out; + + /* + * We have a value to set, so go back and try to insert it now. + */ + if (value) { + btrfs_release_path(path); + goto again; } } - - /* ok we have to create a completely new xattr */ - ret = btrfs_insert_xattr_item(trans, root, path, btrfs_ino(inode), - name, name_len, value, size); - BUG_ON(ret); out: btrfs_free_path(path); return ret; diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h index 43c7c43b06f5..b36c5572b3f3 100644 --- a/fs/ecryptfs/ecryptfs_kernel.h +++ b/fs/ecryptfs/ecryptfs_kernel.h @@ -29,6 +29,7 @@ #define ECRYPTFS_KERNEL_H #include <keys/user-type.h> +#include <keys/encrypted-type.h> #include <linux/fs.h> #include <linux/fs_stack.h> #include <linux/namei.h> @@ -36,125 +37,18 @@ #include <linux/hash.h> #include <linux/nsproxy.h> #include <linux/backing-dev.h> +#include <linux/ecryptfs.h> -/* Version verification for shared data structures w/ userspace */ -#define ECRYPTFS_VERSION_MAJOR 0x00 -#define ECRYPTFS_VERSION_MINOR 0x04 -#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 -/* These flags indicate which features are supported by the kernel - * module; userspace tools such as the mount helper read - * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine - * how to behave. */ -#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 -#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 -#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 -#define ECRYPTFS_VERSIONING_POLICY 0x00000008 -#define ECRYPTFS_VERSIONING_XATTR 0x00000010 -#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 -#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 -#define ECRYPTFS_VERSIONING_HMAC 0x00000080 -#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 -#define ECRYPTFS_VERSIONING_GCM 0x00000200 -#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ - | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ - | ECRYPTFS_VERSIONING_PUBKEY \ - | ECRYPTFS_VERSIONING_XATTR \ - | ECRYPTFS_VERSIONING_MULTKEY \ - | ECRYPTFS_VERSIONING_DEVMISC \ - | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) -#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 -#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH -#define ECRYPTFS_SALT_SIZE 8 -#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2) -/* The original signature size is only for what is stored on disk; all - * in-memory representations are expanded hex, so it better adapted to - * be passed around or referenced on the command line */ -#define ECRYPTFS_SIG_SIZE 8 -#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2) -#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX -#define ECRYPTFS_MAX_KEY_BYTES 64 -#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 #define ECRYPTFS_DEFAULT_IV_BYTES 16 -#define ECRYPTFS_FILE_VERSION 0x03 #define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096 #define ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE 8192 #define ECRYPTFS_DEFAULT_MSG_CTX_ELEMS 32 #define ECRYPTFS_DEFAULT_SEND_TIMEOUT HZ #define ECRYPTFS_MAX_MSG_CTX_TTL (HZ*3) -#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 #define ECRYPTFS_DEFAULT_NUM_USERS 4 #define ECRYPTFS_MAX_NUM_USERS 32768 #define ECRYPTFS_XATTR_NAME "user.ecryptfs" -#define RFC2440_CIPHER_DES3_EDE 0x02 -#define RFC2440_CIPHER_CAST_5 0x03 -#define RFC2440_CIPHER_BLOWFISH 0x04 -#define RFC2440_CIPHER_AES_128 0x07 -#define RFC2440_CIPHER_AES_192 0x08 -#define RFC2440_CIPHER_AES_256 0x09 
-#define RFC2440_CIPHER_TWOFISH 0x0a -#define RFC2440_CIPHER_CAST_6 0x0b - -#define RFC2440_CIPHER_RSA 0x01 - -/** - * For convenience, we may need to pass around the encrypted session - * key between kernel and userspace because the authentication token - * may not be extractable. For example, the TPM may not release the - * private key, instead requiring the encrypted data and returning the - * decrypted data. - */ -struct ecryptfs_session_key { -#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001 -#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002 -#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004 -#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008 - u32 flags; - u32 encrypted_key_size; - u32 decrypted_key_size; - u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES]; - u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES]; -}; - -struct ecryptfs_password { - u32 password_bytes; - s32 hash_algo; - u32 hash_iterations; - u32 session_key_encryption_key_bytes; -#define ECRYPTFS_PERSISTENT_PASSWORD 0x01 -#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02 - u32 flags; - /* Iterated-hash concatenation of salt and passphrase */ - u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; - u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; - /* Always in expanded hex */ - u8 salt[ECRYPTFS_SALT_SIZE]; -}; - -enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY}; - -struct ecryptfs_private_key { - u32 key_size; - u32 data_len; - u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; - char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1]; - u8 data[]; -}; - -/* May be a password or a private key */ -struct ecryptfs_auth_tok { - u16 version; /* 8-bit major and 8-bit minor */ - u16 token_type; -#define ECRYPTFS_ENCRYPT_ONLY 0x00000001 - u32 flags; - struct ecryptfs_session_key session_key; - u8 reserved[32]; - union { - struct ecryptfs_password password; - struct ecryptfs_private_key private_key; - } token; -} __attribute__ ((packed)); - void ecryptfs_dump_auth_tok(struct ecryptfs_auth_tok *auth_tok); extern void ecryptfs_to_hex(char *dst, char *src, size_t src_size); extern void ecryptfs_from_hex(char *dst, char *src, int dst_size); @@ -185,11 +79,47 @@ struct ecryptfs_page_crypt_context { } param; }; +#if defined(CONFIG_ENCRYPTED_KEYS) || defined(CONFIG_ENCRYPTED_KEYS_MODULE) +static inline struct ecryptfs_auth_tok * +ecryptfs_get_encrypted_key_payload_data(struct key *key) +{ + if (key->type == &key_type_encrypted) + return (struct ecryptfs_auth_tok *) + (&((struct encrypted_key_payload *)key->payload.data)->payload_data); + else + return NULL; +} + +static inline struct key *ecryptfs_get_encrypted_key(char *sig) +{ + return request_key(&key_type_encrypted, sig, NULL); +} + +#else +static inline struct ecryptfs_auth_tok * +ecryptfs_get_encrypted_key_payload_data(struct key *key) +{ + return NULL; +} + +static inline struct key *ecryptfs_get_encrypted_key(char *sig) +{ + return ERR_PTR(-ENOKEY); +} + +#endif /* CONFIG_ENCRYPTED_KEYS */ + static inline struct ecryptfs_auth_tok * ecryptfs_get_key_payload_data(struct key *key) { - return (struct ecryptfs_auth_tok *) - (((struct user_key_payload*)key->payload.data)->data); + struct ecryptfs_auth_tok *auth_tok; + + auth_tok = ecryptfs_get_encrypted_key_payload_data(key); + if (!auth_tok) + return (struct ecryptfs_auth_tok *) + (((struct user_key_payload *)key->payload.data)->data); + else + return auth_tok; } #define ECRYPTFS_MAX_KEYSET_SIZE 1024 diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index fa8049ecdc64..c47253350123 100644 --- 
a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -1635,11 +1635,14 @@ int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key, (*auth_tok_key) = request_key(&key_type_user, sig, NULL); if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { - printk(KERN_ERR "Could not find key with description: [%s]\n", - sig); - rc = process_request_key_err(PTR_ERR(*auth_tok_key)); - (*auth_tok_key) = NULL; - goto out; + (*auth_tok_key) = ecryptfs_get_encrypted_key(sig); + if (!(*auth_tok_key) || IS_ERR(*auth_tok_key)) { + printk(KERN_ERR "Could not find key with description: [%s]\n", + sig); + rc = process_request_key_err(PTR_ERR(*auth_tok_key)); + (*auth_tok_key) = NULL; + goto out; + } } down_write(&(*auth_tok_key)->sem); rc = ecryptfs_verify_auth_tok_from_key(*auth_tok_key, auth_tok); diff --git a/fs/ext2/acl.h b/fs/ext2/acl.h index 5c0a6a4fb052..503bfb0ed79b 100644 --- a/fs/ext2/acl.h +++ b/fs/ext2/acl.h @@ -61,7 +61,6 @@ extern int ext2_init_acl (struct inode *, struct inode *); #else #include <linux/sched.h> #define ext2_get_acl NULL -#define ext2_get_acl NULL #define ext2_set_acl NULL static inline int diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 516516e0c2a2..3bc073a4cf82 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -1018,13 +1018,13 @@ hostdata_error: fsname++; if (lm->lm_mount == NULL) { fs_info(sdp, "Now mounting FS...\n"); - complete(&sdp->sd_locking_init); + complete_all(&sdp->sd_locking_init); return 0; } ret = lm->lm_mount(sdp, fsname); if (ret == 0) fs_info(sdp, "Joined cluster. Now mounting FS...\n"); - complete(&sdp->sd_locking_init); + complete_all(&sdp->sd_locking_init); return ret; } diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c index 4496872cf4e7..9cbd11a3f804 100644 --- a/fs/jfs/jfs_dmap.c +++ b/fs/jfs/jfs_dmap.c @@ -3161,7 +3161,7 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, { int rc; int dbitno, word, rembits, nb, nwords, wbitno, agno; - s8 oldroot, *leaf; + s8 oldroot; struct dmaptree *tp = (struct dmaptree *) & dp->tree; /* save the current value of the root (i.e. maximum free string) @@ -3169,9 +3169,6 @@ static int dbAllocDmapBU(struct bmap * bmp, struct dmap * dp, s64 blkno, */ oldroot = tp->stree[ROOT]; - /* pick up a pointer to the leaves of the dmap tree */ - leaf = tp->stree + LEAFIND; - /* determine the bit number and word within the dmap of the * starting block. 
*/ diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index f6cc0c09ec63..af9606057dde 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1143,7 +1143,6 @@ int txCommit(tid_t tid, /* transaction identifier */ struct jfs_log *log; struct tblock *tblk; struct lrd *lrd; - int lsn; struct inode *ip; struct jfs_inode_info *jfs_ip; int k, n; @@ -1310,7 +1309,7 @@ int txCommit(tid_t tid, /* transaction identifier */ */ lrd->type = cpu_to_le16(LOG_COMMIT); lrd->length = 0; - lsn = lmLog(log, tblk, lrd, NULL); + lmLog(log, tblk, lrd, NULL); lmGroupCommit(log, tblk); @@ -2935,7 +2934,6 @@ int jfs_sync(void *arg) { struct inode *ip; struct jfs_inode_info *jfs_ip; - int rc; tid_t tid; do { @@ -2961,7 +2959,7 @@ int jfs_sync(void *arg) */ TXN_UNLOCK(); tid = txBegin(ip->i_sb, COMMIT_INODE); - rc = txCommit(tid, 1, &ip, 0); + txCommit(tid, 1, &ip, 0); txEnd(tid); mutex_unlock(&jfs_ip->commit_mutex); diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index 29b1f1a21142..e17545e15664 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -893,7 +893,7 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, unchar *i_fastsymlink; s64 xlen = 0; int bmask = 0, xsize; - s64 extent = 0, xaddr; + s64 xaddr; struct metapage *mp; struct super_block *sb; struct tblock *tblk; @@ -993,7 +993,6 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry, txAbort(tid, 0); goto out3; } - extent = xaddr; ip->i_size = ssize - 1; while (ssize) { /* This is kind of silly since PATH_MAX == 4K */ diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c index e374050a911c..8392cb85bd54 100644 --- a/fs/lockd/clntproc.c +++ b/fs/lockd/clntproc.c @@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc) /* We appear to be out of the grace period */ wake_up_all(&host->h_gracewait); } - dprintk("lockd: server returns status %d\n", resp->status); + dprintk("lockd: server returns status %d\n", + ntohl(resp->status)); return 0; /* Okay, call complete */ } @@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl) goto out; if (resp->status != nlm_lck_denied_nolocks) - printk("lockd: unexpected unlock status: %d\n", resp->status); + printk("lockd: unexpected unlock status: %d\n", + ntohl(resp->status)); /* What to do now? I'm out of my depth... 
*/ status = -ENOLCK; out: @@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status) return -ENOLCK; #endif } - printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); + printk(KERN_NOTICE "lockd: unexpected server status %d\n", + ntohl(status)); return -ENOLCK; } diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 81515545ba75..2cde5d954750 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -77,6 +77,7 @@ config NFS_V4 config NFS_V4_1 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" depends on NFS_FS && NFS_V4 && EXPERIMENTAL + select SUNRPC_BACKCHANNEL select PNFS_FILE_LAYOUT help This option enables support for minor version 1 of the NFSv4 protocol diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index d4d1954e9bb9..74780f9f852c 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf static u32 initiate_file_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; bool found = false; @@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp, LIST_HEAD(free_me_list); spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { - if (nfs_compare_fh(&args->cbl_fh, - &NFS_I(lo->plh_inode)->fh)) - continue; - ino = igrab(lo->plh_inode); - if (!ino) - continue; - found = true; - /* Without this, layout can be freed as soon - * as we release cl_lock. - */ - get_layout_hdr(lo); - break; + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (nfs_compare_fh(&args->cbl_fh, + &NFS_I(lo->plh_inode)->fh)) + continue; + ino = igrab(lo->plh_inode); + if (!ino) + continue; + found = true; + /* Without this, layout can be freed as soon + * as we release cl_lock. 
+ */ + get_layout_hdr(lo); + break; + } + if (found) + break; } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + if (!found) return NFS4ERR_NOMATCHING_LAYOUT; @@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp, static u32 initiate_bulk_draining(struct nfs_client *clp, struct cb_layoutrecallargs *args) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; struct inode *ino; u32 rv = NFS4ERR_NOMATCHING_LAYOUT; @@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp, }; spin_lock(&clp->cl_lock); - list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { if ((args->cbl_recall_type == RETURN_FSID) && - memcmp(&NFS_SERVER(lo->plh_inode)->fsid, - &args->cbl_fsid, sizeof(struct nfs_fsid))) - continue; - if (!igrab(lo->plh_inode)) + memcmp(&server->fsid, &args->cbl_fsid, + sizeof(struct nfs_fsid))) continue; - get_layout_hdr(lo); - BUG_ON(!list_empty(&lo->plh_bulk_recall)); - list_add(&lo->plh_bulk_recall, &recall_list); + + list_for_each_entry(lo, &server->layouts, plh_layouts) { + if (!igrab(lo->plh_inode)) + continue; + get_layout_hdr(lo); + BUG_ON(!list_empty(&lo->plh_bulk_recall)); + list_add(&lo->plh_bulk_recall, &recall_list); + } } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); + list_for_each_entry_safe(lo, tmp, &recall_list, plh_bulk_recall) { ino = lo->plh_inode; diff --git a/fs/nfs/client.c b/fs/nfs/client.c index b3dc2b88b65b..19ea7d9c75e6 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_ cred = rpc_lookup_machine_cred(); if (!IS_ERR(cred)) clp->cl_machine_cred = cred; -#if defined(CONFIG_NFS_V4_1) - INIT_LIST_HEAD(&clp->cl_layouts); -#endif nfs_fscache_get_client_cookie(clp); return clp; @@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp) nfs4_deviceid_purge_client(clp); kfree(clp->cl_hostname); + kfree(clp->server_scope); kfree(clp); dprintk("<-- nfs_free_client()\n"); @@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void) INIT_LIST_HEAD(&server->client_link); INIT_LIST_HEAD(&server->master_link); INIT_LIST_HEAD(&server->delegations); + INIT_LIST_HEAD(&server->layouts); atomic_set(&server->active, 0); @@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, dprintk("<-- %s %p\n", __func__, clp); return clp; } -EXPORT_SYMBOL(nfs4_set_ds_client); +EXPORT_SYMBOL_GPL(nfs4_set_ds_client); /* * Session has been established, and the client marked ready. 
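
The recall-draining hunks above switch from one per-client layout list to per-server lists walked under rcu_read_lock(), but they keep the invariant the old comment spelled out: a layout header must be pinned with get_layout_hdr() before cl_lock is dropped, or it can be freed the moment the lock is released. A self-contained userspace sketch of that pin-before-unlock pattern, with a hypothetical struct layout and a mutex standing in for cl_lock:

#include <pthread.h>
#include <stddef.h>

struct layout {
	struct layout *next;
	int key;
	int refcount;			/* protected by list_lock */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct layout *layouts;

/* find a matching entry and take a reference *before* dropping the
 * list lock, so the entry cannot vanish once the lock is released */
static struct layout *find_and_pin(int key)
{
	struct layout *lo;

	pthread_mutex_lock(&list_lock);
	for (lo = layouts; lo != NULL; lo = lo->next)
		if (lo->key == key)
			break;
	if (lo)
		lo->refcount++;		/* the get_layout_hdr() step */
	pthread_mutex_unlock(&list_lock);
	return lo;			/* caller must drop the reference */
}

The bulk-draining variant above does the same pinning for every matching layout while still under the lock, then processes the private recall_list afterwards.
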
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c index dd25c2aec375..321a66bc3846 100644 --- a/fs/nfs/delegation.c +++ b/fs/nfs/delegation.c @@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode) return err; } -static void nfs_mark_return_delegation(struct nfs_delegation *delegation) +static void nfs_mark_return_delegation(struct nfs_server *server, + struct nfs_delegation *delegation) { - struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client; - set_bit(NFS_DELEGATION_RETURN, &delegation->flags); - set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); + set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state); } /** @@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server, if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) continue; if (delegation->type & flags) - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server) list_for_each_entry_rcu(delegation, &server->delegations, super_list) { if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) continue; - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); } } @@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp) int nfs_async_inode_return_delegation(struct inode *inode, const nfs4_stateid *stateid) { - struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; + struct nfs_server *server = NFS_SERVER(inode); + struct nfs_client *clp = server->nfs_client; struct nfs_delegation *delegation; rcu_read_lock(); @@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode, rcu_read_unlock(); return -ENOENT; } - nfs_mark_return_delegation(delegation); + nfs_mark_return_delegation(server, delegation); rcu_read_unlock(); nfs_delegation_run_state_manager(clp); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2a55347a2daa..ab12913dd473 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb); extern char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen); extern struct vfsmount *nfs_d_automount(struct path *path); +#ifdef CONFIG_NFS_V4 +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *); +#endif /* getroot.c */ extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, @@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *, extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); #endif +struct nfs_pageio_descriptor; /* read.c */ extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, const struct rpc_call_ops *call_ops); extern void nfs_read_prepare(struct rpc_task *task, void *calldata); +extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, + struct list_head *head); + +extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_readdata_release(struct nfs_read_data *rdata); /* write.c */ +extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc, + struct list_head *head); +extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio); +extern void nfs_writedata_release(struct nfs_write_data *wdata); extern void nfs_commit_free(struct nfs_write_data *p); extern int nfs_initiate_write(struct nfs_write_data *data, struct rpc_clnt *clnt, diff 
--git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1f063bacd285..8102391bb374 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -119,7 +119,7 @@ Elong: } #ifdef CONFIG_NFS_V4 -static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) +rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) { struct gss_api_mech *mech; struct xdr_netobj oid; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index b788f2eb1ba0..1909ee8be350 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -48,6 +48,7 @@ enum nfs4_client_state { NFS4CLNT_SESSION_RESET, NFS4CLNT_RECALL_SLOT, NFS4CLNT_LEASE_CONFIRM, + NFS4CLNT_SERVER_SCOPE_MISMATCH, }; enum nfs4_session_state { @@ -66,6 +67,8 @@ struct nfs4_minor_version_ops { int cache_reply); int (*validate_stateid)(struct nfs_delegation *, const nfs4_stateid *); + int (*find_root_sec)(struct nfs_server *, struct nfs_fh *, + struct nfs_fsinfo *); const struct nfs4_state_recovery_ops *reboot_recovery_ops; const struct nfs4_state_recovery_ops *nograce_recovery_ops; const struct nfs4_state_maintenance_ops *state_renewal_ops; @@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *); extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); extern void nfs41_handle_recall_slot(struct nfs_client *clp); +extern void nfs41_handle_server_scope(struct nfs_client *, + struct server_scope **); extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c index f9d03abcd04c..be93a622872c 100644 --- a/fs/nfs/nfs4filelayout.c +++ b/fs/nfs/nfs4filelayout.c @@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data) __func__, data->inode->i_ino, data->args.pgbase, (size_t)data->args.count, offset); + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s USE DS:ip %x %hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr); /* No multipath support. 
Use first DS */ data->ds_clp = ds->ds_clp; @@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) struct nfs_fh *fh; int status; + if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags)) + return PNFS_NOT_ATTEMPTED; + /* Retrieve the correct rpc_client for the byte range */ j = nfs4_fl_calc_j_index(lseg, offset); idx = nfs4_fl_calc_ds_index(lseg, j); @@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync) set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); return PNFS_NOT_ATTEMPTED; } - dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, + dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__, data->inode->i_ino, sync, (size_t) data->args.count, offset, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + ds->ds_remotestr); data->write_done_cb = filelayout_write_done_cb; data->ds_clp = ds->ds_clp; @@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); + /* FIXME: remove this check when layout segment support is added */ + if (lgr->range.offset != 0 || + lgr->range.length != NFS4_MAX_UINT64) { + dprintk("%s Only whole file layouts supported. Use MDS i/o\n", + __func__); + goto out; + } + if (fl->pattern_offset > lgr->range.offset) { dprintk("%s pattern_offset %lld too large\n", __func__, fl->pattern_offset); @@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo, goto out; } else dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); + /* Found deviceid is being reaped */ + if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) + goto out_put; + fl->dsaddr = dsaddr; if (fl->first_stripe_index < 0 || @@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid, * return true : coalesce page * return false : don't coalesce page */ -bool +static bool filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req) { @@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, !nfs_generic_pg_test(pgio, prev, req)) return false; - if (!pgio->pg_lseg) - return 1; p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; @@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, return (p_stripe == r_stripe); } +void +filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_read_mds(pgio); +} + +void +filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, + struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + 0, + NFS4_MAX_UINT64, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_write_mds(pgio); +} + +static const struct nfs_pageio_ops filelayout_pg_read_ops = { + .pg_init = filelayout_pg_init_read, + .pg_test = filelayout_pg_test, + .pg_doio = pnfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops filelayout_pg_write_ops = { + .pg_init = filelayout_pg_init_write, + .pg_test = filelayout_pg_test, + .pg_doio = 
pnfs_generic_pg_writepages, +}; + static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) { return !FILELAYOUT_LSEG(lseg)->commit_through_mds; @@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = { .owner = THIS_MODULE, .alloc_lseg = filelayout_alloc_lseg, .free_lseg = filelayout_free_lseg, - .pg_test = filelayout_pg_test, + .pg_read_ops = &filelayout_pg_read_ops, + .pg_write_ops = &filelayout_pg_write_ops, .mark_pnfs_commit = filelayout_mark_pnfs_commit, .choose_commit_list = filelayout_choose_commit_list, .commit_pagelist = filelayout_commit_pagelist, @@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void) pnfs_unregister_layoutdriver(&filelayout_type); } +MODULE_ALIAS("nfs-layouttype4-1"); + module_init(nfs4filelayout_init); module_exit(nfs4filelayout_exit); diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h index cebe01e3795e..2e42284253fa 100644 --- a/fs/nfs/nfs4filelayout.h +++ b/fs/nfs/nfs4filelayout.h @@ -47,10 +47,17 @@ enum stripetype4 { }; /* Individual ip address */ +struct nfs4_pnfs_ds_addr { + struct sockaddr_storage da_addr; + size_t da_addrlen; + struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */ + char *da_remotestr; /* human readable addr+port */ +}; + struct nfs4_pnfs_ds { struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ - u32 ds_ip_addr; - u32 ds_port; + char *ds_remotestr; /* comma sep list of addrs */ + struct list_head ds_addrs; struct nfs_client *ds_clp; atomic_t ds_count; }; @@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg) generic_hdr); } +static inline struct nfs4_deviceid_node * +FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg) +{ + return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node; +} + extern struct nfs_fh * nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index 3b7bf1377264..ed388aae9689 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds) printk("%s NULL device\n", __func__); return; } - printk(" ip_addr %x port %hu\n" + printk(" ds %s\n" " ref count %d\n" " client %p\n" " cl_exchange_flags %x\n", - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + ds->ds_remotestr, atomic_read(&ds->ds_count), ds->ds_clp, ds->ds_clp ? 
ds->ds_clp->cl_exchange_flags : 0); } -/* nfs4_ds_cache_lock is held */ -static struct nfs4_pnfs_ds * -_data_server_lookup_locked(u32 ip_addr, u32 port) +static bool +same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2) { - struct nfs4_pnfs_ds *ds; + struct sockaddr_in *a, *b; + struct sockaddr_in6 *a6, *b6; + + if (addr1->sa_family != addr2->sa_family) + return false; + + switch (addr1->sa_family) { + case AF_INET: + a = (struct sockaddr_in *)addr1; + b = (struct sockaddr_in *)addr2; + + if (a->sin_addr.s_addr == b->sin_addr.s_addr && + a->sin_port == b->sin_port) + return true; + break; + + case AF_INET6: + a6 = (struct sockaddr_in6 *)addr1; + b6 = (struct sockaddr_in6 *)addr2; + + /* LINKLOCAL addresses must have matching scope_id */ + if (ipv6_addr_scope(&a6->sin6_addr) == + IPV6_ADDR_SCOPE_LINKLOCAL && + a6->sin6_scope_id != b6->sin6_scope_id) + return false; + + if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) && + a6->sin6_port == b6->sin6_port) + return true; + break; + + default: + dprintk("%s: unhandled address family: %u\n", + __func__, addr1->sa_family); + return false; + } - dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", - ntohl(ip_addr), ntohs(port)); + return false; +} - list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { - if (ds->ds_ip_addr == ip_addr && - ds->ds_port == port) { - return ds; +/* + * Lookup DS by addresses. The first matching address returns the cached + * data server. + * nfs4_ds_cache_lock is held + */ +static struct nfs4_pnfs_ds * +_data_server_lookup_locked(struct list_head *dsaddrs) +{ + struct nfs4_pnfs_ds *ds; + struct nfs4_pnfs_ds_addr *da1, *da2; + + list_for_each_entry(da1, dsaddrs, da_node) { + list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { + list_for_each_entry(da2, &ds->ds_addrs, da_node) { + if (same_sockaddr( + (struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) + return ds; + } } } return NULL; } /* + * Compare two lists of addresses.
+ */ +static bool +_data_server_match_all_addrs_locked(struct list_head *dsaddrs1, + struct list_head *dsaddrs2) +{ + struct nfs4_pnfs_ds_addr *da1, *da2; + size_t count1 = 0, + count2 = 0; + + list_for_each_entry(da1, dsaddrs1, da_node) + count1++; + + list_for_each_entry(da2, dsaddrs2, da_node) { + bool found = false; + count2++; + list_for_each_entry(da1, dsaddrs1, da_node) { + if (same_sockaddr((struct sockaddr *)&da1->da_addr, + (struct sockaddr *)&da2->da_addr)) { + found = true; + break; + } + } + if (!found) + return false; + } + + return (count1 == count2); +} + +/* * Create an rpc connection to the nfs4_pnfs_ds data server - * Currently only support IPv4 + * Currently only supports IPv4 and IPv6 addresses */ static int nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) { - struct nfs_client *clp; - struct sockaddr_in sin; + struct nfs_client *clp = ERR_PTR(-EIO); + struct nfs4_pnfs_ds_addr *da; int status = 0; - dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), + dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr, mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); - sin.sin_family = AF_INET; - sin.sin_addr.s_addr = ds->ds_ip_addr; - sin.sin_port = ds->ds_port; + BUG_ON(list_empty(&ds->ds_addrs)); + + list_for_each_entry(da, &ds->ds_addrs, da_node) { + dprintk("%s: DS %s: trying address %s\n", + __func__, ds->ds_remotestr, da->da_remotestr); + + clp = nfs4_set_ds_client(mds_srv->nfs_client, + (struct sockaddr *)&da->da_addr, + da->da_addrlen, IPPROTO_TCP); + if (!IS_ERR(clp)) + break; + } - clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin, - sizeof(sin), IPPROTO_TCP); if (IS_ERR(clp)) { status = PTR_ERR(clp); goto out; @@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; } ds->ds_clp = clp; - dprintk("%s [existing] ip=%x, port=%hu\n", __func__, - ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); + dprintk("%s [existing] server=%s\n", __func__, + ds->ds_remotestr); goto out; } @@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) goto out_put; ds->ds_clp = clp; - dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), - ntohs(ds->ds_port)); + dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr); out: return status; out_put: @@ -147,12 +231,25 @@ out_put: static void destroy_ds(struct nfs4_pnfs_ds *ds) { + struct nfs4_pnfs_ds_addr *da; + dprintk("--> %s\n", __func__); ifdebug(FACILITY) print_ds(ds); if (ds->ds_clp) nfs_put_client(ds->ds_clp); + + while (!list_empty(&ds->ds_addrs)) { + da = list_first_entry(&ds->ds_addrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } + + kfree(ds->ds_remotestr); kfree(ds); } @@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) kfree(dsaddr); }
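
nfs4_ds_connect() above no longer fills in a single sockaddr_in; it walks ds->ds_addrs and keeps the first address that yields a usable client. That is the classic try-each-candidate loop, the same shape a userspace client uses over getaddrinfo() results; a self-contained sketch with a hypothetical connect_first() helper:

#include <netdb.h>
#include <string.h>
#include <sys/socket.h>
#include <unistd.h>

/* try every address the resolver returns until one connects,
 * mirroring the list_for_each_entry() walk over ds->ds_addrs */
static int connect_first(const char *host, const char *port)
{
	struct addrinfo hints, *res, *ai;
	int fd = -1;

	memset(&hints, 0, sizeof(hints));
	hints.ai_family = AF_UNSPEC;	/* IPv4 or IPv6, like the patch */
	hints.ai_socktype = SOCK_STREAM;

	if (getaddrinfo(host, port, &hints, &res) != 0)
		return -1;

	for (ai = res; ai != NULL; ai = ai->ai_next) {
		fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol);
		if (fd < 0)
			continue;
		if (connect(fd, ai->ai_addr, ai->ai_addrlen) == 0)
			break;		/* first working address wins */
		close(fd);
		fd = -1;
	}
	freeaddrinfo(res);
	return fd;			/* -1 if every address failed */
}
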
+/* + * Create a string with a human readable address and port to avoid + * complicated setup around many dprintks. + */ +static char * +nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags) +{ + struct nfs4_pnfs_ds_addr *da; + char *remotestr; + size_t len; + char *p; + + len = 3; /* '{', '}' and eol */ + list_for_each_entry(da, dsaddrs, da_node) { + len += strlen(da->da_remotestr) + 1; /* string plus comma */ + } + + remotestr = kzalloc(len, gfp_flags); + if (!remotestr) + return NULL; + + p = remotestr; + *(p++) = '{'; + len--; + list_for_each_entry(da, dsaddrs, da_node) { + size_t ll = strlen(da->da_remotestr); + + if (ll > len) + goto out_err; + + memcpy(p, da->da_remotestr, ll); + p += ll; + len -= ll; + + if (len < 1) + goto out_err; + (*p++) = ','; + len--; + } + if (len < 2) + goto out_err; + *(p++) = '}'; + *p = '\0'; + return remotestr; +out_err: + kfree(remotestr); + return NULL; +} + static struct nfs4_pnfs_ds * -nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) +nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *tmp_ds, *ds; + struct nfs4_pnfs_ds *tmp_ds, *ds = NULL; + char *remotestr; - ds = kzalloc(sizeof(*tmp_ds), gfp_flags); + if (list_empty(dsaddrs)) { + dprintk("%s: no addresses defined\n", __func__); + goto out; + } + + ds = kzalloc(sizeof(*ds), gfp_flags); if (!ds) goto out; + /* this is only used for debugging, so it's ok if it's NULL */ + remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags); + spin_lock(&nfs4_ds_cache_lock); - tmp_ds = _data_server_lookup_locked(ip_addr, port); + tmp_ds = _data_server_lookup_locked(dsaddrs); if (tmp_ds == NULL) { - ds->ds_ip_addr = ip_addr; - ds->ds_port = port; + INIT_LIST_HEAD(&ds->ds_addrs); + list_splice_init(dsaddrs, &ds->ds_addrs); + ds->ds_remotestr = remotestr; atomic_set(&ds->ds_count, 1); INIT_LIST_HEAD(&ds->ds_node); ds->ds_clp = NULL; list_add(&ds->ds_node, &nfs4_data_server_cache); - dprintk("%s add new data server ip 0x%x\n", __func__, - ds->ds_ip_addr); + dprintk("%s add new data server %s\n", __func__, + ds->ds_remotestr); } else { + if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs, + dsaddrs)) { + dprintk("%s: multipath address mismatch: %s != %s", + __func__, tmp_ds->ds_remotestr, remotestr); + } + kfree(remotestr); kfree(ds); atomic_inc(&tmp_ds->ds_count); - dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", - __func__, tmp_ds->ds_ip_addr, + dprintk("%s data server %s found, inc'ed ds_count to %d\n", + __func__, tmp_ds->ds_remotestr, atomic_read(&tmp_ds->ds_count)); ds = tmp_ds; } @@ -213,18 +375,22 @@ out: } /* - * Currently only support ipv4, and one multi-path address. + * Currently only supports ipv4, ipv6 and one multi-path address.
*/ -static struct nfs4_pnfs_ds * -decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) +static struct nfs4_pnfs_ds_addr * +decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags) { - struct nfs4_pnfs_ds *ds = NULL; - char *buf; - const char *ipend, *pstr; - u32 ip_addr, port; - int nlen, rlen, i; + struct nfs4_pnfs_ds_addr *da = NULL; + char *buf, *portstr; + u32 port; + int nlen, rlen; int tmp[2]; __be32 *p; + char *netid, *match_netid; + size_t len, match_netid_len; + char *startsep = ""; + char *endsep = ""; + /* r_netid */ p = xdr_inline_decode(streamp, 4); @@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla if (unlikely(!p)) goto out_err; - /* Check that netid is "tcp" */ - if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { - dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); + netid = kmalloc(nlen+1, gfp_flags); + if (unlikely(!netid)) goto out_err; - } - /* r_addr */ + netid[nlen] = '\0'; + memcpy(netid, p, nlen); + + /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */ p = xdr_inline_decode(streamp, 4); if (unlikely(!p)) - goto out_err; + goto out_free_netid; rlen = be32_to_cpup(p); p = xdr_inline_decode(streamp, rlen); if (unlikely(!p)) - goto out_err; + goto out_free_netid; - /* ipv6 length plus port is legal */ - if (rlen > INET6_ADDRSTRLEN + 8) { + /* port is ".ABC.DEF", 8 chars max */ + if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) { dprintk("%s: Invalid address, length %d\n", __func__, rlen); - goto out_err; + goto out_free_netid; } buf = kmalloc(rlen + 1, gfp_flags); if (!buf) { dprintk("%s: Not enough memory\n", __func__); - goto out_err; + goto out_free_netid; } buf[rlen] = '\0'; memcpy(buf, p, rlen); - /* replace the port dots with dashes for the in4_pton() delimiter*/ - for (i = 0; i < 2; i++) { - char *res = strrchr(buf, '.'); - if (!res) { - dprintk("%s: Failed finding expected dots in port\n", - __func__); - goto out_free; - } - *res = '-'; + /* replace port '.' with '-' */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot in port\n", + __func__); + goto out_free_buf; + } + *portstr = '-'; + + /* find '.' 
between address and port */ + portstr = strrchr(buf, '.'); + if (!portstr) { + dprintk("%s: Failed finding expected dot between address and " + "port\n", __func__); + goto out_free_buf; } + *portstr = '\0'; - /* Currently only support ipv4 address */ - if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { - dprintk("%s: Only ipv4 addresses supported\n", __func__); - goto out_free; + da = kzalloc(sizeof(*da), gfp_flags); + if (unlikely(!da)) + goto out_free_buf; + + INIT_LIST_HEAD(&da->da_node); + + if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr, + sizeof(da->da_addr))) { + dprintk("%s: error parsing address %s\n", __func__, buf); + goto out_free_da; } - /* port */ - pstr = ipend; - sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]); + portstr++; + sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]); port = htons((tmp[0] << 8) | (tmp[1])); - ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); - dprintk("%s: Decoded address and port %s\n", __func__, buf); -out_free: + switch (da->da_addr.ss_family) { + case AF_INET: + ((struct sockaddr_in *)&da->da_addr)->sin_port = port; + da->da_addrlen = sizeof(struct sockaddr_in); + match_netid = "tcp"; + match_netid_len = 3; + break; + + case AF_INET6: + ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port; + da->da_addrlen = sizeof(struct sockaddr_in6); + match_netid = "tcp6"; + match_netid_len = 4; + startsep = "["; + endsep = "]"; + break; + + default: + dprintk("%s: unsupported address family: %u\n", + __func__, da->da_addr.ss_family); + goto out_free_da; + } + + if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) { + dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n", + __func__, netid, match_netid); + goto out_free_da; + } + + /* save human readable address */ + len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7; + da->da_remotestr = kzalloc(len, gfp_flags); + + /* NULL is ok, only used for dprintk */ + if (da->da_remotestr) + snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep, + buf, endsep, ntohs(port)); + + dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr); kfree(buf); + kfree(netid); + return da; + +out_free_da: + kfree(da); +out_free_buf: + dprintk("%s: Error parsing DS addr: %s\n", __func__, buf); + kfree(buf); +out_free_netid: + kfree(netid); out_err: - return ds; + return NULL; } /* Decode opaque device data and return the result */ @@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) struct xdr_stream stream; struct xdr_buf buf; struct page *scratch; + struct list_head dsaddrs; + struct nfs4_pnfs_ds_addr *da; /* set up xdr stream */ scratch = alloc_page(gfp_flags); @@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) NFS_SERVER(ino)->nfs_client, &pdev->dev_id); + INIT_LIST_HEAD(&dsaddrs); + for (i = 0; i < dsaddr->ds_num; i++) { int j; u32 mp_count; @@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags) goto out_err_free_deviceid; mp_count = be32_to_cpup(p); /* multipath count */ - if (mp_count > 1) { - printk(KERN_WARNING - "%s: Multipath count %d not supported, " - "skipping all greater than 1\n", __func__, - mp_count); - } for (j = 0; j < mp_count; j++) { - if (j == 0) { - dsaddr->ds_list[i] = decode_and_add_ds(&stream, - ino, gfp_flags); - if (dsaddr->ds_list[i] == NULL) - goto out_err_free_deviceid; - } else { - u32 len; - /* skip extra multipath */ - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - 
len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - - /* read len, skip */ - p = xdr_inline_decode(&stream, 4); - if (unlikely(!p)) - goto out_err_free_deviceid; - len = be32_to_cpup(p); - - p = xdr_inline_decode(&stream, len); - if (unlikely(!p)) - goto out_err_free_deviceid; - } + da = decode_ds_addr(&stream, gfp_flags); + if (da) + list_add_tail(&da->da_node, &dsaddrs); + } + if (list_empty(&dsaddrs)) { + dprintk("%s: no suitable DS addresses found\n", + __func__); + goto out_err_free_deviceid; + } + + dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); + if (!dsaddr->ds_list[i]) + goto out_err_drain_dsaddrs; + + /* If DS was already in cache, free ds addrs */ + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, + struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); } } __free_page(scratch); return dsaddr; +out_err_drain_dsaddrs: + while (!list_empty(&dsaddrs)) { + da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, + da_node); + list_del_init(&da->da_node); + kfree(da->da_remotestr); + kfree(da); + } out_err_free_deviceid: nfs4_fl_free_deviceid(dsaddr); /* stripe_indicies was part of dsaddr */ @@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) static void filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, - int err, u32 ds_addr) + int err, const char *ds_remotestr) { u32 *p = (u32 *)&dsaddr->id_node.deviceid; - printk(KERN_ERR "NFS: data server %x connection error %d." + printk(KERN_ERR "NFS: data server %s connection error %d." " Deviceid [%x%x%x%x] marked out of use.\n", - ds_addr, err, p[0], p[1], p[2], p[3]); + ds_remotestr, err, p[0], p[1], p[2], p[3]); spin_lock(&nfs4_ds_cache_lock); dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; @@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) err = nfs4_ds_connect(s, ds); if (err) { filelayout_mark_devid_negative(dsaddr, err, - ntohl(ds->ds_ip_addr)); + ds->ds_remotestr); return NULL; } } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 26bece8f3083..079614deca3f 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, struct nfs_fattr *fattr, struct iattr *sattr, struct nfs4_state *state); - +#ifdef CONFIG_NFS_V4_1 +static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *); +static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *); +#endif /* Prevent leaks of NFSv4 errors into userland */ static int nfs4_map_errors(int err) { @@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta return ret; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_open_expired(sp, state); +} +#endif + /* * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* * fields corresponding to attributes that were used to store the verifier. 
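
The nfs41_open_expired() added above (and nfs41_lock_expired() further down) turn NFSv4.1 state recovery into a cheap probe first: TEST_STATEID asks whether the server still honors the stateid, and only when it does not is the stateid discarded with FREE_STATEID before falling back to the full v4.0-style reclaim. A sketch of that control flow, with hypothetical stub helpers standing in for the two RPCs and the existing recovery routine:

/* stand-ins for the TEST_STATEID / FREE_STATEID round trips and for
 * the pre-existing full recovery path; a stateid is reduced to an int */
enum { STATE_OK = 0, STATE_STALE = 1 };

static int test_stateid(int sid)  { return sid ? STATE_OK : STATE_STALE; }
static void free_stateid(int sid) { (void)sid; /* server forgets it */ }
static int full_recovery(int sid) { (void)sid; return 0; /* re-open */ }

/* analogue of nfs41_open_expired(): probe first, reclaim only if the
 * server no longer recognizes the state */
static int recover_expired(int sid)
{
	if (test_stateid(sid) == STATE_OK)
		return 0;		/* state survived; nothing to do */
	free_stateid(sid);		/* drop the stale stateid first */
	return full_recovery(sid);	/* then the expensive reclaim */
}
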
@@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *info) { + int minor_version = server->nfs_client->cl_minorversion; int status = nfs4_lookup_root(server, fhandle, info); if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) /* * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM * by nfs4_map_errors() as this function exits. */ - status = nfs4_find_root_sec(server, fhandle, info); + status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info); if (status == 0) status = nfs4_server_capabilities(server, fhandle); if (status == 0) @@ -4441,6 +4459,20 @@ out: return err; } +#if defined(CONFIG_NFS_V4_1) +static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request) +{ + int status; + struct nfs_server *server = NFS_SERVER(state->inode); + + status = nfs41_test_stateid(server, state); + if (status == NFS_OK) + return 0; + nfs41_free_stateid(server, state); + return nfs4_lock_expired(state, request); +} +#endif + static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) { struct nfs_inode *nfsi = NFS_I(state->inode); @@ -4779,6 +4811,16 @@ out_inval: return -NFS4ERR_INVAL; } +static bool +nfs41_same_server_scope(struct server_scope *a, struct server_scope *b) +{ + if (a->server_scope_sz == b->server_scope_sz && + memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0) + return true; + + return false; +} + /* * nfs4_proc_exchange_id() * @@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) init_utsname()->domainname, clp->cl_rpcclient->cl_auth->au_flavor); + res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); + if (unlikely(!res.server_scope)) + return -ENOMEM; + status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); if (!status) status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); + + if (!status) { + if (clp->server_scope && + !nfs41_same_server_scope(clp->server_scope, + res.server_scope)) { + dprintk("%s: server_scope mismatch detected\n", + __func__); + set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state); + kfree(clp->server_scope); + clp->server_scope = NULL; + } + + if (!clp->server_scope) + clp->server_scope = res.server_scope; + else + kfree(res.server_scope); + } + dprintk("<-- %s status= %d\n", __func__, status); return status; } @@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata) { struct nfs4_layoutreturn *lrp = calldata; struct nfs_server *server; - struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; + struct pnfs_layout_hdr *lo = lrp->args.layout; dprintk("--> %s\n", __func__); @@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata) struct nfs4_layoutreturn *lrp = calldata; dprintk("--> %s\n", __func__); - put_layout_hdr(NFS_I(lrp->args.inode)->layout); + put_layout_hdr(lrp->args.layout); kfree(calldata); dprintk("<-- %s\n", __func__); } @@ -5901,6 +5965,143 @@ out: rpc_put_task(task); return status; } + +static int +_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs41_secinfo_no_name_args args = { + .style = SECINFO_STYLE_CURRENT_FH, + }; + struct nfs4_secinfo_res res = { + .flavors = flavors, + }; + struct rpc_message msg = { + .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME], + .rpc_argp = &args, + .rpc_resp = &res, + }; + return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); +} + +static int +nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + switch (err) { + case 0: + case -NFS4ERR_WRONGSEC: + case -NFS4ERR_NOTSUPP: + break; + default: + err = nfs4_handle_exception(server, err, &exception); + } + } while (exception.retry); + return err; +} + +static int +nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsinfo *info) +{ + int err; + struct page *page; + rpc_authflavor_t flavor; + struct nfs4_secinfo_flavors *flavors; + + page = alloc_page(GFP_KERNEL); + if (!page) { + err = -ENOMEM; + goto out; + } + + flavors = page_address(page); + err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors); + + /* + * Fall back on "guess and check" method if + * the server doesn't support SECINFO_NO_NAME + */ + if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) { + err = nfs4_find_root_sec(server, fhandle, info); + goto out_freepage; + } + if (err) + goto out_freepage; + + flavor = nfs_find_best_sec(flavors); + if (err == 0) + err = nfs4_lookup_root_sec(server, fhandle, info, flavor); + +out_freepage: + put_page(page); + if (err == -EACCES) + return -EPERM; +out: + return err; +} +static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_test_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_test_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs41_test_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} + +static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + int status; + struct nfs41_free_stateid_args args = { + .stateid = &state->stateid, + }; + struct nfs41_free_stateid_res res; + struct rpc_message msg = { + .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID], + .rpc_argp = &args, + .rpc_resp = &res, + }; + + args.seq_args.sa_session = res.seq_res.sr_session = NULL; + status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1); + return status; +} + +static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(server, + _nfs4_free_stateid(server, state), + &exception); + } while (exception.retry); + return err; +} #endif /* CONFIG_NFS_V4_1 */ struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { @@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = { struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, - .recover_open = 
nfs4_open_expired, - .recover_lock = nfs4_lock_expired, + .recover_open = nfs41_open_expired, + .recover_lock = nfs41_lock_expired, .establish_clid = nfs41_init_clientid, .get_clid_cred = nfs4_get_exchange_id_cred, }; @@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = { .minor_version = 0, .call_sync = _nfs4_call_sync, .validate_stateid = nfs4_validate_delegation_stateid, + .find_root_sec = nfs4_find_root_sec, .reboot_recovery_ops = &nfs40_reboot_recovery_ops, .nograce_recovery_ops = &nfs40_nograce_recovery_ops, .state_renewal_ops = &nfs40_state_renewal_ops, @@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = { .minor_version = 1, .call_sync = _nfs4_call_sync_session, .validate_stateid = nfs41_validate_delegation_stateid, + .find_root_sec = nfs41_find_root_sec, .reboot_recovery_ops = &nfs41_reboot_recovery_ops, .nograce_recovery_ops = &nfs41_nograce_recovery_ops, .state_renewal_ops = &nfs41_state_renewal_ops, diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 7acfe8843626..72ab97ef3d61 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp) goto out_error; } clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); - set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); + + if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, + &clp->cl_state)) + nfs4_state_start_reclaim_nograce(clp); + else + set_bit(NFS4CLNT_RECLAIM_REBOOT, + &clp->cl_state); + pnfs_destroy_all_layouts(clp); } diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e6e8f3b9a1de..c191a9baa422 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int); 1 /* FIXME: opaque lrf_body always empty at the moment */) #define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 1 + decode_stateid_maxsz) +#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1) +#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz +#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1) +#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \ + XDR_QUADLEN(NFS4_STATEID_SIZE)) +#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1) #else /* CONFIG_NFS_V4_1 */ #define encode_sequence_maxsz 0 #define decode_sequence_maxsz 0 @@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int); decode_sequence_maxsz + \ decode_putfh_maxsz + \ decode_layoutreturn_maxsz) +#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_putrootfh_maxsz +\ + encode_secinfo_no_name_maxsz) +#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_putrootfh_maxsz + \ + decode_secinfo_no_name_maxsz) +#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_test_stateid_maxsz) +#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_test_stateid_maxsz) +#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \ + encode_sequence_maxsz + \ + encode_free_stateid_maxsz) +#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \ + decode_sequence_maxsz + \ + decode_free_stateid_maxsz) const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + compound_encode_hdr_maxsz + @@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr, hdr->nops++; hdr->replen += 
decode_layoutreturn_maxsz; } + +static int +encode_secinfo_no_name(struct xdr_stream *xdr, + const struct nfs41_secinfo_no_name_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 8); + *p++ = cpu_to_be32(OP_SECINFO_NO_NAME); + *p++ = cpu_to_be32(args->style); + hdr->nops++; + hdr->replen += decode_secinfo_no_name_maxsz; + return 0; +} + +static void encode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + + p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_TEST_STATEID); + *p++ = cpu_to_be32(1); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_test_stateid_maxsz; +} + +static void encode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args, + struct compound_hdr *hdr) +{ + __be32 *p; + p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE); + *p++ = cpu_to_be32(OP_FREE_STATEID); + xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE); + hdr->nops++; + hdr->replen += decode_free_stateid_maxsz; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req, encode_layoutreturn(xdr, args, &hdr); encode_nops(&hdr); } + +/* + * Encode SECINFO_NO_NAME request + */ +static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_secinfo_no_name_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_putrootfh(xdr, &hdr); + encode_secinfo_no_name(xdr, args, &hdr); + encode_nops(&hdr); + return 0; +} + +/* + * Encode TEST_STATEID request + */ +static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_test_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_test_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} + +/* + * Encode FREE_STATEID request + */ +static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req, + struct xdr_stream *xdr, + struct nfs41_free_stateid_args *args) +{ + struct compound_hdr hdr = { + .minorversion = nfs4_xdr_minorversion(&args->seq_args), + }; + + encode_compound_hdr(xdr, req, &hdr); + encode_sequence(xdr, &args->seq_args, &hdr); + encode_free_stateid(xdr, args, &hdr); + encode_nops(&hdr); +} #endif /* CONFIG_NFS_V4_1 */ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) @@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr, if (unlikely(status)) return status; - /* Throw away server_scope */ + /* Save server_scope */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) return status; + if (unlikely(dummy > NFS4_OPAQUE_LIMIT)) + return -EIO; + + memcpy(res->server_scope->server_scope, dummy_str, dummy); + res->server_scope->server_scope_sz = dummy; + /* Throw away Implementation id array */ status = decode_opaque_inline(xdr, &dummy, &dummy_str); if (unlikely(status)) @@ -5322,6 +5449,55 @@ out_overflow: print_overflow_msg(__func__, xdr); return -EIO; } + +static int decode_test_stateid(struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + __be32 *p; + int status; + int num_res; + + status = decode_op_hdr(xdr, OP_TEST_STATEID); + if 
(status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + num_res = be32_to_cpup(p++); + if (num_res != 1) + goto out; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); +out: + return -EIO; +} + +static int decode_free_stateid(struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + __be32 *p; + int status; + + status = decode_op_hdr(xdr, OP_FREE_STATEID); + if (status) + return status; + + p = xdr_inline_decode(xdr, 4); + if (unlikely(!p)) + goto out_overflow; + res->status = be32_to_cpup(p++); + return res->status; +out_overflow: + print_overflow_msg(__func__, xdr); + return -EIO; +} #endif /* CONFIG_NFS_V4_1 */ /* @@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp, out: return status; } + +/* + * Decode SECINFO_NO_NAME response + */ +static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs4_secinfo_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_putrootfh(xdr); + if (status) + goto out; + status = decode_secinfo(xdr, res); +out: + return status; +} + +/* + * Decode TEST_STATEID response + */ +static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_test_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_test_stateid(xdr, res); +out: + return status; +} + +/* + * Decode FREE_STATEID response + */ +static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + struct nfs41_free_stateid_res *res) +{ + struct compound_hdr hdr; + int status; + + status = decode_compound_hdr(xdr, &hdr); + if (status) + goto out; + status = decode_sequence(xdr, &res->seq_res, rqstp); + if (status) + goto out; + status = decode_free_stateid(xdr, res); +out: + return status; +} #endif /* CONFIG_NFS_V4_1 */ /** @@ -6663,6 +6905,9 @@ struct rpc_procinfo nfs4_procedures[] = { PROC(LAYOUTGET, enc_layoutget, dec_layoutget), PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), + PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), + PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), + PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), #endif /* CONFIG_NFS_V4_1 */ }; diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 8ff2ea3f10ef..9383ca7245bc 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio, if (!pnfs_generic_pg_test(pgio, prev, req)) return false; - if (pgio->pg_lseg == NULL) - return true; - return pgio->pg_count + req->wb_bytes <= OBJIO_LSEG(pgio->pg_lseg)->max_io_size; } +static const struct nfs_pageio_ops objio_pg_read_ops = { + .pg_init = pnfs_generic_pg_init_read, + .pg_test = objio_pg_test, + .pg_doio = pnfs_generic_pg_readpages, +}; + +static const struct nfs_pageio_ops objio_pg_write_ops = { + .pg_init = pnfs_generic_pg_init_write, + .pg_test = objio_pg_test, + .pg_doio = pnfs_generic_pg_writepages, +}; + 
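The encode_test_stateid()/decode_test_stateid() hunks in nfs4xdr.c above follow the usual XDR layout: a 4-byte operation number, a 4-byte array count (TEST_STATEID takes a list of stateids, though this client always sends one), then the fixed-size opaque stateid, all big-endian. A self-contained sketch of the same wire format; the operation number 55 and the 16-byte stateid size are the RFC 5661 values, everything else here is invented:

#include <stdint.h>
#include <string.h>
#include <stdio.h>
#include <arpa/inet.h>

#define OP_TEST_STATEID 55
#define STATEID_SIZE    16

static size_t encode_test_stateid(uint8_t *buf,
				  const uint8_t stateid[STATEID_SIZE])
{
	uint32_t word;
	size_t off = 0;

	word = htonl(OP_TEST_STATEID);		/* operation number */
	memcpy(buf + off, &word, 4);
	off += 4;
	word = htonl(1);			/* ts_stateids array count */
	memcpy(buf + off, &word, 4);
	off += 4;
	memcpy(buf + off, stateid, STATEID_SIZE); /* opaque, fixed length */
	off += STATEID_SIZE;
	return off;				/* 8 + 16 bytes on the wire */
}

int main(void)
{
	uint8_t buf[64];
	uint8_t sid[STATEID_SIZE] = { 0x01 };
	size_t n = encode_test_stateid(buf, sid);

	for (size_t i = 0; i < n; i++)
		printf("%02x%c", buf[i], (i % 4 == 3) ? '\n' : ' ');
	return 0;
}

This is also why the patch reserves 8 + NFS4_STATEID_SIZE bytes: two 32-bit header words plus the opaque body.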
static struct pnfs_layoutdriver_type objlayout_type = { .id = LAYOUT_OSD2_OBJECTS, .name = "LAYOUT_OSD2_OBJECTS", @@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { .read_pagelist = objlayout_read_pagelist, .write_pagelist = objlayout_write_pagelist, - .pg_test = objio_pg_test, + .pg_read_ops = &objio_pg_read_ops, + .pg_write_ops = &objio_pg_write_ops, .free_deviceid_node = objio_free_deviceid_node, @@ -1055,5 +1065,7 @@ objlayout_exit(void) __func__); } +MODULE_ALIAS("nfs-layouttype4-2"); + module_init(objlayout_init); module_exit(objlayout_exit); diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 18449f43c568..b60970cc7f1f 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test); */ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int io_flags) { @@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc, desc->pg_bsize = bsize; desc->pg_base = 0; desc->pg_moreio = 0; + desc->pg_recoalesce = 0; desc->pg_inode = inode; - desc->pg_doio = doio; + desc->pg_ops = pg_ops; desc->pg_ioflags = io_flags; desc->pg_error = 0; desc->pg_lseg = NULL; - desc->pg_test = nfs_generic_pg_test; - pnfs_pageio_init(desc, inode); } /** @@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, return false; if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) return false; - return pgio->pg_test(pgio, prev, req); + return pgio->pg_ops->pg_test(pgio, prev, req); } /** @@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, if (!nfs_can_coalesce_requests(prev, req, desc)) return 0; } else { + if (desc->pg_ops->pg_init) + desc->pg_ops->pg_init(desc, req); desc->pg_base = req->wb_pgbase; } nfs_list_remove_request(req); @@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) { if (!list_empty(&desc->pg_list)) { - int error = desc->pg_doio(desc); + int error = desc->pg_ops->pg_doio(desc); if (error < 0) desc->pg_error = error; else @@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. 
*/ -int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, +static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { while (!nfs_pageio_do_add_request(desc, req)) { @@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_error < 0) return 0; desc->pg_moreio = 0; + if (desc->pg_recoalesce) + return 0; } return 1; } +static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + + do { + list_splice_init(&desc->pg_list, &head); + desc->pg_bytes_written -= desc->pg_count; + desc->pg_count = 0; + desc->pg_base = 0; + desc->pg_recoalesce = 0; + + while (!list_empty(&head)) { + struct nfs_page *req; + + req = list_first_entry(&head, struct nfs_page, wb_list); + nfs_list_remove_request(req); + if (__nfs_pageio_add_request(desc, req)) + continue; + if (desc->pg_error < 0) + return 0; + break; + } + } while (desc->pg_recoalesce); + return 1; +} + +int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) +{ + int ret; + + do { + ret = __nfs_pageio_add_request(desc, req); + if (ret) + break; + if (desc->pg_error < 0) + break; + ret = nfs_do_recoalesce(desc); + } while (ret); + return ret; +} + /** * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor * @desc: pointer to io descriptor */ void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) { - nfs_pageio_doio(desc); + for (;;) { + nfs_pageio_doio(desc); + if (!desc->pg_recoalesce) + break; + if (!nfs_do_recoalesce(desc)) + break; + } } /** @@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) if (!list_empty(&desc->pg_list)) { struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); if (index != prev->wb_index + 1) - nfs_pageio_doio(desc); + nfs_pageio_complete(desc); } } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 29c0ca7fc347..38e5508555c6 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -28,6 +28,7 @@ */ #include <linux/nfs_fs.h> +#include <linux/nfs_page.h> #include "internal.h" #include "pnfs.h" #include "iostat.h" @@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) void pnfs_destroy_all_layouts(struct nfs_client *clp) { + struct nfs_server *server; struct pnfs_layout_hdr *lo; LIST_HEAD(tmp_list); + nfs4_deviceid_mark_client_invalid(clp); + nfs4_deviceid_purge_client(clp); + spin_lock(&clp->cl_lock); - list_splice_init(&clp->cl_layouts, &tmp_list); + rcu_read_lock(); + list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { + if (!list_empty(&server->layouts)) + list_splice_init(&server->layouts, &tmp_list); + } + rcu_read_unlock(); spin_unlock(&clp->cl_lock); while (!list_empty(&tmp_list)) { @@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino) lrp->args.stateid = stateid; lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; lrp->args.inode = ino; + lrp->args.layout = lo; lrp->clp = NFS_SERVER(ino)->nfs_client; status = nfs4_proc_layoutreturn(lrp); @@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino, }; unsigned pg_offset; struct nfs_inode *nfsi = NFS_I(ino); - struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; + struct nfs_server *server = NFS_SERVER(ino); + struct nfs_client *clp = server->nfs_client; struct pnfs_layout_hdr *lo; struct pnfs_layout_segment *lseg = NULL; bool first = false; @@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino, */ spin_lock(&clp->cl_lock); BUG_ON(!list_empty(&lo->plh_layouts)); - list_add_tail(&lo->plh_layouts, &clp->cl_layouts); + 
list_add_tail(&lo->plh_layouts, &server->layouts); spin_unlock(&clp->cl_lock); } @@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino, arg.offset -= pg_offset; arg.length += pg_offset; } - arg.length = PAGE_CACHE_ALIGN(arg.length); + if (arg.length != NFS4_MAX_UINT64) + arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); if (!lseg && first) { @@ -991,6 +1004,7 @@ out_unlock: spin_unlock(&ino->i_lock); goto out; } +EXPORT_SYMBOL_GPL(pnfs_update_layout); int pnfs_layout_process(struct nfs4_layoutget *lgp) @@ -1048,35 +1062,71 @@ out_forget_reply: goto out; } +void +pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_READ, + GFP_KERNEL); + /* If no lseg, fall back to read through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_read_mds(pgio); + +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); + +void +pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) +{ + BUG_ON(pgio->pg_lseg != NULL); + + pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, + req->wb_context, + req_offset(req), + req->wb_bytes, + IOMODE_RW, + GFP_NOFS); + /* If no lseg, fall back to write through mds */ + if (pgio->pg_lseg == NULL) + nfs_pageio_reset_write_mds(pgio); +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write); + bool -pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, - struct nfs_page *req) +pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { - enum pnfs_iomode access_type; - gfp_t gfp_flags; + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; - /* We assume that pg_ioflags == 0 iff we're reading a page */ - if (pgio->pg_ioflags == 0) { - access_type = IOMODE_READ; - gfp_flags = GFP_KERNEL; - } else { - access_type = IOMODE_RW; - gfp_flags = GFP_NOFS; - } + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0); + return true; +} - if (pgio->pg_lseg == NULL) { - if (pgio->pg_count != prev->wb_bytes) - return true; - /* This is first coelesce call for a series of nfs_pages */ - pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, - prev->wb_context, - req_offset(prev), - pgio->pg_count, - access_type, - gfp_flags); - if (pgio->pg_lseg == NULL) - return true; - } +bool +pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + struct nfs_server *server = NFS_SERVER(inode); + struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld; + + if (ld == NULL) + return false; + nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags); + return true; +} + +bool +pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, + struct nfs_page *req) +{ + if (pgio->pg_lseg == NULL) + return nfs_generic_pg_test(pgio, prev, req); /* * Test if a nfs_page is fully contained in the pnfs_layout_range. 
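The pg_recoalesce machinery added to fs/nfs/pagelist.c above exists for the pNFS fallback case: when a layout driver bails out, nfs_pageio_reset_{read,write}_mds() shrinks pg_bsize to the MDS r/wsize and sets pg_recoalesce, and nfs_do_recoalesce() then splices the queued requests onto a private list and feeds them back through the normal add path so they coalesce again under the new size. A toy model of that loop, with plain arrays standing in for the kernel list heads and all sizes invented:

#include <stdio.h>

#define MAXREQ 16

struct desc {
	int queue[MAXREQ];	/* coalesced request sizes */
	int count;		/* requests queued */
	int bytes;		/* bytes queued */
	int bsize;		/* max bytes per RPC */
	int recoalesce;		/* set when bsize shrank mid-stream */
};

static void doio(struct desc *d)
{
	if (d->count)
		printf("issue RPC: %d reqs, %d bytes (bsize %d)\n",
		       d->count, d->bytes, d->bsize);
	d->count = 0;
	d->bytes = 0;
}

static int add_request(struct desc *d, int len)
{
	if (d->bytes + len > d->bsize) {	/* cannot coalesce: flush */
		doio(d);
		if (len > d->bsize)
			return 0;		/* still does not fit */
	}
	d->queue[d->count++] = len;
	d->bytes += len;
	return 1;
}

static void recoalesce(struct desc *d)
{
	int head[MAXREQ];
	int i, n = d->count;

	/* splice the queue away, then re-add under the new bsize */
	for (i = 0; i < n; i++)
		head[i] = d->queue[i];
	d->count = 0;
	d->bytes = 0;
	d->recoalesce = 0;
	for (i = 0; i < n; i++)
		add_request(d, head[i]);
}

int main(void)
{
	struct desc d = { .bsize = 4096 };
	int i;

	for (i = 0; i < 4; i++)
		add_request(&d, 1024);	/* coalesces into one pending RPC */

	d.bsize = 2048;			/* fell back to MDS: smaller wsize */
	d.recoalesce = 1;
	if (d.recoalesce)
		recoalesce(&d);		/* re-adds in 2048-byte batches */
	doio(&d);			/* flush whatever remains */
	return 0;
}

Run it and four 1024-byte requests queued under a 4096-byte limit get reissued as two 2048-byte RPCs once the limit shrinks, which is exactly why nfs_pageio_complete() above gained its retry loop.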
@@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_write_done); -enum pnfs_try_status +static void +pnfs_write_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_write_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_write_mds(desc); + desc->pg_recoalesce = 1; + nfs_writedata_release(data); +} + +static enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *wdata, - const struct rpc_call_ops *call_ops, int how) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg, + int how) { struct inode *inode = wdata->inode; enum pnfs_try_status trypnfs; struct nfs_server *nfss = NFS_SERVER(inode); wdata->mds_ops = call_ops; + wdata->lseg = get_lseg(lseg); dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, inode->i_ino, wdata->args.count, wdata->args.offset, how); @@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata, return trypnfs; } +static void +pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how) +{ + struct nfs_write_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_write_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages); + /* * Called by non rpc-based layout drivers */ @@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data) } EXPORT_SYMBOL_GPL(pnfs_ld_read_done); +static void +pnfs_read_through_mds(struct nfs_pageio_descriptor *desc, + struct nfs_read_data *data) +{ + list_splice_tail_init(&data->pages, &desc->pg_list); + if (data->req && list_empty(&data->req->wb_list)) + nfs_list_add_request(data->req, &desc->pg_list); + nfs_pageio_reset_read_mds(desc); + desc->pg_recoalesce = 1; + nfs_readdata_release(data); +} + /* * Call the appropriate parallel I/O subsystem read function. 
*/ -enum pnfs_try_status +static enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *rdata, - const struct rpc_call_ops *call_ops) + const struct rpc_call_ops *call_ops, + struct pnfs_layout_segment *lseg) { struct inode *inode = rdata->inode; struct nfs_server *nfss = NFS_SERVER(inode); enum pnfs_try_status trypnfs; rdata->mds_ops = call_ops; + rdata->lseg = get_lseg(lseg); dprintk("%s: Reading ino:%lu %u@%llu\n", __func__, inode->i_ino, rdata->args.count, rdata->args.offset); @@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata, return trypnfs; } +static void +pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + struct nfs_read_data *data; + const struct rpc_call_ops *call_ops = desc->pg_rpc_callops; + struct pnfs_layout_segment *lseg = desc->pg_lseg; + + desc->pg_lseg = NULL; + while (!list_empty(head)) { + enum pnfs_try_status trypnfs; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + trypnfs = pnfs_try_to_read_data(data, call_ops, lseg); + if (trypnfs == PNFS_NOT_ATTEMPTED) + pnfs_read_through_mds(desc, data); + } + put_lseg(lseg); +} + +int +pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret != 0) { + put_lseg(desc->pg_lseg); + desc->pg_lseg = NULL; + return ret; + } + pnfs_do_multiple_reads(desc, &head); + return 0; +} +EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages); + /* * Currently there is only one (whole file) write lseg. */ diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 96bf4e6f45be..078670dfbe04 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type { void (*free_lseg) (struct pnfs_layout_segment *lseg); /* test for nfs page cache coalescing */ - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + const struct nfs_pageio_ops *pg_read_ops; + const struct nfs_pageio_ops *pg_write_ops; /* Returns true if layoutdriver wants to divert this request to * driver's commit routine. 
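pnfs_do_multiple_writes() and pnfs_do_multiple_reads() above share one dispatch shape: walk the list of already-prepared RPCs, offer each to the layout driver, and requeue anything that comes back PNFS_NOT_ATTEMPTED so it is resent through the metadata server. A compact sketch of that per-request fallback; names are invented, and the kernel version requeues pages and sets pg_recoalesce rather than printing:

#include <stdio.h>
#include <stdbool.h>

enum try_status { PNFS_ATTEMPTED, PNFS_NOT_ATTEMPTED };

struct wdata {
	int id;
	bool ds_reachable;	/* is a usable data server available? */
};

static enum try_status try_pnfs_write(const struct wdata *w)
{
	if (!w->ds_reachable)
		return PNFS_NOT_ATTEMPTED;
	printf("rpc %d -> data server\n", w->id);
	return PNFS_ATTEMPTED;
}

static void write_through_mds(const struct wdata *w)
{
	printf("rpc %d -> metadata server (fallback)\n", w->id);
}

int main(void)
{
	struct wdata reqs[] = { { 1, true }, { 2, false }, { 3, true } };
	unsigned i;

	for (i = 0; i < sizeof(reqs) / sizeof(reqs[0]); i++)
		if (try_pnfs_write(&reqs[i]) == PNFS_NOT_ATTEMPTED)
			write_through_mds(&reqs[i]);
	return 0;
}

Note the failure granularity: one unreachable data server only diverts its own RPC; the rest of the list still goes over pNFS.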
@@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); /* pnfs.c */ void get_layout_hdr(struct pnfs_layout_hdr *lo); void put_lseg(struct pnfs_layout_segment *lseg); -struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags); + +bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *); +bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int); + void set_pnfs_layoutdriver(struct nfs_server *, u32 id); void unset_pnfs_layoutdriver(struct nfs_server *); -enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, - const struct rpc_call_ops *, int); -enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, - const struct rpc_call_ops *); +void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); +void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); +int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); int pnfs_layout_process(struct nfs4_layoutget *lgp); void pnfs_free_lseg_list(struct list_head *tmp_list); @@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); int pnfs_ld_write_done(struct nfs_write_data *); int pnfs_ld_read_done(struct nfs_read_data *); +struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, + struct nfs_open_context *ctx, + loff_t pos, + u64 count, + enum pnfs_iomode iomode, + gfp_t gfp_flags); + +void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); + +/* nfs4_deviceid_flags */ +enum { + NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ +}; /* pnfs_dev.c */ struct nfs4_deviceid_node { @@ -189,13 +203,13 @@ struct nfs4_deviceid_node { struct hlist_node tmpnode; const struct pnfs_layoutdriver_type *ld; const struct nfs_client *nfs_client; + unsigned long flags; struct nfs4_deviceid deviceid; atomic_t ref; }; void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); -struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, const struct pnfs_layoutdriver_type *, @@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino) return 0; } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) -{ - struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; - - if (ld) - pgio->pg_test = ld->pg_test; -} - #else /* CONFIG_NFS_V4_1 */ static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) @@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg) { } -static inline struct pnfs_layout_segment * -pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, - loff_t pos, u64 count, enum pnfs_iomode access_type, - gfp_t gfp_flags) -{ - return NULL; -} - -static inline enum pnfs_try_status -pnfs_try_to_read_data(struct 
nfs_read_data *data, - const struct rpc_call_ops *call_ops) -{ - return PNFS_NOT_ATTEMPTED; -} - -static inline enum pnfs_try_status -pnfs_try_to_write_data(struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, int how) -{ - return PNFS_NOT_ATTEMPTED; -} - static inline int pnfs_return_layout(struct inode *ino) { return 0; @@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s) { } -static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, - struct inode *inode) +static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode) { + return false; +} + +static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) +{ + return false; } static inline void diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index f0f8e1e22f6c..6fda5228ef56 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_lock(); d = _lookup_deviceid(ld, clp, id, hash); - if (d && !atomic_inc_not_zero(&d->ref)) - d = NULL; + if (d != NULL) + atomic_inc(&d->ref); rcu_read_unlock(); return d; } @@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld, EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); /* - * Unhash and put deviceid + * Remove a deviceid from cache * * @clp nfs_client associated with deviceid * @id the deviceid to unhash * * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. */ -struct nfs4_deviceid_node * -nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, +void +nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, const struct nfs_client *clp, const struct nfs4_deviceid *id) { struct nfs4_deviceid_node *d; @@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, rcu_read_unlock(); if (!d) { spin_unlock(&nfs4_deviceid_lock); - return NULL; + return; } hlist_del_init_rcu(&d->node); spin_unlock(&nfs4_deviceid_lock); @@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, /* balance the initial ref set in pnfs_insert_deviceid */ if (atomic_dec_and_test(&d->ref)) - return d; - - return NULL; -} -EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid); - -/* - * Delete a deviceid from cache - * - * @clp struct nfs_client qualifying the deviceid - * @id deviceid to delete - */ -void -nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld, - const struct nfs_client *clp, const struct nfs4_deviceid *id) -{ - struct nfs4_deviceid_node *d; - - d = nfs4_unhash_put_deviceid(ld, clp, id); - if (!d) - return; - d->ld->free_deviceid_node(d); + d->ld->free_deviceid_node(d); } EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); @@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d, INIT_HLIST_NODE(&d->tmpnode); d->ld = ld; d->nfs_client = nfs_client; + d->flags = 0; d->deviceid = *id; atomic_set(&d->ref, 1); } @@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node); * * @d deviceid node to put * - * @ret true iff the node was deleted + * return true iff the node was deleted + * Note that since the test for d->ref == 0 is sufficient to establish + * that the node is no longer hashed in the global device id cache. 
*/ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) { - if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) + if (!atomic_dec_and_test(&d->ref)) return false; - hlist_del_init_rcu(&d->node); - spin_unlock(&nfs4_deviceid_lock); - synchronize_rcu(); d->ld->free_deviceid_node(d); return true; } @@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp) for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) _deviceid_purge_client(clp, h); } + +/* + * Stop use of all deviceids associated with an nfs_client + */ +void +nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) +{ + struct nfs4_deviceid_node *d; + struct hlist_node *n; + int i; + + rcu_read_lock(); + for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){ + hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node) + if (d->nfs_client == clp) + set_bit(NFS_DEVICEID_INVALID, &d->flags); + } + rcu_read_unlock(); +} diff --git a/fs/nfs/read.c b/fs/nfs/read.c index a68679f538fc..2171c043ab08 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -30,8 +30,7 @@ #define NFSDBG_FACILITY NFSDBG_PAGECACHE -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc); +static const struct nfs_pageio_ops nfs_pageio_read_ops; static const struct rpc_call_ops nfs_read_partial_ops; static const struct rpc_call_ops nfs_read_full_ops; @@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p) mempool_free(p, nfs_rdata_mempool); } -static void nfs_readdata_release(struct nfs_read_data *rdata) +void nfs_readdata_release(struct nfs_read_data *rdata) { put_lseg(rdata->lseg); put_nfs_open_context(rdata->args.context); @@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) } } +static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, + NFS_SERVER(inode)->rsize, 0); +} + +void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_read_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; +} +EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); + +static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, + struct inode *inode) +{ + if (!pnfs_pageio_init_read(pgio, inode)) + nfs_pageio_init_read_mds(pgio, inode); +} + int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); - nfs_pageio_init(&pgio, inode, NULL, 0, 0); - nfs_list_add_request(new, &pgio.pg_list); - pgio.pg_count = len; - - if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) - nfs_pagein_multi(&pgio); - else - nfs_pagein_one(&pgio); + nfs_pageio_init_read(&pgio, inode); + nfs_pageio_add_request(&pgio, new); + nfs_pageio_complete(&pgio); return 0; } @@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read); /* * Set up the NFS read request struct */ -static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, - const struct rpc_call_ops *call_ops, - unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg) +static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, + unsigned int count, unsigned int offset) { struct inode *inode = req->wb_context->dentry->d_inode; data->req = req; data->inode = inode; data->cred = req->wb_context->cred; - data->lseg = 
get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, data->res.count = count; data->res.eof = 0; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_read(struct nfs_read_data *data, + const struct rpc_call_ops *call_ops) +{ + struct inode *inode = data->args.context->dentry->d_inode; return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); } +static int +nfs_do_multiple_reads(struct list_head *head, + const struct rpc_call_ops *call_ops) +{ + struct nfs_read_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_read_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_read(data, call_ops); + if (ret == 0) + ret = ret2; + } + return ret; +} + static void nfs_async_read_error(struct list_head *head) { @@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head) * won't see the new data until our attribute cache is updated. This is more * or less conventional NFS client behavior. */ -static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_read_data *data; - size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; + size_t rsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; - LIST_HEAD(list); nfs_list_remove_request(req); + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes,rsize); @@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) data = nfs_readdata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + data->pagevec[0] = page; + nfs_read_rpcsetup(req, data, len, offset); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while(nbytes != 0); atomic_set(&req->wb_complete, requests); - - BUG_ON(desc->pg_lseg != NULL); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_read_data, pages); - list_del_init(&data->pages); - - data->pagevec[0] = page; - - if (nbytes < rsize) - rsize = nbytes; - ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, - rsize, offset, lseg); - if (ret == 0) - ret = ret2; - offset += rsize; - nbytes -= rsize; - } while (nbytes != 0); - put_lseg(lseg); - desc->pg_lseg = NULL; - + desc->pg_rpc_callops = &nfs_read_partial_ops; return ret; - out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_read_data, pages); - list_del(&data->pages); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_read_data, list); + list_del(&data->list); nfs_readdata_free(data); } SetPageError(page); @@ -327,19 +336,19 @@ out_bad: return -ENOMEM; } -static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) +static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_read_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret = -ENOMEM; + int ret = 
0; data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); if (!data) { nfs_async_read_error(head); + ret = -ENOMEM; goto out; } @@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_READ, GFP_KERNEL); - ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, - 0, lseg); + nfs_read_rpcsetup(req, data, desc->pg_count, 0); + list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_read_full_ops; out: - put_lseg(lseg); - desc->pg_lseg = NULL; return ret; } +int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_pagein_multi(desc, head); + return nfs_pagein_one(desc, head); +} + +static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_pagein(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); + return ret; +} + +static const struct nfs_pageio_ops nfs_pageio_read_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_readpages, +}; + /* * This is the callback from RPC telling us whether a reply was * received or some error occurred (timeout or socket shutdown). @@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, .pgio = &pgio, }; struct inode *inode = mapping->host; - struct nfs_server *server = NFS_SERVER(inode); - size_t rsize = server->rsize; unsigned long npages; int ret = -ESTALE; @@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping, if (ret == 0) goto read_complete; /* all pages were read */ - if (rsize < PAGE_CACHE_SIZE) - nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); - else - nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); + nfs_pageio_init_read(&pgio, inode); ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c index 8d6864c2a5fa..b2fbbde58e44 100644 --- a/fs/nfs/unlink.c +++ b/fs/nfs/unlink.c @@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n alias = d_lookup(parent, &data->args.name); if (alias != NULL) { - int ret = 0; + int ret; void *devname_garbage = NULL; /* @@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * the sillyrename information to the aliased dentry. */ nfs_free_dname(data); + ret = nfs_copy_dname(alias, data); spin_lock(&alias->d_lock); - if (alias->d_inode != NULL && + if (ret == 0 && alias->d_inode != NULL && !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { devname_garbage = alias->d_fsdata; alias->d_fsdata = data; alias->d_flags |= DCACHE_NFSFS_RENAMED; ret = 1; - } + } else + ret = 0; spin_unlock(&alias->d_lock); nfs_dec_sillycount(dir); dput(alias); @@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n * point dentry is definitely not a root, so we won't need * that anymore. 
*/ - if (devname_garbage) - kfree(devname_garbage); + kfree(devname_garbage); return ret; } data->dir = igrab(dir); @@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data) if (parent == NULL) goto out_free; dir = parent->d_inode; - if (nfs_copy_dname(dentry, data) != 0) - goto out_dput; /* Non-exclusive lock protects against concurrent lookup() calls */ spin_lock(&dir->i_lock); if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { @@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) struct nfs_renamedata *data = calldata; struct inode *old_dir = data->old_dir; struct inode *new_dir = data->new_dir; + struct dentry *old_dentry = data->old_dentry; + struct dentry *new_dentry = data->new_dentry; if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); @@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata) } if (task->tk_status != 0) { - nfs_cancel_async_unlink(data->old_dentry); + nfs_cancel_async_unlink(old_dentry); return; } - nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); - d_move(data->old_dentry, data->new_dentry); + d_drop(old_dentry); + d_drop(new_dentry); } /** @@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir, * and only performs the unlink once the last reference to it is put. * * The final cleanup is done during dentry_iput. + * + * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server + * could take responsibility for keeping open files referenced. The server + * would also need to ensure that opened-but-deleted files were kept over + * reboots. However, we may not assume a server does so. (RFC 5661 + * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can + * use to advertise that it does this; some day we may take advantage of + * it.)) */ int nfs_sillyrename(struct inode *dir, struct dentry *dentry) @@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry) if (error) goto out_dput; + /* populate unlinkdata with the right dname */ + error = nfs_copy_dname(sdentry, + (struct nfs_unlinkdata *)dentry->d_fsdata); + if (error) { + nfs_cancel_async_unlink(dentry); + goto out_dput; + } + /* run the rename task, undo unlink if it fails */ task = nfs_async_rename(dir, dir, dentry, sdentry); if (IS_ERR(task)) { diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 00e37501fa3b..b39b37f80913 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p) mempool_free(p, nfs_wdata_mempool); } -static void nfs_writedata_release(struct nfs_write_data *wdata) +void nfs_writedata_release(struct nfs_write_data *wdata) { put_lseg(wdata->lseg); put_nfs_open_context(wdata->args.context); @@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write); /* * Set up the argument/result storage required for the RPC call. 
*/ -static int nfs_write_rpcsetup(struct nfs_page *req, +static void nfs_write_rpcsetup(struct nfs_page *req, struct nfs_write_data *data, - const struct rpc_call_ops *call_ops, unsigned int count, unsigned int offset, - struct pnfs_layout_segment *lseg, int how) { struct inode *inode = req->wb_context->dentry->d_inode; @@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->req = req; data->inode = inode = req->wb_context->dentry->d_inode; data->cred = req->wb_context->cred; - data->lseg = get_lseg(lseg); data->args.fh = NFS_FH(inode); data->args.offset = req_offset(req) + offset; @@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req, data->args.context = get_nfs_open_context(req->wb_context); data->args.lock_context = req->wb_lock_context; data->args.stable = NFS_UNSTABLE; - if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { - data->args.stable = NFS_DATA_SYNC; - if (!nfs_need_commit(NFS_I(inode))) - data->args.stable = NFS_FILE_SYNC; + switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { + case 0: + break; + case FLUSH_COND_STABLE: + if (nfs_need_commit(NFS_I(inode))) + break; + default: + data->args.stable = NFS_FILE_SYNC; } data->res.fattr = &data->fattr; data->res.count = count; data->res.verf = &data->verf; nfs_fattr_init(&data->fattr); +} - if (data->lseg && - (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) - return 0; +static int nfs_do_write(struct nfs_write_data *data, + const struct rpc_call_ops *call_ops, + int how) +{ + struct inode *inode = data->args.context->dentry->d_inode; return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); } +static int nfs_do_multiple_writes(struct list_head *head, + const struct rpc_call_ops *call_ops, + int how) +{ + struct nfs_write_data *data; + int ret = 0; + + while (!list_empty(head)) { + int ret2; + + data = list_entry(head->next, struct nfs_write_data, list); + list_del_init(&data->list); + + ret2 = nfs_do_write(data, call_ops, how); + if (ret == 0) + ret = ret2; + } + return ret; +} + /* If a nfs_flush_* function fails, it should remove reqs from @head and * call this on each, which will prepare them to be retried on next * writeback using standard nfs. @@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req) * Generate multiple small requests to write out a single * contiguous dirty area on one page. 
*/ -static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) +static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req = nfs_list_entry(desc->pg_list.next); struct page *page = req->wb_page; struct nfs_write_data *data; - size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; + size_t wsize = desc->pg_bsize, nbytes; unsigned int offset; int requests = 0; int ret = 0; - struct pnfs_layout_segment *lseg; - LIST_HEAD(list); nfs_list_remove_request(req); @@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) desc->pg_ioflags &= ~FLUSH_COND_STABLE; + offset = 0; nbytes = desc->pg_count; do { size_t len = min(nbytes, wsize); @@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) data = nfs_writedata_alloc(1); if (!data) goto out_bad; - list_add(&data->pages, &list); + data->pagevec[0] = page; + nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags); + list_add(&data->list, res); requests++; nbytes -= len; + offset += len; } while (nbytes != 0); atomic_set(&req->wb_complete, requests); - - BUG_ON(desc->pg_lseg); - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); - ClearPageError(page); - offset = 0; - nbytes = desc->pg_count; - do { - int ret2; - - data = list_entry(list.next, struct nfs_write_data, pages); - list_del_init(&data->pages); - - data->pagevec[0] = page; - - if (nbytes < wsize) - wsize = nbytes; - ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, - wsize, offset, lseg, desc->pg_ioflags); - if (ret == 0) - ret = ret2; - offset += wsize; - nbytes -= wsize; - } while (nbytes != 0); - - put_lseg(lseg); - desc->pg_lseg = NULL; + desc->pg_rpc_callops = &nfs_write_partial_ops; return ret; out_bad: - while (!list_empty(&list)) { - data = list_entry(list.next, struct nfs_write_data, pages); - list_del(&data->pages); + while (!list_empty(res)) { + data = list_entry(res->next, struct nfs_write_data, list); + list_del(&data->list); nfs_writedata_free(data); } nfs_redirty_request(req); @@ -987,14 +986,13 @@ out_bad: * This is the case if nfs_updatepage detects a conflicting request * that has been written but not committed. 
*/ -static int nfs_flush_one(struct nfs_pageio_descriptor *desc) +static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res) { struct nfs_page *req; struct page **pages; struct nfs_write_data *data; struct list_head *head = &desc->pg_list; - struct pnfs_layout_segment *lseg = desc->pg_lseg; - int ret; + int ret = 0; data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, desc->pg_count)); @@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc) *pages++ = req->wb_page; } req = nfs_list_entry(data->pages.next); - if ((!lseg) && list_is_singular(&data->pages)) - lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, - req_offset(req), desc->pg_count, - IOMODE_RW, GFP_NOFS); if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) desc->pg_ioflags &= ~FLUSH_COND_STABLE; /* Set up the argument struct */ - ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); + nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags); + list_add(&data->list, res); + desc->pg_rpc_callops = &nfs_write_full_ops; out: - put_lseg(lseg); /* Cleans any gotten in ->pg_test */ - desc->pg_lseg = NULL; return ret; } -static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, +int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head) +{ + if (desc->pg_bsize < PAGE_CACHE_SIZE) + return nfs_flush_multi(desc, head); + return nfs_flush_one(desc, head); +} + +static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) +{ + LIST_HEAD(head); + int ret; + + ret = nfs_generic_flush(desc, &head); + if (ret == 0) + ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops, + desc->pg_ioflags); + return ret; +} + +static const struct nfs_pageio_ops nfs_pageio_write_ops = { + .pg_test = nfs_generic_pg_test, + .pg_doio = nfs_generic_pg_writepages, +}; + +static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags) { - size_t wsize = NFS_SERVER(inode)->wsize; + nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, + NFS_SERVER(inode)->wsize, ioflags); +} + +void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio) +{ + pgio->pg_ops = &nfs_pageio_write_ops; + pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize; +} +EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds); - if (wsize < PAGE_CACHE_SIZE) - nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); - else - nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); +static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, + struct inode *inode, int ioflags) +{ + if (!pnfs_pageio_init_write(pgio, inode, ioflags)) + nfs_pageio_init_write_mds(pgio, inode, ioflags); } /* diff --git a/fs/proc/generic.c b/fs/proc/generic.c index f1637f17c37c..9d99131d0d65 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -620,8 +620,7 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent, if (!ent) goto out; memset(ent, 0, sizeof(struct proc_dir_entry)); - memcpy(((char *) ent) + sizeof(struct proc_dir_entry), fn, len + 1); - ent->name = ((char *) ent) + sizeof(*ent); + memcpy(ent->name, fn, len + 1); ent->namelen = len; ent->mode = mode; ent->nlink = nlink; diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 9020ac15baaa..f738024ccc8e 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -197,15 +197,15 @@ static __net_init int proc_net_ns_init(struct net *net) int err; err = -ENOMEM; - 
netd = kzalloc(sizeof(*netd), GFP_KERNEL); + netd = kzalloc(sizeof(*netd) + 4, GFP_KERNEL); if (!netd) goto out; netd->data = net; netd->nlink = 2; - netd->name = "net"; netd->namelen = 3; netd->parent = &proc_root; + memcpy(netd->name, "net", 4); err = -EEXIST; net_statd = proc_net_mkdir(net, "stat", netd); diff --git a/fs/proc/root.c b/fs/proc/root.c index d6c3b416529b..9a8a2b77b874 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -186,13 +186,13 @@ static const struct inode_operations proc_root_inode_operations = { struct proc_dir_entry proc_root = { .low_ino = PROC_ROOT_INO, .namelen = 5, - .name = "/proc", .mode = S_IFDIR | S_IRUGO | S_IXUGO, .nlink = 2, .count = ATOMIC_INIT(1), .proc_iops = &proc_root_inode_operations, .proc_fops = &proc_root_operations, .parent = &proc_root, + .name = "/proc", }; int pid_ns_prepare_proc(struct pid_namespace *ns) diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index b2b411985591..d1fe74506c4c 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1224,6 +1224,9 @@ _xfs_buf_ioapply( rw = READ; } + /* we only use the buffer cache for meta-data */ + rw |= REQ_META; + next_chunk: atomic_inc(&bp->b_io_remaining); nr_pages = BIO_MAX_SECTORS >> (PAGE_SHIFT - BBSHIFT); diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 825390e1c138..7f7b42469ea7 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -149,7 +149,9 @@ xfs_file_fsync( xfs_iflags_clear(ip, XFS_ITRUNCATED); + xfs_ilock(ip, XFS_IOLOCK_SHARED); xfs_ioend_wait(ip); + xfs_iunlock(ip, XFS_IOLOCK_SHARED); if (mp->m_flags & XFS_MOUNT_BARRIER) { /* diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 6544c3236bc8..b9c172b3fbbe 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -1194,9 +1194,14 @@ xfs_setup_inode( break; } - /* if there is no attribute fork no ACL can exist on this inode */ - if (!XFS_IFORK_Q(ip)) + /* + * If there is no attribute fork no ACL can exist on this inode, + * and it can't have any file capabilities attached to it either. + */ + if (!XFS_IFORK_Q(ip)) { + inode_has_no_xattr(inode); cache_no_acl(inode); + } xfs_iflags_clear(ip, XFS_INEW); barrier(); diff --git a/fs/xfs/xfs_da_btree.c b/fs/xfs/xfs_da_btree.c index 2925726529f8..5bfcb8779f9f 100644 --- a/fs/xfs/xfs_da_btree.c +++ b/fs/xfs/xfs_da_btree.c @@ -692,6 +692,24 @@ xfs_da_join(xfs_da_state_t *state) return(error); } +#ifdef DEBUG +static void +xfs_da_blkinfo_onlychild_validate(struct xfs_da_blkinfo *blkinfo, __u16 level) +{ + __be16 magic = blkinfo->magic; + + if (level == 1) { + ASSERT(magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || + magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); + } else + ASSERT(magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); + ASSERT(!blkinfo->forw); + ASSERT(!blkinfo->back); +} +#else /* !DEBUG */ +#define xfs_da_blkinfo_onlychild_validate(blkinfo, level) +#endif /* !DEBUG */ + /* * We have only one entry in the root. Copy the only remaining child of * the old root to block 0 as the new root node. 
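The fs/proc hunks above (generic.c, proc_net.c, root.c) switch proc_dir_entry from carrying a separate name pointer to embedding the name right after the struct in a single allocation, hence the kzalloc(sizeof(*netd) + 4) for "net" plus its NUL and the memcpy(ent->name, fn, len + 1). A minimal sketch of that layout using a C99 flexible array member; the struct here is a stand-in, not the real proc_dir_entry:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct entry {
	int namelen;
	char name[];		/* name storage follows the struct itself */
};

static struct entry *entry_create(const char *fn)
{
	size_t len = strlen(fn);
	struct entry *e = calloc(1, sizeof(*e) + len + 1);

	if (!e)
		return NULL;
	memcpy(e->name, fn, len + 1);	/* one allocation covers both */
	e->namelen = (int)len;
	return e;
}

int main(void)
{
	struct entry *e = entry_create("net");

	if (!e)
		return 1;
	printf("%s (%d)\n", e->name, e->namelen);
	free(e);		/* frees the embedded name too */
	return 0;
}

One allocation instead of a struct plus a separate string means one failure path, one free, and better cache locality when comparing names.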
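Just above, xfs_da_btree.c gains xfs_da_blkinfo_onlychild_validate(): the magic-number assertions are hoisted into a helper that exists only under DEBUG and becomes an empty macro otherwise, so production builds pay nothing. The same pattern in standalone form, with invented magic values; compile with -DDEBUG to enable the checks:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define LEAF_MAGIC 0xd2f1
#define NODE_MAGIC 0xfebe

struct blkinfo {
	uint16_t magic;
	uint16_t forw, back;	/* sibling links; must be clear here */
};

#ifdef DEBUG
static void validate_only_child(const struct blkinfo *b, uint16_t level)
{
	if (level == 1)
		assert(b->magic == LEAF_MAGIC);
	else
		assert(b->magic == NODE_MAGIC);
	assert(!b->forw && !b->back);
}
#else
/* expands to (almost) nothing: zero cost in non-debug builds */
#define validate_only_child(b, level) do { (void)(b); (void)(level); } while (0)
#endif

int main(void)
{
	struct blkinfo b = { .magic = LEAF_MAGIC };

	validate_only_child(&b, 1);
	puts("only child validated; root join may proceed");
	return 0;
}
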
@@ -700,8 +718,6 @@ STATIC int xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) { xfs_da_intnode_t *oldroot; - /* REFERENCED */ - xfs_da_blkinfo_t *blkinfo; xfs_da_args_t *args; xfs_dablk_t child; xfs_dabuf_t *bp; @@ -732,15 +748,9 @@ xfs_da_root_join(xfs_da_state_t *state, xfs_da_state_blk_t *root_blk) if (error) return(error); ASSERT(bp != NULL); - blkinfo = bp->data; - if (be16_to_cpu(oldroot->hdr.level) == 1) { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DIR2_LEAFN_MAGIC) || - blkinfo->magic == cpu_to_be16(XFS_ATTR_LEAF_MAGIC)); - } else { - ASSERT(blkinfo->magic == cpu_to_be16(XFS_DA_NODE_MAGIC)); - } - ASSERT(!blkinfo->forw); - ASSERT(!blkinfo->back); + xfs_da_blkinfo_onlychild_validate(bp->data, + be16_to_cpu(oldroot->hdr.level)); + memcpy(root_blk->bp->data, bp->data, state->blocksize); xfs_da_log_buf(args->trans, root_blk->bp, 0, state->blocksize - 1); error = xfs_da_shrink_inode(args, child, bp); diff --git a/include/keys/encrypted-type.h b/include/keys/encrypted-type.h index 95855017a32b..1d4541370a64 100644 --- a/include/keys/encrypted-type.h +++ b/include/keys/encrypted-type.h @@ -1,6 +1,11 @@ /* * Copyright (C) 2010 IBM Corporation - * Author: Mimi Zohar <zohar@us.ibm.com> + * Copyright (C) 2010 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Authors: + * Mimi Zohar <zohar@us.ibm.com> + * Roberto Sassu <roberto.sassu@polito.it> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -15,13 +20,17 @@ struct encrypted_key_payload { struct rcu_head rcu; + char *format; /* datablob: format */ char *master_desc; /* datablob: master key name */ char *datalen; /* datablob: decrypted key length */ u8 *iv; /* datablob: iv */ u8 *encrypted_data; /* datablob: encrypted data */ unsigned short datablob_len; /* length of datablob */ unsigned short decrypted_datalen; /* decrypted data length */ - u8 decrypted_data[0]; /* decrypted data + datablob + hmac */ + unsigned short payload_datalen; /* payload data length */ + unsigned short encrypted_key_format; /* encrypted key format */ + u8 *decrypted_data; /* decrypted data */ + u8 payload_data[0]; /* payload data + datablob + hmac */ }; extern struct key_type key_type_encrypted; diff --git a/include/linux/ecryptfs.h b/include/linux/ecryptfs.h new file mode 100644 index 000000000000..2224a8c0cb64 --- /dev/null +++ b/include/linux/ecryptfs.h @@ -0,0 +1,113 @@ +#ifndef _LINUX_ECRYPTFS_H +#define _LINUX_ECRYPTFS_H + +/* Version verification for shared data structures w/ userspace */ +#define ECRYPTFS_VERSION_MAJOR 0x00 +#define ECRYPTFS_VERSION_MINOR 0x04 +#define ECRYPTFS_SUPPORTED_FILE_VERSION 0x03 +/* These flags indicate which features are supported by the kernel + * module; userspace tools such as the mount helper read + * ECRYPTFS_VERSIONING_MASK from a sysfs handle in order to determine + * how to behave. 
*/ +#define ECRYPTFS_VERSIONING_PASSPHRASE 0x00000001 +#define ECRYPTFS_VERSIONING_PUBKEY 0x00000002 +#define ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH 0x00000004 +#define ECRYPTFS_VERSIONING_POLICY 0x00000008 +#define ECRYPTFS_VERSIONING_XATTR 0x00000010 +#define ECRYPTFS_VERSIONING_MULTKEY 0x00000020 +#define ECRYPTFS_VERSIONING_DEVMISC 0x00000040 +#define ECRYPTFS_VERSIONING_HMAC 0x00000080 +#define ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION 0x00000100 +#define ECRYPTFS_VERSIONING_GCM 0x00000200 +#define ECRYPTFS_VERSIONING_MASK (ECRYPTFS_VERSIONING_PASSPHRASE \ + | ECRYPTFS_VERSIONING_PLAINTEXT_PASSTHROUGH \ + | ECRYPTFS_VERSIONING_PUBKEY \ + | ECRYPTFS_VERSIONING_XATTR \ + | ECRYPTFS_VERSIONING_MULTKEY \ + | ECRYPTFS_VERSIONING_DEVMISC \ + | ECRYPTFS_VERSIONING_FILENAME_ENCRYPTION) +#define ECRYPTFS_MAX_PASSWORD_LENGTH 64 +#define ECRYPTFS_MAX_PASSPHRASE_BYTES ECRYPTFS_MAX_PASSWORD_LENGTH +#define ECRYPTFS_SALT_SIZE 8 +#define ECRYPTFS_SALT_SIZE_HEX (ECRYPTFS_SALT_SIZE*2) +/* The original signature size is only for what is stored on disk; all + * in-memory representations are expanded hex, so it better adapted to + * be passed around or referenced on the command line */ +#define ECRYPTFS_SIG_SIZE 8 +#define ECRYPTFS_SIG_SIZE_HEX (ECRYPTFS_SIG_SIZE*2) +#define ECRYPTFS_PASSWORD_SIG_SIZE ECRYPTFS_SIG_SIZE_HEX +#define ECRYPTFS_MAX_KEY_BYTES 64 +#define ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES 512 +#define ECRYPTFS_FILE_VERSION 0x03 +#define ECRYPTFS_MAX_PKI_NAME_BYTES 16 + +#define RFC2440_CIPHER_DES3_EDE 0x02 +#define RFC2440_CIPHER_CAST_5 0x03 +#define RFC2440_CIPHER_BLOWFISH 0x04 +#define RFC2440_CIPHER_AES_128 0x07 +#define RFC2440_CIPHER_AES_192 0x08 +#define RFC2440_CIPHER_AES_256 0x09 +#define RFC2440_CIPHER_TWOFISH 0x0a +#define RFC2440_CIPHER_CAST_6 0x0b + +#define RFC2440_CIPHER_RSA 0x01 + +/** + * For convenience, we may need to pass around the encrypted session + * key between kernel and userspace because the authentication token + * may not be extractable. For example, the TPM may not release the + * private key, instead requiring the encrypted data and returning the + * decrypted data. 
+ */ +struct ecryptfs_session_key { +#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_DECRYPT 0x00000001 +#define ECRYPTFS_USERSPACE_SHOULD_TRY_TO_ENCRYPT 0x00000002 +#define ECRYPTFS_CONTAINS_DECRYPTED_KEY 0x00000004 +#define ECRYPTFS_CONTAINS_ENCRYPTED_KEY 0x00000008 + u32 flags; + u32 encrypted_key_size; + u32 decrypted_key_size; + u8 encrypted_key[ECRYPTFS_MAX_ENCRYPTED_KEY_BYTES]; + u8 decrypted_key[ECRYPTFS_MAX_KEY_BYTES]; +}; + +struct ecryptfs_password { + u32 password_bytes; + s32 hash_algo; + u32 hash_iterations; + u32 session_key_encryption_key_bytes; +#define ECRYPTFS_PERSISTENT_PASSWORD 0x01 +#define ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET 0x02 + u32 flags; + /* Iterated-hash concatenation of salt and passphrase */ + u8 session_key_encryption_key[ECRYPTFS_MAX_KEY_BYTES]; + u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; + /* Always in expanded hex */ + u8 salt[ECRYPTFS_SALT_SIZE]; +}; + +enum ecryptfs_token_types {ECRYPTFS_PASSWORD, ECRYPTFS_PRIVATE_KEY}; + +struct ecryptfs_private_key { + u32 key_size; + u32 data_len; + u8 signature[ECRYPTFS_PASSWORD_SIG_SIZE + 1]; + char pki_type[ECRYPTFS_MAX_PKI_NAME_BYTES + 1]; + u8 data[]; +}; + +/* May be a password or a private key */ +struct ecryptfs_auth_tok { + u16 version; /* 8-bit major and 8-bit minor */ + u16 token_type; +#define ECRYPTFS_ENCRYPT_ONLY 0x00000001 + u32 flags; + struct ecryptfs_session_key session_key; + u8 reserved[32]; + union { + struct ecryptfs_password password; + struct ecryptfs_private_key private_key; + } token; +} __attribute__ ((packed)); + +#endif /* _LINUX_ECRYPTFS_H */ diff --git a/include/linux/if.h b/include/linux/if.h index 3bc63e6a02f7..03489ca92ded 100644 --- a/include/linux/if.h +++ b/include/linux/if.h @@ -76,6 +76,8 @@ #define IFF_BRIDGE_PORT 0x4000 /* device used as bridge port */ #define IFF_OVS_DATAPATH 0x8000 /* device used as Open vSwitch * datapath port */ +#define IFF_TX_SKB_SHARING 0x10000 /* The interface supports sharing + * skbs on transmit */ #define IF_GET_IFACE 0x0001 /* for querying only */ #define IF_GET_PROTO 0x0002 diff --git a/include/linux/input.h b/include/linux/input.h index 771d6d85667d..068784e17972 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -119,9 +119,9 @@ struct input_keymap_entry { #define EVIOCGSND(len) _IOC(_IOC_READ, 'E', 0x1a, len) /* get all sounds status */ #define EVIOCGSW(len) _IOC(_IOC_READ, 'E', 0x1b, len) /* get all switch states */ -#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + ev, len) /* get event bits */ -#define EVIOCGABS(abs) _IOR('E', 0x40 + abs, struct input_absinfo) /* get abs value/limits */ -#define EVIOCSABS(abs) _IOW('E', 0xc0 + abs, struct input_absinfo) /* set abs value/limits */ +#define EVIOCGBIT(ev,len) _IOC(_IOC_READ, 'E', 0x20 + (ev), len) /* get event bits */ +#define EVIOCGABS(abs) _IOR('E', 0x40 + (abs), struct input_absinfo) /* get abs value/limits */ +#define EVIOCSABS(abs) _IOW('E', 0xc0 + (abs), struct input_absinfo) /* set abs value/limits */ #define EVIOCSFF _IOC(_IOC_WRITE, 'E', 0x80, sizeof(struct ff_effect)) /* send a force effect to a force feedback device */ #define EVIOCRMFF _IOW('E', 0x81, int) /* Erase a force effect */ diff --git a/include/linux/input/kxtj9.h b/include/linux/input/kxtj9.h new file mode 100644 index 000000000000..f6bac89537b8 --- /dev/null +++ b/include/linux/input/kxtj9.h @@ -0,0 +1,70 @@ +/* + * Copyright (C) 2011 Kionix, Inc. 
+ * Written by Chris Hudson <chudson@kionix.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307, USA + */ + +#ifndef __KXTJ9_H__ +#define __KXTJ9_H__ + +#define KXTJ9_I2C_ADDR 0x0F + +struct kxtj9_platform_data { + unsigned int min_interval; /* minimum poll interval (in milli-seconds) */ + + /* + * By default, x is axis 0, y is axis 1, z is axis 2; these can be + * changed to account for sensor orientation within the host device. + */ + u8 axis_map_x; + u8 axis_map_y; + u8 axis_map_z; + + /* + * Each axis can be negated to account for sensor orientation within + * the host device. + */ + bool negate_x; + bool negate_y; + bool negate_z; + + /* CTRL_REG1: set resolution, g-range, data ready enable */ + /* Output resolution: 8-bit valid or 12-bit valid */ + #define RES_8BIT 0 + #define RES_12BIT (1 << 6) + u8 res_12bit; + /* Output g-range: +/-2g, 4g, or 8g */ + #define KXTJ9_G_2G 0 + #define KXTJ9_G_4G (1 << 3) + #define KXTJ9_G_8G (1 << 4) + u8 g_range; + + /* DATA_CTRL_REG: controls the output data rate of the part */ + #define ODR12_5F 0 + #define ODR25F 1 + #define ODR50F 2 + #define ODR100F 3 + #define ODR200F 4 + #define ODR400F 5 + #define ODR800F 6 + u8 data_odr_init; + + int (*init)(void); + void (*exit)(void); + int (*power_on)(void); + int (*power_off)(void); +}; +#endif /* __KXTJ9_H__ */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 9a43ad792cfc..46ac9a50528d 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -56,6 +56,14 @@ #define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) #define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#define DIV_ROUND_UP_ULL(ll,d) \ + ({ unsigned long long _tmp = (ll)+(d)-1; do_div(_tmp, d); _tmp; }) + +#if BITS_PER_LONG == 32 +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP_ULL(ll, d) +#else +# define DIV_ROUND_UP_SECTOR_T(ll,d) DIV_ROUND_UP(ll,d) +#endif /* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */ #define roundup(x, y) ( \ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2ed0b6cf11c5..ddee79bb8f15 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1132,7 +1132,7 @@ struct net_device { spinlock_t addr_list_lock; struct netdev_hw_addr_list uc; /* Unicast mac addresses */ struct netdev_hw_addr_list mc; /* Multicast mac addresses */ - int uc_promisc; + bool uc_promisc; unsigned int promiscuity; unsigned int allmulti; @@ -1679,9 +1679,12 @@ static inline int skb_gro_header_hard(struct sk_buff *skb, unsigned int hlen) static inline void *skb_gro_header_slow(struct sk_buff *skb, unsigned int hlen, unsigned int offset) { + if (!pskb_may_pull(skb, hlen)) + return NULL; + NAPI_GRO_CB(skb)->frag0 = NULL; NAPI_GRO_CB(skb)->frag0_len = 0; - return pskb_may_pull(skb, hlen) ? 
skb->data + offset : NULL; + return skb->data + offset; } static inline void *skb_gro_mac_header(struct sk_buff *skb) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 504b289ba680..a3c4bc800dce 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -563,6 +563,9 @@ enum { NFSPROC4_CLNT_GETDEVICEINFO, NFSPROC4_CLNT_LAYOUTCOMMIT, NFSPROC4_CLNT_LAYOUTRETURN, + NFSPROC4_CLNT_SECINFO_NO_NAME, + NFSPROC4_CLNT_TEST_STATEID, + NFSPROC4_CLNT_FREE_STATEID, }; /* nfs41 types */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 08c444aa0411..50a661f8b45a 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -16,6 +16,7 @@ struct nfs4_sequence_args; struct nfs4_sequence_res; struct nfs_server; struct nfs4_minor_version_ops; +struct server_scope; /* * The nfs_client identifies our client state to the server. @@ -77,12 +78,13 @@ struct nfs_client { /* The flags used for obtaining the clientid during EXCHANGE_ID */ u32 cl_exchange_flags; struct nfs4_session *cl_session; /* sharred session */ - struct list_head cl_layouts; #endif /* CONFIG_NFS_V4 */ #ifdef CONFIG_NFS_FSCACHE struct fscache_cookie *fscache; /* client index cache cookie */ #endif + + struct server_scope *server_scope; /* from exchange_id */ }; /* @@ -149,6 +151,7 @@ struct nfs_server { struct rb_root openowner_id; struct rb_root lockowner_id; #endif + struct list_head layouts; struct list_head delegations; void (*destroy)(struct nfs_server *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 25311b3bedf8..e2791a27a901 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -55,20 +55,28 @@ struct nfs_page { struct nfs_writeverf wb_verf; /* Commit cookie */ }; +struct nfs_pageio_descriptor; +struct nfs_pageio_ops { + void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *); + bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); + int (*pg_doio)(struct nfs_pageio_descriptor *); +}; + struct nfs_pageio_descriptor { struct list_head pg_list; unsigned long pg_bytes_written; size_t pg_count; size_t pg_bsize; unsigned int pg_base; - char pg_moreio; + unsigned char pg_moreio : 1, + pg_recoalesce : 1; struct inode *pg_inode; - int (*pg_doio)(struct nfs_pageio_descriptor *); + const struct nfs_pageio_ops *pg_ops; int pg_ioflags; int pg_error; + const struct rpc_call_ops *pg_rpc_callops; struct pnfs_layout_segment *pg_lseg; - bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); }; #define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) @@ -85,7 +93,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst, pgoff_t idx_start, unsigned int npages, int tag); extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, struct inode *inode, - int (*doio)(struct nfs_pageio_descriptor *desc), + const struct nfs_pageio_ops *pg_ops, size_t bsize, int how); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, @@ -100,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req); extern int nfs_set_page_tag_locked(struct nfs_page *req); extern void nfs_clear_page_tag_locked(struct nfs_page *req); - /* * Lock the page of an asynchronous request without getting a new reference */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 00848d86ffb2..5b115956abac 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data { }; struct nfs4_layoutreturn_args { - __u32 
layout_type; + struct pnfs_layout_hdr *layout; struct inode *inode; nfs4_stateid stateid; + __u32 layout_type; struct nfs4_sequence_args seq_args; }; @@ -1060,6 +1061,7 @@ struct server_scope { struct nfs41_exchange_id_res { struct nfs_client *client; u32 flags; + struct server_scope *server_scope; }; struct nfs41_create_session_args { @@ -1083,6 +1085,34 @@ struct nfs41_reclaim_complete_args { struct nfs41_reclaim_complete_res { struct nfs4_sequence_res seq_res; }; + +#define SECINFO_STYLE_CURRENT_FH 0 +#define SECINFO_STYLE_PARENT 1 +struct nfs41_secinfo_no_name_args { + int style; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_test_stateid_args { + nfs4_stateid *stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_test_stateid_res { + unsigned int status; + struct nfs4_sequence_res seq_res; +}; + +struct nfs41_free_stateid_args { + nfs4_stateid *stateid; + struct nfs4_sequence_args seq_args; +}; + +struct nfs41_free_stateid_res { + unsigned int status; + struct nfs4_sequence_res seq_res; +}; + #endif /* CONFIG_NFS_V4_1 */ struct nfs_page; @@ -1096,6 +1126,7 @@ struct nfs_read_data { struct rpc_cred *cred; struct nfs_fattr fattr; /* fattr storage */ struct list_head pages; /* Coalesced read requests */ + struct list_head list; /* lists of struct nfs_read_data */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ @@ -1119,6 +1150,7 @@ struct nfs_write_data { struct nfs_fattr fattr; struct nfs_writeverf verf; struct list_head pages; /* Coalesced requests we wish to flush */ + struct list_head list; /* lists of struct nfs_write_data */ struct nfs_page *req; /* multi ops per nfs_page */ struct page **pagevec; unsigned int npages; /* Max length of pagevec */ diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h index 76efbdd01622..435dd5fa7453 100644 --- a/include/linux/pnfs_osd_xdr.h +++ b/include/linux/pnfs_osd_xdr.h @@ -41,9 +41,6 @@ #include <linux/nfs_fs.h> #include <linux/nfs_page.h> -#include <scsi/osd_protocol.h> - -#define PNFS_OSD_OSDNAME_MAXSIZE 256 /* * draft-ietf-nfsv4-minorversion-22 @@ -99,12 +96,6 @@ struct pnfs_osd_objid { #define _DEVID_HI(oid_device_id) \ (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) -static inline int -pnfs_osd_objid_xdr_sz(void) -{ - return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2; -} - enum pnfs_osd_version { PNFS_OSD_MISSING = 0, PNFS_OSD_VERSION_1 = 1, @@ -189,8 +180,6 @@ struct pnfs_osd_targetid { struct nfs4_string oti_scsi_device_id; }; -enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 }; - /* struct netaddr4 { * // see struct rpcb in RFC1833 * string r_netid<>; // network id @@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr { struct pnfs_osd_net_addr ota_netaddr; }; -enum { - NETWORK_ID_MAX = 16 / 4, - UNIVERSAL_ADDRESS_MAX = 64 / 4, - PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX, -}; - struct pnfs_osd_deviceaddr { struct pnfs_osd_targetid oda_targetid; struct pnfs_osd_targetaddr oda_targetaddr; @@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr { struct nfs4_string oda_osdname; }; -enum { - ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4, - PNFS_OSD_DEVICEADDR_MAX = - PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX + - 2 /*oda_lun*/ + - 1 + OSD_SYSTEMID_LEN + - 1 + ODA_OSDNAME_MAX, -}; - /* LAYOUTCOMMIT: layoutupdate */ /* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { @@ -279,7 +253,7 @@ struct pnfs_osd_ioerr { u32 oer_errno; }; -/* OSD XDR API */ +/* OSD XDR Client API 
*/ /* Layout helpers */ /* Layout decoding is done in two parts: * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part @@ -337,8 +311,7 @@ extern int pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, struct pnfs_osd_layoutupdate *lou); -/* osd_ioerror encoding/decoding (layout_return) */ -/* Client */ +/* osd_ioerror encoding (layout_return) */ extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h index 650af6deaf8f..643b96c7a94f 100644 --- a/include/linux/proc_fs.h +++ b/include/linux/proc_fs.h @@ -50,8 +50,6 @@ typedef int (write_proc_t)(struct file *file, const char __user *buffer, struct proc_dir_entry { unsigned int low_ino; - unsigned int namelen; - const char *name; mode_t mode; nlink_t nlink; uid_t uid; @@ -73,9 +71,11 @@ struct proc_dir_entry { write_proc_t *write_proc; atomic_t count; /* use count */ int pde_users; /* number of callers into module in progress */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ struct completion *pde_unload_completion; struct list_head pde_openers; /* who did ->open, but not ->release */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + u8 namelen; + char name[]; }; enum kcore_type { diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 75cbf4f62fe8..9e65d9e20662 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -245,10 +245,16 @@ struct mdp_superblock_1 { __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ __u8 devflags; /* per-device flags. Only one defined...*/ #define WriteMostly1 1 /* mask for writemostly flag in above */ - __u8 pad2[64-57]; /* set to 0 when writing */ + /* Bad block log. If there are any bad blocks the feature flag is set. + * If offset and size are non-zero, that space is reserved and available + */ + __u8 bblog_shift; /* shift from sectors to block size */ + __le16 bblog_size; /* number of sectors reserved for list */ + __le32 bblog_offset; /* sector offset from superblock to bblog, + * signed - not unsigned */ /* array state information - 64 bytes */ - __le64 utime; /* 40 bits second, 24 btes microseconds */ + __le64 utime; /* 40 bits second, 24 bits microseconds */ __le64 events; /* incremented when superblock updated */ __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ __le32 sb_csum; /* checksum up to devs[max_dev] */ @@ -270,8 +276,8 @@ struct mdp_superblock_1 { * must be honoured */ #define MD_FEATURE_RESHAPE_ACTIVE 4 +#define MD_FEATURE_BAD_BLOCKS 8 /* badblock list is not empty */ -#define MD_FEATURE_ALL (1|2|4) +#define MD_FEATURE_ALL (1|2|4|8) #endif - diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 082884295f80..f7f3ce340c08 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#include <linux/sunrpc/xprt.h> #include <linux/sunrpc/sched.h> -#ifdef CONFIG_NFS_V4_1 +#ifdef CONFIG_SUNRPC_BACKCHANNEL struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); @@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) return 1; return 0; } -#else /* CONFIG_NFS_V4_1 */ +#else /* CONFIG_SUNRPC_BACKCHANNEL */ static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, unsigned int min_reqs) { @@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp) static inline void xprt_free_bc_request(struct rpc_rqst *req) { } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #endif /* _LINUX_SUNRPC_BC_XPRT_H */ diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index fe2d8e6b923b..e7756896f3ca 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -227,6 +227,10 @@ void rpc_init_wait_queue(struct rpc_wait_queue *, const char *); void rpc_destroy_wait_queue(struct rpc_wait_queue *); void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, rpc_action action); +void rpc_sleep_on_priority(struct rpc_wait_queue *, + struct rpc_task *, + rpc_action action, + int priority); void rpc_wake_up_queued_task(struct rpc_wait_queue *, struct rpc_task *); void rpc_wake_up(struct rpc_wait_queue *); diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 2f1e5186e049..223588a976a0 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -92,7 +92,7 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same * connection */ @@ -100,7 +100,7 @@ struct svc_serv { wait_queue_head_t sv_cb_waitq; /* sleep here if there are no * entries in the svc_cb_list */ struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ }; /* diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 81cce3b3ee66..15518a152ac3 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -22,6 +22,7 @@ #define RPC_MIN_SLOT_TABLE (2U) #define RPC_DEF_SLOT_TABLE (16U) #define RPC_MAX_SLOT_TABLE (128U) +#define RPC_MAX_SLOT_TABLE_LIMIT (65536U) /* * This describes a timeout strategy @@ -100,18 +101,18 @@ struct rpc_rqst { ktime_t rq_xtime; /* transmit time stamp */ int rq_ntrans; -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head rq_bc_list; /* Callback service list */ unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANEL */ }; #define rq_svec rq_snd_buf.head #define rq_slen rq_snd_buf.len struct rpc_xprt_ops { void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); - int (*reserve_xprt)(struct rpc_task *task); + int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); void (*rpcbind)(struct rpc_task *task); void (*set_port)(struct rpc_xprt *xprt, unsigned short port); @@ -164,12 +165,12 @@ struct rpc_xprt { struct 
rpc_wait_queue binding; /* requests waiting on rpcbind */ struct rpc_wait_queue sending; /* requests waiting to send */ - struct rpc_wait_queue resend; /* requests waiting to resend */ struct rpc_wait_queue pending; /* requests in flight */ struct rpc_wait_queue backlog; /* waiting for slot */ struct list_head free; /* free slots */ - struct rpc_rqst * slot; /* slot table storage */ - unsigned int max_reqs; /* total slots */ + unsigned int max_reqs; /* max number of slots */ + unsigned int min_reqs; /* min number of slots */ + atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char shutdown : 1, /* being shut down */ resvport : 1; /* use a reserved port */ @@ -200,7 +201,7 @@ struct rpc_xprt { u32 xid; /* Next XID value to use */ struct rpc_task * snd_task; /* Task blocked in send */ struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) struct svc_serv *bc_serv; /* The RPC service which will */ /* process the callback */ unsigned int bc_alloc_count; /* Total number of preallocs */ @@ -208,7 +209,7 @@ struct rpc_xprt { * items */ struct list_head bc_pa_list; /* List of preallocated * backchannel rpc_rqst's */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ struct list_head recv; struct { @@ -228,15 +229,15 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; }; -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Backchannel flags */ #define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */ /* buffer in use */ -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static inline int bc_prealloc(struct rpc_rqst *req) { return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); @@ -246,7 +247,7 @@ static inline int bc_prealloc(struct rpc_rqst *req) { return 0; } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ struct xprt_create { int ident; /* XPRT_TRANSPORT identifier */ @@ -271,8 +272,8 @@ struct xprt_class { struct rpc_xprt *xprt_create_transport(struct xprt_create *args); void xprt_connect(struct rpc_task *task); void xprt_reserve(struct rpc_task *task); -int xprt_reserve_xprt(struct rpc_task *task); -int xprt_reserve_xprt_cong(struct rpc_task *task); +int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task); +int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); int xprt_prepare_transmit(struct rpc_task *task); void xprt_transmit(struct rpc_task *task); void xprt_end_transmit(struct rpc_task *task); @@ -282,7 +283,9 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); -struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); +struct rpc_xprt * xprt_alloc(struct net *net, size_t size, + unsigned int num_prealloc, + unsigned int max_req); void xprt_free(struct rpc_xprt *); static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) @@ -321,7 +324,6 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); #define XPRT_CLOSING (6) #define XPRT_CONNECTION_ABORT (7) #define XPRT_CONNECTION_CLOSE (8) -#define XPRT_INITIALIZED (9) static inline void xprt_set_connected(struct rpc_xprt *xprt) { diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 
011bcfeb9f09..111843f88b2a 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -59,6 +59,84 @@ struct watchdog_info { #define WATCHDOG_NOWAYOUT 0 #endif +struct watchdog_ops; +struct watchdog_device; + +/** struct watchdog_ops - The watchdog-devices operations + * + * @owner: The module owner. + * @start: The routine for starting the watchdog device. + * @stop: The routine for stopping the watchdog device. + * @ping: The routine that sends a keepalive ping to the watchdog device. + * @status: The routine that shows the status of the watchdog device. + * @set_timeout:The routine for setting the watchdog devices timeout value. + * @ioctl: The routines that handles extra ioctl calls. + * + * The watchdog_ops structure contains a list of low-level operations + * that control a watchdog device. It also contains the module that owns + * these operations. The start and stop function are mandatory, all other + * functions are optonal. + */ +struct watchdog_ops { + struct module *owner; + /* mandatory operations */ + int (*start)(struct watchdog_device *); + int (*stop)(struct watchdog_device *); + /* optional operations */ + int (*ping)(struct watchdog_device *); + unsigned int (*status)(struct watchdog_device *); + int (*set_timeout)(struct watchdog_device *, unsigned int); + long (*ioctl)(struct watchdog_device *, unsigned int, unsigned long); +}; + +/** struct watchdog_device - The structure that defines a watchdog device + * + * @info: Pointer to a watchdog_info structure. + * @ops: Pointer to the list of watchdog operations. + * @bootstatus: Status of the watchdog device at boot. + * @timeout: The watchdog devices timeout value. + * @min_timeout:The watchdog devices minimum timeout value. + * @max_timeout:The watchdog devices maximum timeout value. + * @driver-data:Pointer to the drivers private data. + * @status: Field that contains the devices internal status bits. + * + * The watchdog_device structure contains all information about a + * watchdog timer device. + * + * The driver-data field may not be accessed directly. It must be accessed + * via the watchdog_set_drvdata and watchdog_get_drvdata helpers. + */ +struct watchdog_device { + const struct watchdog_info *info; + const struct watchdog_ops *ops; + unsigned int bootstatus; + unsigned int timeout; + unsigned int min_timeout; + unsigned int max_timeout; + void *driver_data; + unsigned long status; +/* Bit numbers for status flags */ +#define WDOG_ACTIVE 0 /* Is the watchdog running/active */ +#define WDOG_DEV_OPEN 1 /* Opened via /dev/watchdog ? */ +#define WDOG_ALLOW_RELEASE 2 /* Did we receive the magic char ? */ +#define WDOG_NO_WAY_OUT 3 /* Is 'nowayout' feature set ? 
*/ +}; + +/* Use the following functions to manipulate watchdog driver specific data */ +static inline void watchdog_set_drvdata(struct watchdog_device *wdd, void *data) +{ + wdd->driver_data = data; +} + +static inline void *watchdog_get_drvdata(struct watchdog_device *wdd) +{ + return wdd->driver_data; +} + +/* drivers/watchdog/core/watchdog_core.c */ +extern int watchdog_register_device(struct watchdog_device *); +extern void watchdog_unregister_device(struct watchdog_device *); + #endif /* __KERNEL__ */ #endif /* ifndef _LINUX_WATCHDOG_H */ diff --git a/include/linux/wm97xx.h b/include/linux/wm97xx.h index 38e8c4d9289e..fd98bb968219 100644 --- a/include/linux/wm97xx.h +++ b/include/linux/wm97xx.h @@ -38,7 +38,11 @@ #define WM97XX_ADCSEL_X 0x1000 /* x coord measurement */ #define WM97XX_ADCSEL_Y 0x2000 /* y coord measurement */ #define WM97XX_ADCSEL_PRES 0x3000 /* pressure measurement */ -#define WM97XX_ADCSEL_MASK 0x7000 +#define WM97XX_AUX_ID1 0x4000 +#define WM97XX_AUX_ID2 0x5000 +#define WM97XX_AUX_ID3 0x6000 +#define WM97XX_AUX_ID4 0x7000 +#define WM97XX_ADCSEL_MASK 0x7000 /* ADC selection mask */ #define WM97XX_COO 0x0800 /* enable coordinate mode */ #define WM97XX_CTC 0x0400 /* enable continuous mode */ #define WM97XX_CM_RATE_93 0x0000 /* 93.75Hz continuous rate */ @@ -61,13 +65,6 @@ #define WM97XX_PRP_DET_DIG 0xc000 /* setect on, digitise on */ #define WM97XX_RPR 0x2000 /* wake up on pen down */ #define WM97XX_PEN_DOWN 0x8000 /* pen is down */ -#define WM97XX_ADCSRC_MASK 0x7000 /* ADC source mask */ - -#define WM97XX_AUX_ID1 0x8001 -#define WM97XX_AUX_ID2 0x8002 -#define WM97XX_AUX_ID3 0x8003 -#define WM97XX_AUX_ID4 0x8004 - /* WM9712 Bits */ #define WM9712_45W 0x1000 /* set for 5-wire touchscreen */ diff --git a/include/scsi/iscsi_proto.h b/include/scsi/iscsi_proto.h index ea68b3c56dbf..988ba06b3ad6 100644 --- a/include/scsi/iscsi_proto.h +++ b/include/scsi/iscsi_proto.h @@ -29,10 +29,40 @@ /* default iSCSI listen port for incoming connections */ #define ISCSI_LISTEN_PORT 3260 +/* iSCSI header length */ +#define ISCSI_HDR_LEN 48 + +/* iSCSI CRC32C length */ +#define ISCSI_CRC_LEN 4 + /* Padding word length */ #define ISCSI_PAD_LEN 4 /* + * Serial Number Arithmetic, 32 bits, RFC1982 + */ + +static inline int iscsi_sna_lt(u32 n1, u32 n2) +{ + return (s32)(n1 - n2) < 0; +} + +static inline int iscsi_sna_lte(u32 n1, u32 n2) +{ + return (s32)(n1 - n2) <= 0; +} + +static inline int iscsi_sna_gt(u32 n1, u32 n2) +{ + return (s32)(n1 - n2) > 0; +} + +static inline int iscsi_sna_gte(u32 n1, u32 n2) +{ + return (s32)(n1 - n2) >= 0; +} + +/* * useful common(control and data pathes) macro */ #define ntoh24(p) (((p)[0] << 16) | ((p)[1] << 8) | ((p)[2])) @@ -116,7 +146,7 @@ struct iscsi_ahs_hdr { #define ISCSI_CDB_SIZE 16 /* iSCSI PDU Header */ -struct iscsi_cmd { +struct iscsi_scsi_req { uint8_t opcode; uint8_t flags; __be16 rsvd2; @@ -161,7 +191,7 @@ struct iscsi_ecdb_ahdr { }; /* SCSI Response Header */ -struct iscsi_cmd_rsp { +struct iscsi_scsi_rsp { uint8_t opcode; uint8_t flags; uint8_t response; @@ -406,7 +436,7 @@ struct iscsi_text_rsp { }; /* Login Header */ -struct iscsi_login { +struct iscsi_login_req { uint8_t opcode; uint8_t flags; uint8_t max_version; /* Max. 
version supported */ @@ -427,7 +457,13 @@ struct iscsi_login { #define ISCSI_FLAG_LOGIN_TRANSIT 0x80 #define ISCSI_FLAG_LOGIN_CONTINUE 0x40 #define ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK 0x0C /* 2 bits */ +#define ISCSI_FLAG_LOGIN_CURRENT_STAGE1 0x04 +#define ISCSI_FLAG_LOGIN_CURRENT_STAGE2 0x08 +#define ISCSI_FLAG_LOGIN_CURRENT_STAGE3 0x0C #define ISCSI_FLAG_LOGIN_NEXT_STAGE_MASK 0x03 /* 2 bits */ +#define ISCSI_FLAG_LOGIN_NEXT_STAGE1 0x01 +#define ISCSI_FLAG_LOGIN_NEXT_STAGE2 0x02 +#define ISCSI_FLAG_LOGIN_NEXT_STAGE3 0x03 #define ISCSI_LOGIN_CURRENT_STAGE(flags) \ ((flags & ISCSI_FLAG_LOGIN_CURRENT_STAGE_MASK) >> 2) @@ -550,17 +586,25 @@ struct iscsi_logout_rsp { struct iscsi_snack { uint8_t opcode; uint8_t flags; - uint8_t rsvd2[14]; + uint8_t rsvd2[2]; + uint8_t hlength; + uint8_t dlength[3]; + uint8_t lun[8]; itt_t itt; + __be32 ttt; + uint8_t rsvd3[4]; + __be32 exp_statsn; + uint8_t rsvd4[8]; __be32 begrun; __be32 runlength; - __be32 exp_statsn; - __be32 rsvd3; - __be32 exp_datasn; - uint8_t rsvd6[8]; }; /* SNACK PDU flags */ +#define ISCSI_FLAG_SNACK_TYPE_DATA 0 +#define ISCSI_FLAG_SNACK_TYPE_R2T 0 +#define ISCSI_FLAG_SNACK_TYPE_STATUS 1 +#define ISCSI_FLAG_SNACK_TYPE_DATA_ACK 2 +#define ISCSI_FLAG_SNACK_TYPE_RDATA 3 #define ISCSI_FLAG_SNACK_TYPE_MASK 0x0F /* 4 bits */ /* Reject Message Header */ diff --git a/include/sound/pcm.h b/include/sound/pcm.h index e1bad1130616..57e71fa33f7c 100644 --- a/include/sound/pcm.h +++ b/include/sound/pcm.h @@ -507,6 +507,18 @@ void snd_pcm_detach_substream(struct snd_pcm_substream *substream); void snd_pcm_vma_notify_data(void *client, void *data); int snd_pcm_mmap_data(struct snd_pcm_substream *substream, struct file *file, struct vm_area_struct *area); + +#ifdef CONFIG_SND_DEBUG +void snd_pcm_debug_name(struct snd_pcm_substream *substream, + char *name, size_t len); +#else +static inline void +snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size) +{ + *buf = 0; +} +#endif + /* * PCM library */ @@ -749,17 +761,18 @@ static inline const struct snd_interval *hw_param_interval_c(const struct snd_pc return &params->intervals[var - SNDRV_PCM_HW_PARAM_FIRST_INTERVAL]; } -#define params_access(p) ((__force snd_pcm_access_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_ACCESS))) -#define params_format(p) ((__force snd_pcm_format_t)snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_FORMAT))) -#define params_subformat(p) snd_mask_min(hw_param_mask((p), SNDRV_PCM_HW_PARAM_SUBFORMAT)) -#define params_channels(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min -#define params_rate(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_RATE)->min -#define params_period_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min -#define params_period_bytes(p) ((params_period_size(p)*snd_pcm_format_physical_width(params_format(p))*params_channels(p))/8) -#define params_periods(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_PERIODS)->min -#define params_buffer_size(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min -#define params_buffer_bytes(p) hw_param_interval((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min - +#define params_channels(p) \ + (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_CHANNELS)->min) +#define params_rate(p) \ + (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_RATE)->min) +#define params_period_size(p) \ + (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIOD_SIZE)->min) +#define params_periods(p) \ + (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_PERIODS)->min) +#define params_buffer_size(p) \
(hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_SIZE)->min) +#define params_buffer_bytes(p) \ + (hw_param_interval_c((p), SNDRV_PCM_HW_PARAM_BUFFER_BYTES)->min) int snd_interval_refine(struct snd_interval *i, const struct snd_interval *v); void snd_interval_mul(const struct snd_interval *a, const struct snd_interval *b, struct snd_interval *c); diff --git a/include/sound/pcm_params.h b/include/sound/pcm_params.h index 85cf1cf4f31a..f494f1e3c900 100644 --- a/include/sound/pcm_params.h +++ b/include/sound/pcm_params.h @@ -337,5 +337,19 @@ static inline unsigned int sub(unsigned int a, unsigned int b) return 0; } -#endif /* __SOUND_PCM_PARAMS_H */ +#define params_access(p) ((__force snd_pcm_access_t)\ + snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_ACCESS))) +#define params_format(p) ((__force snd_pcm_format_t)\ + snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_FORMAT))) +#define params_subformat(p) \ + snd_mask_min(hw_param_mask_c((p), SNDRV_PCM_HW_PARAM_SUBFORMAT)) +static inline unsigned int +params_period_bytes(const struct snd_pcm_hw_params *p) +{ + return (params_period_size(p) * + snd_pcm_format_physical_width(params_format(p)) * + params_channels(p)) / 8; +} + +#endif /* __SOUND_PCM_PARAMS_H */ diff --git a/include/sound/soc-dapm.h b/include/sound/soc-dapm.h index e09505c5a490..e0583b7769cb 100644 --- a/include/sound/soc-dapm.h +++ b/include/sound/soc-dapm.h @@ -266,6 +266,12 @@ .get = snd_soc_dapm_get_enum_virt, \ .put = snd_soc_dapm_put_enum_virt, \ .private_value = (unsigned long)&xenum } +#define SOC_DAPM_ENUM_EXT(xname, xenum, xget, xput) \ +{ .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ + .info = snd_soc_info_enum_double, \ + .get = xget, \ + .put = xput, \ + .private_value = (unsigned long)&xenum } #define SOC_DAPM_VALUE_ENUM(xname, xenum) \ { .iface = SNDRV_CTL_ELEM_IFACE_MIXER, .name = xname, \ .info = snd_soc_info_enum_double, \ diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 984458035d4a..1d2b6ceea95d 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -27,9 +27,11 @@ */ #include <linux/cgroup.h> +#include <linux/cred.h> #include <linux/ctype.h> #include <linux/errno.h> #include <linux/fs.h> +#include <linux/init_task.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/mm.h> @@ -1514,6 +1516,7 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, struct cgroup *root_cgrp = &root->top_cgroup; struct inode *inode; struct cgroupfs_root *existing_root; + const struct cred *cred; int i; BUG_ON(sb->s_root != NULL); @@ -1593,7 +1596,9 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type, BUG_ON(!list_empty(&root_cgrp->children)); BUG_ON(root->number_of_cgroups != 1); + cred = override_creds(&init_cred); cgroup_populate_dir(root_cgrp); + revert_creds(cred); mutex_unlock(&cgroup_mutex); mutex_unlock(&inode->i_mutex); } else { diff --git a/kernel/compat.c b/kernel/compat.c index 18197ae2d465..616c78197cca 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -992,11 +992,8 @@ asmlinkage long compat_sys_rt_sigsuspend(compat_sigset_t __user *unewset, compat sigset_from_compat(&newset, &newset32); sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - spin_lock_irq(&current->sighand->siglock); current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&newset); current->state = TASK_INTERRUPTIBLE; schedule(); diff --git a/kernel/signal.c b/kernel/signal.c index d7f70aed1cc0..291c9700be75 100644 ---
a/kernel/signal.c +++ b/kernel/signal.c @@ -3102,15 +3102,11 @@ SYSCALL_DEFINE0(sgetmask) SYSCALL_DEFINE1(ssetmask, int, newmask) { - int old; - - spin_lock_irq(&current->sighand->siglock); - old = current->blocked.sig[0]; + int old = current->blocked.sig[0]; + sigset_t newset; - siginitset(&current->blocked, newmask & ~(sigmask(SIGKILL)| - sigmask(SIGSTOP))); - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + siginitset(&newset, newmask & ~(sigmask(SIGKILL) | sigmask(SIGSTOP))); + set_current_blocked(&newset); return old; } @@ -3167,11 +3163,8 @@ SYSCALL_DEFINE2(rt_sigsuspend, sigset_t __user *, unewset, size_t, sigsetsize) return -EFAULT; sigdelsetmask(&newset, sigmask(SIGKILL)|sigmask(SIGSTOP)); - spin_lock_irq(&current->sighand->siglock); current->saved_sigmask = current->blocked; - current->blocked = newset; - recalc_sigpending(); - spin_unlock_irq(&current->sighand->siglock); + set_current_blocked(&newset); current->state = TASK_INTERRUPTIBLE; schedule(); diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index 934e221c1d07..9d40a071d038 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -695,7 +695,7 @@ void vlan_setup(struct net_device *dev) ether_setup(dev); dev->priv_flags |= IFF_802_1Q_VLAN; - dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | IFF_TX_SKB_SHARING); dev->tx_queue_len = 0; dev->netdev_ops = &vlan_netdev_ops; diff --git a/net/bluetooth/bnep/netdev.c b/net/bluetooth/bnep/netdev.c index 8c100c9dae28..d4f5dff7c955 100644 --- a/net/bluetooth/bnep/netdev.c +++ b/net/bluetooth/bnep/netdev.c @@ -231,6 +231,7 @@ void bnep_net_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &bnep_netdev_ops; dev->watchdog_timeo = HZ * 2; diff --git a/net/core/dev.c b/net/core/dev.c index 9444c5cb4137..17d67b579beb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4497,10 +4497,10 @@ void __dev_set_rx_mode(struct net_device *dev) */ if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); - dev->uc_promisc = 1; + dev->uc_promisc = true; } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); - dev->uc_promisc = 0; + dev->uc_promisc = false; } if (ops->ndo_set_multicast_list) diff --git a/net/core/pktgen.c b/net/core/pktgen.c index f76079cd750c..e35a6fbb8110 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1070,7 +1070,9 @@ static ssize_t pktgen_if_write(struct file *file, len = num_arg(&user_buffer[i], 10, &value); if (len < 0) return len; - + if ((value > 0) && + (!(pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING))) + return -ENOTSUPP; i += len; pkt_dev->clone_skb = value; @@ -3555,7 +3557,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->min_pkt_size = ETH_ZLEN; pkt_dev->max_pkt_size = ETH_ZLEN; pkt_dev->nfrags = 0; - pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->delay = pg_delay_d; pkt_dev->count = pg_count_d; pkt_dev->sofar = 0; @@ -3563,7 +3564,6 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) pkt_dev->udp_src_max = 9; pkt_dev->udp_dst_min = 9; pkt_dev->udp_dst_max = 9; - pkt_dev->vlan_p = 0; pkt_dev->vlan_cfi = 0; pkt_dev->vlan_id = 0xffff; @@ -3575,6 +3575,8 @@ static int pktgen_add_device(struct pktgen_thread *t, const char *ifname) err = pktgen_setup_dev(pkt_dev, ifname); if (err) goto out1; + if (pkt_dev->odev->priv_flags & IFF_TX_SKB_SHARING) + pkt_dev->clone_skb = pg_clone_skb_d; pkt_dev->entry = proc_create_data(ifname,
0600, pg_proc_dir, &pktgen_if_fops, pkt_dev); diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5cffb63f481a..27997d35ebd3 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -231,6 +231,7 @@ EXPORT_SYMBOL(eth_header_parse); * eth_header_cache - fill cache entry from neighbour * @neigh: source neighbour * @hh: destination cache entry + * @type: Ethernet type field * Create an Ethernet header template from the neighbour. */ int eth_header_cache(const struct neighbour *neigh, struct hh_cache *hh, __be16 type) @@ -339,6 +340,7 @@ void ether_setup(struct net_device *dev) dev->addr_len = ETH_ALEN; dev->tx_queue_len = 1000; /* Ethernet wants good queues */ dev->flags = IFF_BROADCAST|IFF_MULTICAST; + dev->priv_flags = IFF_TX_SKB_SHARING; memset(dev->broadcast, 0xFF, ETH_ALEN); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 37b3c188d8b3..bc19bd06dd00 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1134,15 +1134,15 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, struct in_device *in_dev) { - struct in_ifaddr *ifa = in_dev->ifa_list; - - if (!ifa) - return; + struct in_ifaddr *ifa; - arp_send(ARPOP_REQUEST, ETH_P_ARP, - ifa->ifa_local, dev, - ifa->ifa_local, NULL, - dev->dev_addr, NULL); + for (ifa = in_dev->ifa_list; ifa; + ifa = ifa->ifa_next) { + arp_send(ARPOP_REQUEST, ETH_P_ARP, + ifa->ifa_local, dev, + ifa->ifa_local, NULL, + dev->dev_addr, NULL); + } } /* Called only under RTNL semaphore */ diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a06c53c14d84..a55500cc0b29 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1481,6 +1481,8 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; + if (ifp->prefix_len == 127) /* RFC 6164 */ + return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) return; diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index a8193f52c13c..d2726a74597d 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -103,7 +103,7 @@ static struct net_device_ops l2tp_eth_netdev_ops = { static void l2tp_eth_dev_setup(struct net_device *dev) { ether_setup(dev); - + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &l2tp_eth_netdev_ops; dev->destructor = free_netdev; } diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cd5fb40d3fd4..556e7e6ddf0a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -698,6 +698,7 @@ static const struct net_device_ops ieee80211_monitorif_ops = { static void ieee80211_if_setup(struct net_device *dev) { ether_setup(dev); + dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->netdev_ops = &ieee80211_dataif_ops; dev->destructor = free_netdev; } diff --git a/net/socket.c b/net/socket.c index 26ed35c7751e..b1cbbcd92558 100644 --- a/net/socket.c +++ b/net/socket.c @@ -580,7 +580,7 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) } EXPORT_SYMBOL(sock_sendmsg); -int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) +static int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size) { struct kiocb iocb; struct sock_iocb siocb; diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig index b2198e65d8bb..ffd243d09188 100644 --- a/net/sunrpc/Kconfig +++ b/net/sunrpc/Kconfig @@ -4,6 +4,10 @@ config SUNRPC config SUNRPC_GSS tristate +config SUNRPC_BACKCHANNEL + bool + depends on SUNRPC + config SUNRPC_XPRT_RDMA tristate depends on SUNRPC && INFINIBAND && 
INFINIBAND_ADDR_TRANS && EXPERIMENTAL diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index 9d2fca5ad14a..8209a0411bca 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \ addr.o rpcb_clnt.o timer.o xdr.o \ sunrpc_syms.o cache.o rpc_pipe.o \ svc_xprt.o -sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o +sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o sunrpc-$(CONFIG_PROC_FS) += stats.o sunrpc-$(CONFIG_SYSCTL) += sysctl.o diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index cf06af3b63c6..91eaa26e4c42 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define RPCDBG_FACILITY RPCDBG_TRANS #endif -#if defined(CONFIG_NFS_V4_1) - /* * Helper routines that track the number of preallocation elements * on the transport. @@ -174,7 +172,7 @@ out_free: dprintk("RPC: setup backchannel transport failed\n"); return -1; } -EXPORT_SYMBOL(xprt_setup_backchannel); +EXPORT_SYMBOL_GPL(xprt_setup_backchannel); /* * Destroys the backchannel preallocated structures. @@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) dprintk("RPC: backchannel list empty= %s\n", list_empty(&xprt->bc_pa_list) ? "true" : "false"); } -EXPORT_SYMBOL(xprt_destroy_backchannel); +EXPORT_SYMBOL_GPL(xprt_destroy_backchannel); /* * One or more rpc_rqst structure have been preallocated during the @@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req) spin_unlock_bh(&xprt->bc_pa_lock); } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c index 1dd1a6890007..0b2eb388cbda 100644 --- a/net/sunrpc/bc_svc.c +++ b/net/sunrpc/bc_svc.c @@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * reply over an existing open connection previously established by the client. */ -#if defined(CONFIG_NFS_V4_1) - #include <linux/module.h> #include <linux/sunrpc/xprt.h> @@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req) return ret; } -#endif /* CONFIG_NFS_V4_1 */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c50818f0473b..c5347d29cfb7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -64,9 +64,9 @@ static void call_decode(struct rpc_task *task); static void call_bind(struct rpc_task *task); static void call_bind_status(struct rpc_task *task); static void call_transmit(struct rpc_task *task); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static void call_bc_transmit(struct rpc_task *task); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static void call_status(struct rpc_task *task); static void call_transmit_status(struct rpc_task *task); static void call_refresh(struct rpc_task *task); @@ -715,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags, } EXPORT_SYMBOL_GPL(rpc_call_async); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /** * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run * rpc_execute against it @@ -758,7 +758,7 @@ out: dprintk("RPC: rpc_run_bc_task: task= %p\n", task); return task; } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ void rpc_call_start(struct rpc_task *task) @@ -1361,7 +1361,7 @@ call_transmit_status(struct rpc_task *task) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * 5b. 
Send the backchannel RPC reply. On error, drop the reply. In * addition, disconnect on connectivity errors. @@ -1425,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task) } rpc_wake_up_queued_task(&req->rq_xprt->pending, task); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * 6. Sort out the RPC call status @@ -1550,8 +1550,7 @@ call_decode(struct rpc_task *task) kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; __be32 *p; - dprintk("RPC: %5u call_decode (status %d)\n", - task->tk_pid, task->tk_status); + dprint_status(task); if (task->tk_flags & RPC_CALL_MAJORSEEN) { if (clnt->cl_chatty) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 4814e246a874..d12ffa545811 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task) /* * Add new request to a priority queue. */ -static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { struct list_head *q; struct rpc_task *t; INIT_LIST_HEAD(&task->u.tk_wait.links); - q = &queue->tasks[task->tk_priority]; - if (unlikely(task->tk_priority > queue->maxpriority)) + q = &queue->tasks[queue_priority]; + if (unlikely(queue_priority > queue->maxpriority)) q = &queue->tasks[queue->maxpriority]; list_for_each_entry(t, q, u.tk_wait.list) { if (t->tk_owner == task->tk_owner) { @@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r * improve overall performance. * Everyone else gets appended to the queue to ensure proper FIFO behavior. */ -static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) +static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, + struct rpc_task *task, + unsigned char queue_priority) { BUG_ON (RPC_IS_QUEUED(task)); if (RPC_IS_PRIORITY(queue)) - __rpc_add_wait_queue_priority(queue, task); + __rpc_add_wait_queue_priority(queue, task, queue_priority); else if (RPC_IS_SWAPPER(task)) list_add(&task->u.tk_wait.list, &queue->tasks[0]); else @@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task) * NB: An RPC task will only receive interrupt-driven events as long * as it's on a wait queue. */ -static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, - rpc_action action) +static void __rpc_sleep_on_priority(struct rpc_wait_queue *q, + struct rpc_task *task, + rpc_action action, + unsigned char queue_priority) { dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", task->tk_pid, rpc_qname(q), jiffies); - __rpc_add_wait_queue(q, task); + __rpc_add_wait_queue(q, task, queue_priority); BUG_ON(task->tk_callback != NULL); task->tk_callback = action; @@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, * Protect the queue operations. */ spin_lock_bh(&q->lock); - __rpc_sleep_on(q, task, action); + __rpc_sleep_on_priority(q, task, action, task->tk_priority); spin_unlock_bh(&q->lock); } EXPORT_SYMBOL_GPL(rpc_sleep_on); +void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task, + rpc_action action, int priority) +{ + /* We shouldn't ever put an inactive task to sleep */ + BUG_ON(!RPC_IS_ACTIVATED(task)); + + /* + * Protect the queue operations. 
+ */ + spin_lock_bh(&q->lock); + __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW); + spin_unlock_bh(&q->lock); +} + /** * __rpc_do_wake_up_task - wake up a single rpc_task * @queue: wait queue diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2b90292e9505..6a69a1131fb7 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp) } } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Process a backchannel RPC request that arrived over an existing * outbound connection @@ -1300,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req, return 0; } } -EXPORT_SYMBOL(bc_svc_process); -#endif /* CONFIG_NFS_V4_1 */ +EXPORT_SYMBOL_GPL(bc_svc_process); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Return (transport-specific) limit on the rpc payload. diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index f2cb5b881dea..767d494de7a2 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -68,12 +68,12 @@ static void svc_sock_free(struct svc_xprt *); static struct svc_xprt *svc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); static void svc_bc_sock_free(struct svc_xprt *xprt); -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ #ifdef CONFIG_DEBUG_LOCK_ALLOC static struct lock_class_key svc_key[2]; @@ -1243,7 +1243,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, struct net *, struct sockaddr *, int, int); @@ -1284,7 +1284,7 @@ static void svc_cleanup_bc_xprt_sock(void) { svc_unreg_xprt_class(&svc_tcp_bc_class); } -#else /* CONFIG_NFS_V4_1 */ +#else /* CONFIG_SUNRPC_BACKCHANNEL */ static void svc_init_bc_xprt_sock(void) { } @@ -1292,7 +1292,7 @@ static void svc_init_bc_xprt_sock(void) static void svc_cleanup_bc_xprt_sock(void) { } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ static struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, @@ -1623,7 +1623,7 @@ static void svc_sock_free(struct svc_xprt *xprt) kfree(svsk); } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Create a back channel svc_xprt which shares the fore channel socket. 
*/ @@ -1662,4 +1662,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt) if (xprt) kfree(container_of(xprt, struct svc_sock, sk_xprt)); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index f008c14ad34c..277ebd4bf095 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len) kaddr[buf->page_base + len] = '\0'; kunmap_atomic(kaddr, KM_USER0); } -EXPORT_SYMBOL(xdr_terminate_string); +EXPORT_SYMBOL_GPL(xdr_terminate_string); void xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index ce5eb68a9664..9b6a4d1ea8f8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -62,6 +62,7 @@ /* * Local functions */ +static void xprt_init(struct rpc_xprt *xprt, struct net *net); static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); static void xprt_connect_status(struct rpc_task *task); static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); @@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport); * transport connects from colliding with writes. No congestion control * is provided. */ -int xprt_reserve_xprt(struct rpc_task *task) +int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - struct rpc_xprt *xprt = req->rq_xprt; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) @@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task) goto out_sleep; } xprt->snd_task = task; - req->rq_bytes_sent = 0; - req->rq_ntrans++; + if (req != NULL) { + req->rq_bytes_sent = 0; + req->rq_ntrans++; + } return 1; @@ -212,10 +215,13 @@ out_sleep: task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt); @@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt) * integrated into the decision of whether a request is allowed to be * woken up and given access to the transport. 
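 * A task that has no rpc_rqst attached (req == NULL) just takes the write lock and is not counted against the congestion window.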
*/ -int xprt_reserve_xprt_cong(struct rpc_task *task) +int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpc_rqst *req = task->tk_rqstp; + int priority; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { if (task == xprt->snd_task) return 1; goto out_sleep; } + if (req == NULL) { + xprt->snd_task = task; + return 1; + } if (__xprt_get_cong(xprt, task)) { xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return 1; } xprt_clear_locked(xprt); @@ -262,10 +270,13 @@ out_sleep: dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); task->tk_timeout = 0; task->tk_status = -EAGAIN; - if (req && req->rq_ntrans) - rpc_sleep_on(&xprt->resend, task, NULL); + if (req == NULL) + priority = RPC_PRIORITY_LOW; + else if (!req->rq_ntrans) + priority = RPC_PRIORITY_NORMAL; else - rpc_sleep_on(&xprt->sending, task, NULL); + priority = RPC_PRIORITY_HIGH; + rpc_sleep_on_priority(&xprt->sending, task, NULL, priority); return 0; } EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); @@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task) int retval; spin_lock_bh(&xprt->transport_lock); - retval = xprt->ops->reserve_xprt(task); + retval = xprt->ops->reserve_xprt(xprt, task); spin_unlock_bh(&xprt->transport_lock); return retval; } @@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt) if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (!task) - goto out_unlock; - } + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; req = task->tk_rqstp; xprt->snd_task = task; @@ -310,24 +318,25 @@ out_unlock: static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) { struct rpc_task *task; + struct rpc_rqst *req; if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) return; if (RPCXPRT_CONGESTED(xprt)) goto out_unlock; - task = rpc_wake_up_next(&xprt->resend); - if (!task) { - task = rpc_wake_up_next(&xprt->sending); - if (!task) - goto out_unlock; + task = rpc_wake_up_next(&xprt->sending); + if (task == NULL) + goto out_unlock; + + req = task->tk_rqstp; + if (req == NULL) { + xprt->snd_task = task; + return; } if (__xprt_get_cong(xprt, task)) { - struct rpc_rqst *req = task->tk_rqstp; xprt->snd_task = task; - if (req) { - req->rq_bytes_sent = 0; - req->rq_ntrans++; - } + req->rq_bytes_sent = 0; + req->rq_ntrans++; return; } out_unlock: @@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task) err = req->rq_reply_bytes_recvd; goto out_unlock; } - if (!xprt->ops->reserve_xprt(task)) + if (!xprt->ops->reserve_xprt(xprt, task)) err = -EAGAIN; out_unlock: spin_unlock_bh(&xprt->transport_lock); @@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); } +static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags) +{ + struct rpc_rqst *req = ERR_PTR(-EAGAIN); + + if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs)) + goto out; + req = kzalloc(sizeof(struct rpc_rqst), gfp_flags); + if (req != NULL) + goto out; + atomic_dec(&xprt->num_reqs); + req = ERR_PTR(-ENOMEM); +out: + return req; +} + +static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) +{ + if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) { + kfree(req); + return true; + } + return false; 
+} + static void xprt_alloc_slot(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; + struct rpc_rqst *req; - task->tk_status = 0; - if (task->tk_rqstp) - return; if (!list_empty(&xprt->free)) { - struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); - list_del_init(&req->rq_list); - task->tk_rqstp = req; - xprt_request_init(task, xprt); - return; + req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + goto out_init_req; + } + req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT); + if (!IS_ERR(req)) + goto out_init_req; + switch (PTR_ERR(req)) { + case -ENOMEM: + rpc_delay(task, HZ >> 2); + dprintk("RPC: dynamic allocation of request slot " + "failed! Retrying\n"); + break; + case -EAGAIN: + rpc_sleep_on(&xprt->backlog, task, NULL); + dprintk("RPC: waiting for request slot\n"); } - dprintk("RPC: waiting for request slot\n"); task->tk_status = -EAGAIN; - task->tk_timeout = 0; - rpc_sleep_on(&xprt->backlog, task, NULL); + return; +out_init_req: + task->tk_status = 0; + task->tk_rqstp = req; + xprt_request_init(task, xprt); } static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) { + if (xprt_dynamic_free_slot(xprt, req)) + return; + memset(req, 0, sizeof(*req)); /* mark unused */ spin_lock(&xprt->reserve_lock); @@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) spin_unlock(&xprt->reserve_lock); } -struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) +static void xprt_free_all_slots(struct rpc_xprt *xprt) +{ + struct rpc_rqst *req; + while (!list_empty(&xprt->free)) { + req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list); + list_del(&req->rq_list); + kfree(req); + } +} + +struct rpc_xprt *xprt_alloc(struct net *net, size_t size, + unsigned int num_prealloc, + unsigned int max_alloc) { struct rpc_xprt *xprt; + struct rpc_rqst *req; + int i; xprt = kzalloc(size, GFP_KERNEL); if (xprt == NULL) goto out; - atomic_set(&xprt->count, 1); - xprt->max_reqs = max_req; - xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); - if (xprt->slot == NULL) + xprt_init(xprt, net); + + for (i = 0; i < num_prealloc; i++) { + req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL); + if (!req) + break; + list_add(&req->rq_list, &xprt->free); + } + if (i < num_prealloc) goto out_free; + if (max_alloc > num_prealloc) + xprt->max_reqs = max_alloc; + else + xprt->max_reqs = num_prealloc; + xprt->min_reqs = num_prealloc; + atomic_set(&xprt->num_reqs, num_prealloc); - xprt->xprt_net = get_net(net); return xprt; out_free: - kfree(xprt); + xprt_free(xprt); out: return NULL; } @@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc); void xprt_free(struct rpc_xprt *xprt) { put_net(xprt->xprt_net); - kfree(xprt->slot); + xprt_free_all_slots(xprt); kfree(xprt); } EXPORT_SYMBOL_GPL(xprt_free); @@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task) { struct rpc_xprt *xprt = task->tk_xprt; - task->tk_status = -EIO; + task->tk_status = 0; + if (task->tk_rqstp != NULL) + return; + + /* Note: grabbing the xprt_lock_write() here is not strictly needed, + * but ensures that we throttle new slot allocation if the transport + * is congested (e.g. if reconnecting or if we're out of socket + * write buffer space). 
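+ * New slot allocation therefore queues behind tasks that already hold the transport for transmission.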
+ */ + task->tk_timeout = 0; + task->tk_status = -EAGAIN; + if (!xprt_lock_write(xprt, task)) + return; + spin_lock(&xprt->reserve_lock); xprt_alloc_slot(task); spin_unlock(&xprt->reserve_lock); + xprt_release_write(xprt, task); } static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) @@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) { struct rpc_rqst *req = task->tk_rqstp; + INIT_LIST_HEAD(&req->rq_list); req->rq_timeout = task->tk_client->cl_timeout->to_initval; req->rq_task = task; req->rq_xprt = xprt; @@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task) xprt_free_bc_request(req); } +static void xprt_init(struct rpc_xprt *xprt, struct net *net) +{ + atomic_set(&xprt->count, 1); + + spin_lock_init(&xprt->transport_lock); + spin_lock_init(&xprt->reserve_lock); + + INIT_LIST_HEAD(&xprt->free); + INIT_LIST_HEAD(&xprt->recv); +#if defined(CONFIG_SUNRPC_BACKCHANNEL) + spin_lock_init(&xprt->bc_pa_lock); + INIT_LIST_HEAD(&xprt->bc_pa_list); +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ + + xprt->last_used = jiffies; + xprt->cwnd = RPC_INITCWND; + xprt->bind_index = 0; + + rpc_init_wait_queue(&xprt->binding, "xprt_binding"); + rpc_init_wait_queue(&xprt->pending, "xprt_pending"); + rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending"); + rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); + + xprt_init_xid(xprt); + + xprt->xprt_net = get_net(net); +} + /** * xprt_create_transport - create an RPC transport * @args: rpc transport creation arguments @@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task) struct rpc_xprt *xprt_create_transport(struct xprt_create *args) { struct rpc_xprt *xprt; - struct rpc_rqst *req; struct xprt_class *t; spin_lock(&xprt_list_lock); @@ -1100,46 +1213,17 @@ found: if (IS_ERR(xprt)) { dprintk("RPC: xprt_create_transport: failed, %ld\n", -PTR_ERR(xprt)); - return xprt; + goto out; } - if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state)) - /* ->setup returned a pre-initialized xprt: */ - return xprt; - - spin_lock_init(&xprt->transport_lock); - spin_lock_init(&xprt->reserve_lock); - - INIT_LIST_HEAD(&xprt->free); - INIT_LIST_HEAD(&xprt->recv); -#if defined(CONFIG_NFS_V4_1) - spin_lock_init(&xprt->bc_pa_lock); - INIT_LIST_HEAD(&xprt->bc_pa_list); -#endif /* CONFIG_NFS_V4_1 */ - INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) setup_timer(&xprt->timer, xprt_init_autodisconnect, (unsigned long)xprt); else init_timer(&xprt->timer); - xprt->last_used = jiffies; - xprt->cwnd = RPC_INITCWND; - xprt->bind_index = 0; - - rpc_init_wait_queue(&xprt->binding, "xprt_binding"); - rpc_init_wait_queue(&xprt->pending, "xprt_pending"); - rpc_init_wait_queue(&xprt->sending, "xprt_sending"); - rpc_init_wait_queue(&xprt->resend, "xprt_resend"); - rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog"); - - /* initialize free list */ - for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--) - list_add(&req->rq_list, &xprt->free); - - xprt_init_xid(xprt); - dprintk("RPC: created transport %p with %u slots\n", xprt, xprt->max_reqs); +out: return xprt; } @@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt) rpc_destroy_wait_queue(&xprt->binding); rpc_destroy_wait_queue(&xprt->pending); rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->resend); rpc_destroy_wait_queue(&xprt->backlog); cancel_work_sync(&xprt->task_cleanup); /* diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 
0867070bb5ca..b446e100286f 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args) } xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), + xprt_rdma_slot_table_entries, xprt_rdma_slot_table_entries); if (xprt == NULL) { dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", @@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task) } static int -xprt_rdma_reserve_xprt(struct rpc_task *task) +xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task) { - struct rpc_xprt *xprt = task->tk_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); int credits = atomic_read(&r_xprt->rx_buf.rb_credits); @@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task) BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); } xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; - return xprt_reserve_xprt_cong(task); + return xprt_reserve_xprt_cong(xprt, task); } /* diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index ddf05288d9f1..08c5d5a128fc 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -109,7 +109,7 @@ struct rpcrdma_ep { */ /* temporary static scatter/gather max */ -#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ +#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */ #define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ #define MAX_RPCRDMAHDR (\ /* max supported RPC/RDMA header */ \ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 72abb7358933..d7f97ef26590 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -37,7 +37,7 @@ #include <linux/sunrpc/svcsock.h> #include <linux/sunrpc/xprtsock.h> #include <linux/file.h> -#ifdef CONFIG_NFS_V4_1 +#ifdef CONFIG_SUNRPC_BACKCHANNEL #include <linux/sunrpc/bc_xprt.h> #endif @@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt); * xprtsock tunables */ unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; -unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; +unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE; +unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE; unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; @@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO; static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; +static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT; static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; @@ -104,6 +106,15 @@ static ctl_table xs_tunables_table[] = { .extra2 = &max_slot_table_size }, { + .procname = "tcp_max_slot_table_entries", + .data = &xprt_max_tcp_slot_table_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_slot_table_size, + .extra2 = &max_tcp_slot_table_limit + }, + { .procname = "min_resvport", .data = &xprt_min_resvport, .maxlen = sizeof(unsigned int), @@ -755,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task) if (task == NULL) goto out_release; req = task->tk_rqstp; + if (req == NULL) + goto out_release; if (req->rq_bytes_sent == 0) goto out_release; if (req->rq_bytes_sent == req->rq_snd_buf.len) @@ -1236,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, 
return 0; } -#if defined(CONFIG_NFS_V4_1) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* * Obtains an rpc_rqst previously allocated and invokes the common * tcp read code to read the data. The result is placed in the callback @@ -1299,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt, { return xs_tcp_read_reply(xprt, desc); } -#endif /* CONFIG_NFS_V4_1 */ +#endif /* CONFIG_SUNRPC_BACKCHANNEL */ /* * Read data off the transport. This can be either an RPC_CALL or an @@ -2489,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap) } static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, - unsigned int slot_table_size) + unsigned int slot_table_size, + unsigned int max_slot_table_size) { struct rpc_xprt *xprt; struct sock_xprt *new; @@ -2499,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, return ERR_PTR(-EBADF); } - xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); + xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size, + max_slot_table_size); if (xprt == NULL) { dprintk("RPC: xs_setup_xprt: couldn't allocate " "rpc_xprt\n"); @@ -2541,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args) struct rpc_xprt *xprt; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2605,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries, + xprt_udp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2681,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) struct sock_xprt *transport; struct rpc_xprt *ret; - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_max_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2760,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args) */ return args->bc_xprt->xpt_bc_xprt; } - xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); + xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries, + xprt_tcp_slot_table_entries); if (IS_ERR(xprt)) return xprt; transport = container_of(xprt, struct sock_xprt, xprt); @@ -2947,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = { #define param_check_slot_table_size(name, p) \ __param_check(name, p, unsigned int); +static int param_set_max_slot_table_size(const char *val, + const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, + RPC_MIN_SLOT_TABLE, + RPC_MAX_SLOT_TABLE_LIMIT); +} + +static struct kernel_param_ops param_ops_max_slot_table_size = { + .set = param_set_max_slot_table_size, + .get = param_get_uint, +}; + +#define param_check_max_slot_table_size(name, p) \ + __param_check(name, p, unsigned int); + module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, slot_table_size, 0644); +module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries, + max_slot_table_size, 0644); module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, slot_table_size, 0644); diff --git a/net/wireless/reg.c 
b/net/wireless/reg.c index 1ad0f39fe091..02751dbc5a97 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -903,7 +903,7 @@ static bool ignore_reg_update(struct wiphy *wiphy, initiator != NL80211_REGDOM_SET_BY_COUNTRY_IE && !is_world_regdom(last_request->alpha2)) { REG_DBG_PRINT("Ignoring regulatory request %s " - "since the driver requires its own regulaotry " + "since the driver requires its own regulatory " "domain to be set first", reg_initiator_name(initiator)); return true; @@ -1125,12 +1125,13 @@ void wiphy_update_regulatory(struct wiphy *wiphy, enum ieee80211_band band; if (ignore_reg_update(wiphy, initiator)) - goto out; + return; + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { if (wiphy->bands[band]) handle_band(wiphy, band, initiator); } -out: + reg_process_beacons(wiphy); reg_process_ht_flags(wiphy); if (wiphy->reg_notifier) diff --git a/security/apparmor/domain.c b/security/apparmor/domain.c index 7312bf9f7afc..c1e18ba5bdc0 100644 --- a/security/apparmor/domain.c +++ b/security/apparmor/domain.c @@ -73,7 +73,6 @@ static int may_change_ptraced_domain(struct task_struct *task, cred = get_task_cred(tracer); tracerp = aa_cred_profile(cred); } - rcu_read_unlock(); /* not ptraced */ if (!tracer || unconfined(tracerp)) @@ -82,6 +81,7 @@ static int may_change_ptraced_domain(struct task_struct *task, error = aa_may_ptrace(tracer, tracerp, to_profile, PTRACE_MODE_ATTACH); out: + rcu_read_unlock(); if (cred) put_cred(cred); diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 3d2fd141dff7..37832026e58a 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -127,7 +127,7 @@ static int apparmor_capget(struct task_struct *target, kernel_cap_t *effective, *inheritable = cred->cap_inheritable; *permitted = cred->cap_permitted; - if (!unconfined(profile)) { + if (!unconfined(profile) && !COMPLAIN_MODE(profile)) { *effective = cap_intersect(*effective, profile->caps.allow); *permitted = cap_intersect(*permitted, profile->caps.allow); } diff --git a/security/keys/Makefile b/security/keys/Makefile index 1bf090a885fe..b34cc6ee6900 100644 --- a/security/keys/Makefile +++ b/security/keys/Makefile @@ -14,7 +14,7 @@ obj-y := \ user_defined.o obj-$(CONFIG_TRUSTED_KEYS) += trusted.o -obj-$(CONFIG_ENCRYPTED_KEYS) += encrypted.o +obj-$(CONFIG_ENCRYPTED_KEYS) += ecryptfs_format.o encrypted.o obj-$(CONFIG_KEYS_COMPAT) += compat.o obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_SYSCTL) += sysctl.o diff --git a/security/keys/ecryptfs_format.c b/security/keys/ecryptfs_format.c new file mode 100644 index 000000000000..6daa3b6ff9ed --- /dev/null +++ b/security/keys/ecryptfs_format.c @@ -0,0 +1,81 @@ +/* + * ecryptfs_format.c: helper functions for the encrypted key type + * + * Copyright (C) 2006 International Business Machines Corp. + * Copyright (C) 2010 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Authors: + * Michael A. Halcrow <mahalcro@us.ibm.com> + * Tyler Hicks <tyhicks@ou.edu> + * Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License. 
+ */ + +#include <linux/module.h> +#include "ecryptfs_format.h" + +u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok) +{ + return auth_tok->token.password.session_key_encryption_key; +} +EXPORT_SYMBOL(ecryptfs_get_auth_tok_key); + +/* + * ecryptfs_get_versions() + * + * Source code taken from the software 'ecryptfs-utils' version 83. + * + */ +void ecryptfs_get_versions(int *major, int *minor, int *file_version) +{ + *major = ECRYPTFS_VERSION_MAJOR; + *minor = ECRYPTFS_VERSION_MINOR; + if (file_version) + *file_version = ECRYPTFS_SUPPORTED_FILE_VERSION; +} +EXPORT_SYMBOL(ecryptfs_get_versions); + +/* + * ecryptfs_fill_auth_tok - fill the ecryptfs_auth_tok structure + * + * Fill the ecryptfs_auth_tok structure with the required ecryptfs data. + * The source code is inspired by the original function generate_payload() + * shipped with the software 'ecryptfs-utils' version 83. + * + */ +int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok, + const char *key_desc) +{ + int major, minor; + + ecryptfs_get_versions(&major, &minor, NULL); + auth_tok->version = (((uint16_t)(major << 8) & 0xFF00) + | ((uint16_t)minor & 0x00FF)); + auth_tok->token_type = ECRYPTFS_PASSWORD; + strncpy((char *)auth_tok->token.password.signature, key_desc, + ECRYPTFS_PASSWORD_SIG_SIZE); + auth_tok->token.password.session_key_encryption_key_bytes = + ECRYPTFS_MAX_KEY_BYTES; + /* + * Removed auth_tok->token.password.salt and + * auth_tok->token.password.session_key_encryption_key + * initialization from the original code + */ + /* TODO: Make the hash parameterizable via policy */ + auth_tok->token.password.flags |= + ECRYPTFS_SESSION_KEY_ENCRYPTION_KEY_SET; + /* The kernel code will encrypt the session key. */ + auth_tok->session_key.encrypted_key[0] = 0; + auth_tok->session_key.encrypted_key_size = 0; + /* Default; subject to change by kernel eCryptfs */ + auth_tok->token.password.hash_algo = PGP_DIGEST_ALGO_SHA512; + auth_tok->token.password.flags &= ~(ECRYPTFS_PERSISTENT_PASSWORD); + return 0; +} +EXPORT_SYMBOL(ecryptfs_fill_auth_tok); + +MODULE_LICENSE("GPL"); diff --git a/security/keys/ecryptfs_format.h b/security/keys/ecryptfs_format.h new file mode 100644 index 000000000000..40294de238bb --- /dev/null +++ b/security/keys/ecryptfs_format.h @@ -0,0 +1,30 @@ +/* + * ecryptfs_format.h: helper functions for the encrypted key type + * + * Copyright (C) 2006 International Business Machines Corp. + * Copyright (C) 2010 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it + * + * Authors: + * Michael A. Halcrow <mahalcro@us.ibm.com> + * Tyler Hicks <tyhicks@ou.edu> + * Roberto Sassu <roberto.sassu@polito.it> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, version 2 of the License.
+ */ + +#ifndef __KEYS_ECRYPTFS_H +#define __KEYS_ECRYPTFS_H + +#include <linux/ecryptfs.h> + +#define PGP_DIGEST_ALGO_SHA512 10 + +u8 *ecryptfs_get_auth_tok_key(struct ecryptfs_auth_tok *auth_tok); +void ecryptfs_get_versions(int *major, int *minor, int *file_version); +int ecryptfs_fill_auth_tok(struct ecryptfs_auth_tok *auth_tok, + const char *key_desc); + +#endif /* __KEYS_ECRYPTFS_H */ diff --git a/security/keys/encrypted.c b/security/keys/encrypted.c index b1cba5bf0a5e..e7eca9ec4c65 100644 --- a/security/keys/encrypted.c +++ b/security/keys/encrypted.c @@ -1,8 +1,11 @@ /* * Copyright (C) 2010 IBM Corporation + * Copyright (C) 2010 Politecnico di Torino, Italy + * TORSEC group -- http://security.polito.it * - * Author: + * Authors: * Mimi Zohar <zohar@us.ibm.com> + * Roberto Sassu <roberto.sassu@polito.it> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -26,22 +29,27 @@ #include <linux/rcupdate.h> #include <linux/scatterlist.h> #include <linux/crypto.h> +#include <linux/ctype.h> #include <crypto/hash.h> #include <crypto/sha.h> #include <crypto/aes.h> #include "encrypted.h" +#include "ecryptfs_format.h" static const char KEY_TRUSTED_PREFIX[] = "trusted:"; static const char KEY_USER_PREFIX[] = "user:"; static const char hash_alg[] = "sha256"; static const char hmac_alg[] = "hmac(sha256)"; static const char blkcipher_alg[] = "cbc(aes)"; +static const char key_format_default[] = "default"; +static const char key_format_ecryptfs[] = "ecryptfs"; static unsigned int ivsize; static int blksize; #define KEY_TRUSTED_PREFIX_LEN (sizeof (KEY_TRUSTED_PREFIX) - 1) #define KEY_USER_PREFIX_LEN (sizeof (KEY_USER_PREFIX) - 1) +#define KEY_ECRYPTFS_DESC_LEN 16 #define HASH_SIZE SHA256_DIGEST_SIZE #define MAX_DATA_SIZE 4096 #define MIN_DATA_SIZE 20 @@ -58,6 +66,16 @@ enum { Opt_err = -1, Opt_new, Opt_load, Opt_update }; +enum { + Opt_error = -1, Opt_default, Opt_ecryptfs +}; + +static const match_table_t key_format_tokens = { + {Opt_default, "default"}, + {Opt_ecryptfs, "ecryptfs"}, + {Opt_error, NULL} +}; + static const match_table_t key_tokens = { {Opt_new, "new"}, {Opt_load, "load"}, @@ -82,9 +100,37 @@ static int aes_get_sizes(void) } /* + * valid_ecryptfs_desc - verify the description of a new/loaded encrypted key + * + * The description of an encrypted key with format 'ecryptfs' must contain + * exactly 16 hexadecimal characters. + * + */ +static int valid_ecryptfs_desc(const char *ecryptfs_desc) +{ + int i; + + if (strlen(ecryptfs_desc) != KEY_ECRYPTFS_DESC_LEN) { + pr_err("encrypted_key: key description must be %d hexadecimal " + "characters long\n", KEY_ECRYPTFS_DESC_LEN); + return -EINVAL; + } + + for (i = 0; i < KEY_ECRYPTFS_DESC_LEN; i++) { + if (!isxdigit(ecryptfs_desc[i])) { + pr_err("encrypted_key: key description must contain " + "only hexadecimal characters\n"); + return -EINVAL; + } + } + + return 0; +} + +/* * valid_master_desc - verify the 'key-type:desc' of a new/updated master-key * - * key-type:= "trusted:" | "encrypted:" + * key-type:= "trusted:" | "user:" * desc:= master-key description * * Verify that 'key-type' is valid and that 'desc' exists.
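 * E.g. "trusted:kmk" names a trusted-key master and "user:mk" a user-key master (illustrative key names).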
On key update, @@ -118,8 +164,9 @@ out: * datablob_parse - parse the keyctl data * * datablob format: - * new <master-key name> <decrypted data length> - * load <master-key name> <decrypted data length> <encrypted iv + data> + * new [<format>] <master-key name> <decrypted data length> + * load [<format>] <master-key name> <decrypted data length> + * <encrypted iv + data> * update <new-master-key name> * * Tokenizes a copy of the keyctl data, returning a pointer to each token, @@ -127,52 +174,95 @@ out: * * On success returns 0, otherwise -EINVAL. */ -static int datablob_parse(char *datablob, char **master_desc, - char **decrypted_datalen, char **hex_encoded_iv) +static int datablob_parse(char *datablob, const char **format, + char **master_desc, char **decrypted_datalen, + char **hex_encoded_iv) { substring_t args[MAX_OPT_ARGS]; int ret = -EINVAL; int key_cmd; - char *p; + int key_format; + char *p, *keyword; + + keyword = strsep(&datablob, " \t"); + if (!keyword) { + pr_info("encrypted_key: insufficient parameters specified\n"); + return ret; + } + key_cmd = match_token(keyword, key_tokens, args); + /* Get optional format: default | ecryptfs */ p = strsep(&datablob, " \t"); - if (!p) + if (!p) { + pr_err("encrypted_key: insufficient parameters specified\n"); return ret; - key_cmd = match_token(p, key_tokens, args); + } - *master_desc = strsep(&datablob, " \t"); - if (!*master_desc) + key_format = match_token(p, key_format_tokens, args); + switch (key_format) { + case Opt_ecryptfs: + case Opt_default: + *format = p; + *master_desc = strsep(&datablob, " \t"); + break; + case Opt_error: + *master_desc = p; + break; + } + + if (!*master_desc) { + pr_info("encrypted_key: master key parameter is missing\n"); goto out; + } - if (valid_master_desc(*master_desc, NULL) < 0) + if (valid_master_desc(*master_desc, NULL) < 0) { + pr_info("encrypted_key: master key parameter \'%s\' " + "is invalid\n", *master_desc); goto out; + } if (decrypted_datalen) { *decrypted_datalen = strsep(&datablob, " \t"); - if (!*decrypted_datalen) + if (!*decrypted_datalen) { + pr_info("encrypted_key: keylen parameter is missing\n"); goto out; + } } switch (key_cmd) { case Opt_new: - if (!decrypted_datalen) + if (!decrypted_datalen) { + pr_info("encrypted_key: keyword \'%s\' not allowed " + "when called from .update method\n", keyword); break; + } ret = 0; break; case Opt_load: - if (!decrypted_datalen) + if (!decrypted_datalen) { + pr_info("encrypted_key: keyword \'%s\' not allowed " + "when called from .update method\n", keyword); break; + } *hex_encoded_iv = strsep(&datablob, " \t"); - if (!*hex_encoded_iv) + if (!*hex_encoded_iv) { + pr_info("encrypted_key: hex blob is missing\n"); break; + } ret = 0; break; case Opt_update: - if (decrypted_datalen) + if (decrypted_datalen) { + pr_info("encrypted_key: keyword \'%s\' not allowed " + "when called from .instantiate method\n", + keyword); break; + } ret = 0; break; case Opt_err: + pr_info("encrypted_key: keyword \'%s\' not recognized\n", + keyword); break; } out: @@ -197,8 +287,8 @@ static char *datablob_format(struct encrypted_key_payload *epayload, ascii_buf[asciiblob_len] = '\0'; /* copy datablob master_desc and datalen strings */ - len = sprintf(ascii_buf, "%s %s ", epayload->master_desc, - epayload->datalen); + len = sprintf(ascii_buf, "%s %s %s ", epayload->format, + epayload->master_desc, epayload->datalen); /* convert the hex encoded iv, encrypted-data and HMAC to ascii */ bufp = &ascii_buf[len]; @@ -378,11 +468,13 @@ static struct key *request_master_key(struct 
encrypted_key_payload *epayload, } else goto out; - if (IS_ERR(mkey)) + if (IS_ERR(mkey)) { pr_info("encrypted_key: key %s not found", epayload->master_desc); - if (mkey) - dump_master_key(*master_key, *master_keylen); + goto out; + } + + dump_master_key(*master_key, *master_keylen); out: return mkey; } @@ -439,9 +531,9 @@ static int datablob_hmac_append(struct encrypted_key_payload *epayload, if (ret < 0) goto out; - digest = epayload->master_desc + epayload->datablob_len; + digest = epayload->format + epayload->datablob_len; ret = calc_hmac(digest, derived_key, sizeof derived_key, - epayload->master_desc, epayload->datablob_len); + epayload->format, epayload->datablob_len); if (!ret) dump_hmac(NULL, digest, HASH_SIZE); out: @@ -450,26 +542,35 @@ out: /* verify HMAC before decrypting encrypted key */ static int datablob_hmac_verify(struct encrypted_key_payload *epayload, - const u8 *master_key, size_t master_keylen) + const u8 *format, const u8 *master_key, + size_t master_keylen) { u8 derived_key[HASH_SIZE]; u8 digest[HASH_SIZE]; int ret; + char *p; + unsigned short len; ret = get_derived_key(derived_key, AUTH_KEY, master_key, master_keylen); if (ret < 0) goto out; - ret = calc_hmac(digest, derived_key, sizeof derived_key, - epayload->master_desc, epayload->datablob_len); + len = epayload->datablob_len; + if (!format) { + p = epayload->master_desc; + len -= strlen(epayload->format) + 1; + } else + p = epayload->format; + + ret = calc_hmac(digest, derived_key, sizeof derived_key, p, len); if (ret < 0) goto out; - ret = memcmp(digest, epayload->master_desc + epayload->datablob_len, + ret = memcmp(digest, epayload->format + epayload->datablob_len, sizeof digest); if (ret) { ret = -EINVAL; dump_hmac("datablob", - epayload->master_desc + epayload->datablob_len, + epayload->format + epayload->datablob_len, HASH_SIZE); dump_hmac("calc", digest, HASH_SIZE); } @@ -514,13 +615,16 @@ out: /* Allocate memory for decrypted key and datablob. */ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, + const char *format, const char *master_desc, const char *datalen) { struct encrypted_key_payload *epayload = NULL; unsigned short datablob_len; unsigned short decrypted_datalen; + unsigned short payload_datalen; unsigned int encrypted_datalen; + unsigned int format_len; long dlen; int ret; @@ -528,29 +632,43 @@ static struct encrypted_key_payload *encrypted_key_alloc(struct key *key, if (ret < 0 || dlen < MIN_DATA_SIZE || dlen > MAX_DATA_SIZE) return ERR_PTR(-EINVAL); + format_len = (!format) ? 
strlen(key_format_default) : strlen(format); decrypted_datalen = dlen; + payload_datalen = decrypted_datalen; + if (format && !strcmp(format, key_format_ecryptfs)) { + if (dlen != ECRYPTFS_MAX_KEY_BYTES) { + pr_err("encrypted_key: keylen for the ecryptfs format " + "must be equal to %d bytes\n", + ECRYPTFS_MAX_KEY_BYTES); + return ERR_PTR(-EINVAL); + } + decrypted_datalen = ECRYPTFS_MAX_KEY_BYTES; + payload_datalen = sizeof(struct ecryptfs_auth_tok); + } + encrypted_datalen = roundup(decrypted_datalen, blksize); - datablob_len = strlen(master_desc) + 1 + strlen(datalen) + 1 - + ivsize + 1 + encrypted_datalen; + datablob_len = format_len + 1 + strlen(master_desc) + 1 + + strlen(datalen) + 1 + ivsize + 1 + encrypted_datalen; - ret = key_payload_reserve(key, decrypted_datalen + datablob_len + ret = key_payload_reserve(key, payload_datalen + datablob_len + HASH_SIZE + 1); if (ret < 0) return ERR_PTR(ret); - epayload = kzalloc(sizeof(*epayload) + decrypted_datalen + + epayload = kzalloc(sizeof(*epayload) + payload_datalen + datablob_len + HASH_SIZE + 1, GFP_KERNEL); if (!epayload) return ERR_PTR(-ENOMEM); + epayload->payload_datalen = payload_datalen; epayload->decrypted_datalen = decrypted_datalen; epayload->datablob_len = datablob_len; return epayload; } static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, - const char *hex_encoded_iv) + const char *format, const char *hex_encoded_iv) { struct key *mkey; u8 derived_key[HASH_SIZE]; @@ -571,14 +689,14 @@ static int encrypted_key_decrypt(struct encrypted_key_payload *epayload, hex2bin(epayload->iv, hex_encoded_iv, ivsize); hex2bin(epayload->encrypted_data, hex_encoded_data, encrypted_datalen); - hmac = epayload->master_desc + epayload->datablob_len; + hmac = epayload->format + epayload->datablob_len; hex2bin(hmac, hex_encoded_data + (encrypted_datalen * 2), HASH_SIZE); mkey = request_master_key(epayload, &master_key, &master_keylen); if (IS_ERR(mkey)) return PTR_ERR(mkey); - ret = datablob_hmac_verify(epayload, master_key, master_keylen); + ret = datablob_hmac_verify(epayload, format, master_key, master_keylen); if (ret < 0) { pr_err("encrypted_key: bad hmac (%d)\n", ret); goto out; @@ -598,13 +716,28 @@ out: } static void __ekey_init(struct encrypted_key_payload *epayload, - const char *master_desc, const char *datalen) + const char *format, const char *master_desc, + const char *datalen) { - epayload->master_desc = epayload->decrypted_data - + epayload->decrypted_datalen; + unsigned int format_len; + + format_len = (!format) ? strlen(key_format_default) : strlen(format); + epayload->format = epayload->payload_data + epayload->payload_datalen; + epayload->master_desc = epayload->format + format_len + 1; epayload->datalen = epayload->master_desc + strlen(master_desc) + 1; epayload->iv = epayload->datalen + strlen(datalen) + 1; epayload->encrypted_data = epayload->iv + ivsize + 1; + epayload->decrypted_data = epayload->payload_data; + + if (!format) + memcpy(epayload->format, key_format_default, format_len); + else { + if (!strcmp(format, key_format_ecryptfs)) + epayload->decrypted_data = + ecryptfs_get_auth_tok_key((struct ecryptfs_auth_tok *)epayload->payload_data); + + memcpy(epayload->format, format, format_len); + } memcpy(epayload->master_desc, master_desc, strlen(master_desc)); memcpy(epayload->datalen, datalen, strlen(datalen)); @@ -617,19 +750,29 @@ static void __ekey_init(struct encrypted_key_payload *epayload, * itself. For an old key, decrypt the hex encoded data. 
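 * For the 'ecryptfs' format the key description doubles as the eCryptfs signature and must be 16 hex characters, e.g. keyctl add encrypted 0123456789abcdef "new ecryptfs trusted:kmk 64" @u (illustrative key names).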
*/ static int encrypted_init(struct encrypted_key_payload *epayload, + const char *key_desc, const char *format, const char *master_desc, const char *datalen, const char *hex_encoded_iv) { int ret = 0; - __ekey_init(epayload, master_desc, datalen); + if (format && !strcmp(format, key_format_ecryptfs)) { + ret = valid_ecryptfs_desc(key_desc); + if (ret < 0) + return ret; + + ecryptfs_fill_auth_tok((struct ecryptfs_auth_tok *)epayload->payload_data, + key_desc); + } + + __ekey_init(epayload, format, master_desc, datalen); if (!hex_encoded_iv) { get_random_bytes(epayload->iv, ivsize); get_random_bytes(epayload->decrypted_data, epayload->decrypted_datalen); } else - ret = encrypted_key_decrypt(epayload, hex_encoded_iv); + ret = encrypted_key_decrypt(epayload, format, hex_encoded_iv); return ret; } @@ -646,6 +789,7 @@ static int encrypted_instantiate(struct key *key, const void *data, { struct encrypted_key_payload *epayload = NULL; char *datablob = NULL; + const char *format = NULL; char *master_desc = NULL; char *decrypted_datalen = NULL; char *hex_encoded_iv = NULL; @@ -659,18 +803,19 @@ static int encrypted_instantiate(struct key *key, const void *data, return -ENOMEM; datablob[datalen] = 0; memcpy(datablob, data, datalen); - ret = datablob_parse(datablob, &master_desc, &decrypted_datalen, - &hex_encoded_iv); + ret = datablob_parse(datablob, &format, &master_desc, + &decrypted_datalen, &hex_encoded_iv); if (ret < 0) goto out; - epayload = encrypted_key_alloc(key, master_desc, decrypted_datalen); + epayload = encrypted_key_alloc(key, format, master_desc, + decrypted_datalen); if (IS_ERR(epayload)) { ret = PTR_ERR(epayload); goto out; } - ret = encrypted_init(epayload, master_desc, decrypted_datalen, - hex_encoded_iv); + ret = encrypted_init(epayload, key->description, format, master_desc, + decrypted_datalen, hex_encoded_iv); if (ret < 0) { kfree(epayload); goto out; @@ -706,6 +851,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) struct encrypted_key_payload *new_epayload; char *buf; char *new_master_desc = NULL; + const char *format = NULL; int ret = 0; if (datalen <= 0 || datalen > 32767 || !data) @@ -717,7 +863,7 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) buf[datalen] = 0; memcpy(buf, data, datalen); - ret = datablob_parse(buf, &new_master_desc, NULL, NULL); + ret = datablob_parse(buf, &format, &new_master_desc, NULL, NULL); if (ret < 0) goto out; @@ -725,18 +871,19 @@ static int encrypted_update(struct key *key, const void *data, size_t datalen) if (ret < 0) goto out; - new_epayload = encrypted_key_alloc(key, new_master_desc, - epayload->datalen); + new_epayload = encrypted_key_alloc(key, epayload->format, + new_master_desc, epayload->datalen); if (IS_ERR(new_epayload)) { ret = PTR_ERR(new_epayload); goto out; } - __ekey_init(new_epayload, new_master_desc, epayload->datalen); + __ekey_init(new_epayload, epayload->format, new_master_desc, + epayload->datalen); memcpy(new_epayload->iv, epayload->iv, ivsize); - memcpy(new_epayload->decrypted_data, epayload->decrypted_data, - epayload->decrypted_datalen); + memcpy(new_epayload->payload_data, epayload->payload_data, + epayload->payload_datalen); rcu_assign_pointer(key->payload.data, new_epayload); call_rcu(&epayload->rcu, encrypted_rcu_free); diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 6cff37529b80..60d4e3f5e4bb 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -251,6 +251,8 @@ struct key 
*key_get_instantiation_authkey(key_serial_t target_id) if (IS_ERR(authkey_ref)) { authkey = ERR_CAST(authkey_ref); + if (authkey == ERR_PTR(-EAGAIN)) + authkey = ERR_PTR(-ENOKEY); goto error; } diff --git a/security/tomoyo/Kconfig b/security/tomoyo/Kconfig index c8f385793235..7c7f8c16c10f 100644 --- a/security/tomoyo/Kconfig +++ b/security/tomoyo/Kconfig @@ -9,3 +9,64 @@ config SECURITY_TOMOYO Required userspace tools and further information may be found at <http://tomoyo.sourceforge.jp/>. If you are unsure how to answer this question, answer N. + +config SECURITY_TOMOYO_MAX_ACCEPT_ENTRY + int "Default maximal count for learning mode" + default 2048 + range 0 2147483647 + depends on SECURITY_TOMOYO + help + This is the default limit on the number of ACL entries + that are automatically appended to the policy in "learning mode". + Some programs access thousands of objects, so running + such programs in "learning mode" slows the system down + and consumes a large amount of memory. + This limit is the safeguard against such programs. + +config SECURITY_TOMOYO_MAX_AUDIT_LOG + int "Default maximal count for audit log" + default 1024 + range 0 2147483647 + depends on SECURITY_TOMOYO + help + This is the default limit on the number of audit log entries + that the kernel keeps in memory. + You can read the log via /sys/kernel/security/tomoyo/audit. + If you don't need audit logs, you may set this value to 0. + +config SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + bool "Activate without calling userspace policy loader" + default n + depends on SECURITY_TOMOYO + ---help--- + Say Y here if you want to activate access control as soon as the + built-in policy has been loaded. This option is useful for systems + that must perform operations which could hijack the boot sequence + before the policy is loaded. For example, you can activate + immediately after loading a fixed part of the policy that permits + only the operations needed to mount the partition containing the + variant part of the policy, verify it (e.g. by running a GPG check), + and load it. Since enforcing mode can then be used from the + beginning, the window for hijacking the boot sequence is reduced. + +config SECURITY_TOMOYO_POLICY_LOADER + string "Location of userspace policy loader" + default "/sbin/tomoyo-init" + depends on SECURITY_TOMOYO + depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + ---help--- + This is the default pathname of the policy loader, which is called + before activation. You can override this setting via the + TOMOYO_loader= kernel command line option. + +config SECURITY_TOMOYO_ACTIVATION_TRIGGER + string "Trigger for calling userspace policy loader" + default "/sbin/init" + depends on SECURITY_TOMOYO + depends on !SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + ---help--- + This is the default pathname of the activation trigger. + You can override this setting via the TOMOYO_trigger= kernel + command line option. For example, if you pass init=/bin/systemd, + you may also want to pass TOMOYO_trigger=/bin/systemd.
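The TOMOYO Makefile hunk below embeds the plain-text files under policy/ into the kernel image as C string literals: each policy line becomes a quoted "...\n" string, terminated by an empty "" literal. As a point of reference (a sketch of the generated output, not verbatim build artifacts), the default policy files created by these rules (an empty profile.conf and the two-line exception_policy.conf) would yield a builtin-policy.h along these lines:

/* Sketch of builtin-policy.h as generated from the default policy files; */
/* tomoyo_builtin_manager and tomoyo_builtin_stat follow the same empty pattern. */
static char tomoyo_builtin_profile[] __initdata =
	"";
static char tomoyo_builtin_exception_policy[] __initdata =
	"initialize_domain /sbin/modprobe from any\n"
	"initialize_domain /sbin/hotplug from any\n"
	"";
static char tomoyo_builtin_domain_policy[] __initdata =
	"";

The final rule makes common.o depend on this generated header, so the built-in policy is compiled into common.c and is available at activation time without reading the filesystem.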
diff --git a/security/tomoyo/Makefile b/security/tomoyo/Makefile index 91640e96bd06..95278b71fc21 100644 --- a/security/tomoyo/Makefile +++ b/security/tomoyo/Makefile @@ -1 +1,48 @@ -obj-y = common.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o +obj-y = audit.o common.o condition.o domain.o file.o gc.o group.o load_policy.o memory.o mount.o realpath.o securityfs_if.o tomoyo.o util.o + +$(obj)/policy/profile.conf: + @mkdir -p $(obj)/policy/ + @echo Creating an empty policy/profile.conf + @touch $@ + +$(obj)/policy/exception_policy.conf: + @mkdir -p $(obj)/policy/ + @echo Creating a default policy/exception_policy.conf + @echo initialize_domain /sbin/modprobe from any >> $@ + @echo initialize_domain /sbin/hotplug from any >> $@ + +$(obj)/policy/domain_policy.conf: + @mkdir -p $(obj)/policy/ + @echo Creating an empty policy/domain_policy.conf + @touch $@ + +$(obj)/policy/manager.conf: + @mkdir -p $(obj)/policy/ + @echo Creating an empty policy/manager.conf + @touch $@ + +$(obj)/policy/stat.conf: + @mkdir -p $(obj)/policy/ + @echo Creating an empty policy/stat.conf + @touch $@ + +$(obj)/builtin-policy.h: $(obj)/policy/profile.conf $(obj)/policy/exception_policy.conf $(obj)/policy/domain_policy.conf $(obj)/policy/manager.conf $(obj)/policy/stat.conf + @echo Generating built-in policy for TOMOYO 2.4.x. + @echo "static char tomoyo_builtin_profile[] __initdata =" > $@.tmp + @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/profile.conf >> $@.tmp + @echo "\"\";" >> $@.tmp + @echo "static char tomoyo_builtin_exception_policy[] __initdata =" >> $@.tmp + @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/exception_policy.conf >> $@.tmp + @echo "\"\";" >> $@.tmp + @echo "static char tomoyo_builtin_domain_policy[] __initdata =" >> $@.tmp + @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/domain_policy.conf >> $@.tmp + @echo "\"\";" >> $@.tmp + @echo "static char tomoyo_builtin_manager[] __initdata =" >> $@.tmp + @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/manager.conf >> $@.tmp + @echo "\"\";" >> $@.tmp + @echo "static char tomoyo_builtin_stat[] __initdata =" >> $@.tmp + @sed -e 's/\\/\\\\/g' -e 's/\"/\\"/g' -e 's/\(.*\)/"\1\\n"/' < $(obj)/policy/stat.conf >> $@.tmp + @echo "\"\";" >> $@.tmp + @mv $@.tmp $@ + +$(obj)/common.o: $(obj)/builtin-policy.h diff --git a/security/tomoyo/audit.c b/security/tomoyo/audit.c new file mode 100644 index 000000000000..5dbb1f7617c0 --- /dev/null +++ b/security/tomoyo/audit.c @@ -0,0 +1,456 @@ +/* + * security/tomoyo/audit.c + * + * Copyright (C) 2005-2011 NTT DATA CORPORATION + */ + +#include "common.h" +#include <linux/slab.h> + +/** + * tomoyo_print_bprm - Print "struct linux_binprm" for auditing. + * + * @bprm: Pointer to "struct linux_binprm". + * @dump: Pointer to "struct tomoyo_page_dump". + * + * Returns the contents of @bprm on success, NULL otherwise. + * + * This function uses kzalloc(), so caller must kfree() if this function + * didn't return NULL. 
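+ * Each argv[]/envp[] entry is emitted as a double-quoted string with non-printable bytes octal-escaped; output that would overflow the buffer is truncated to "...".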
+ */ +static char *tomoyo_print_bprm(struct linux_binprm *bprm, + struct tomoyo_page_dump *dump) +{ + static const int tomoyo_buffer_len = 4096 * 2; + char *buffer = kzalloc(tomoyo_buffer_len, GFP_NOFS); + char *cp; + char *last_start; + int len; + unsigned long pos = bprm->p; + int offset = pos % PAGE_SIZE; + int argv_count = bprm->argc; + int envp_count = bprm->envc; + bool truncated = false; + if (!buffer) + return NULL; + len = snprintf(buffer, tomoyo_buffer_len - 1, "argv[]={ "); + cp = buffer + len; + if (!argv_count) { + memmove(cp, "} envp[]={ ", 11); + cp += 11; + } + last_start = cp; + while (argv_count || envp_count) { + if (!tomoyo_dump_page(bprm, pos, dump)) + goto out; + pos += PAGE_SIZE - offset; + /* Read. */ + while (offset < PAGE_SIZE) { + const char *kaddr = dump->data; + const unsigned char c = kaddr[offset++]; + if (cp == last_start) + *cp++ = '"'; + if (cp >= buffer + tomoyo_buffer_len - 32) { + /* Reserve some room for "..." string. */ + truncated = true; + } else if (c == '\\') { + *cp++ = '\\'; + *cp++ = '\\'; + } else if (c > ' ' && c < 127) { + *cp++ = c; + } else if (!c) { + *cp++ = '"'; + *cp++ = ' '; + last_start = cp; + } else { + *cp++ = '\\'; + *cp++ = (c >> 6) + '0'; + *cp++ = ((c >> 3) & 7) + '0'; + *cp++ = (c & 7) + '0'; + } + if (c) + continue; + if (argv_count) { + if (--argv_count == 0) { + if (truncated) { + cp = last_start; + memmove(cp, "... ", 4); + cp += 4; + } + memmove(cp, "} envp[]={ ", 11); + cp += 11; + last_start = cp; + truncated = false; + } + } else if (envp_count) { + if (--envp_count == 0) { + if (truncated) { + cp = last_start; + memmove(cp, "... ", 4); + cp += 4; + } + } + } + if (!argv_count && !envp_count) + break; + } + offset = 0; + } + *cp++ = '}'; + *cp = '\0'; + return buffer; +out: + snprintf(buffer, tomoyo_buffer_len - 1, + "argv[]={ ... } envp[]= { ... }"); + return buffer; +} + +/** + * tomoyo_filetype - Get string representation of file type. + * + * @mode: Mode value for stat(). + * + * Returns file type string. + */ +static inline const char *tomoyo_filetype(const mode_t mode) +{ + switch (mode & S_IFMT) { + case S_IFREG: + case 0: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FILE]; + case S_IFDIR: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_DIRECTORY]; + case S_IFLNK: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SYMLINK]; + case S_IFIFO: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_FIFO]; + case S_IFSOCK: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_SOCKET]; + case S_IFBLK: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_BLOCK_DEV]; + case S_IFCHR: + return tomoyo_condition_keyword[TOMOYO_TYPE_IS_CHAR_DEV]; + } + return "unknown"; /* This should not happen. */ +} + +/** + * tomoyo_print_header - Get header line of audit log. + * + * @r: Pointer to "struct tomoyo_request_info". + * + * Returns string representation. + * + * This function uses kmalloc(), so caller must kfree() if this function + * didn't return NULL. 
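+ * The header records a timestamp, the profile number and mode, the requesting task's credentials, and stat() details for up to two paths and their parents.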
+ */ +static char *tomoyo_print_header(struct tomoyo_request_info *r) +{ + struct tomoyo_time stamp; + const pid_t gpid = task_pid_nr(current); + struct tomoyo_obj_info *obj = r->obj; + static const int tomoyo_buffer_len = 4096; + char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS); + int pos; + u8 i; + if (!buffer) + return NULL; + { + struct timeval tv; + do_gettimeofday(&tv); + tomoyo_convert_time(tv.tv_sec, &stamp); + } + pos = snprintf(buffer, tomoyo_buffer_len - 1, + "#%04u/%02u/%02u %02u:%02u:%02u# profile=%u mode=%s " + "granted=%s (global-pid=%u) task={ pid=%u ppid=%u " + "uid=%u gid=%u euid=%u egid=%u suid=%u sgid=%u " + "fsuid=%u fsgid=%u }", stamp.year, stamp.month, + stamp.day, stamp.hour, stamp.min, stamp.sec, r->profile, + tomoyo_mode[r->mode], tomoyo_yesno(r->granted), gpid, + tomoyo_sys_getpid(), tomoyo_sys_getppid(), + current_uid(), current_gid(), current_euid(), + current_egid(), current_suid(), current_sgid(), + current_fsuid(), current_fsgid()); + if (!obj) + goto no_obj_info; + if (!obj->validate_done) { + tomoyo_get_attributes(obj); + obj->validate_done = true; + } + for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) { + struct tomoyo_mini_stat *stat; + unsigned int dev; + mode_t mode; + if (!obj->stat_valid[i]) + continue; + stat = &obj->stat[i]; + dev = stat->dev; + mode = stat->mode; + if (i & 1) { + pos += snprintf(buffer + pos, + tomoyo_buffer_len - 1 - pos, + " path%u.parent={ uid=%u gid=%u " + "ino=%lu perm=0%o }", (i >> 1) + 1, + stat->uid, stat->gid, (unsigned long) + stat->ino, stat->mode & S_IALLUGO); + continue; + } + pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, + " path%u={ uid=%u gid=%u ino=%lu major=%u" + " minor=%u perm=0%o type=%s", (i >> 1) + 1, + stat->uid, stat->gid, (unsigned long) + stat->ino, MAJOR(dev), MINOR(dev), + mode & S_IALLUGO, tomoyo_filetype(mode)); + if (S_ISCHR(mode) || S_ISBLK(mode)) { + dev = stat->rdev; + pos += snprintf(buffer + pos, + tomoyo_buffer_len - 1 - pos, + " dev_major=%u dev_minor=%u", + MAJOR(dev), MINOR(dev)); + } + pos += snprintf(buffer + pos, tomoyo_buffer_len - 1 - pos, + " }"); + } +no_obj_info: + if (pos < tomoyo_buffer_len - 1) + return buffer; + kfree(buffer); + return NULL; +} + +/** + * tomoyo_init_log - Allocate buffer for audit logs. + * + * @r: Pointer to "struct tomoyo_request_info". + * @len: Buffer size needed for @fmt and @args. + * @fmt: The printf()'s format string. + * @args: va_list structure for @fmt. + * + * Returns pointer to allocated memory. + * + * This function uses kzalloc(), so caller must kfree() if this function + * didn't return NULL. + */ +char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, + va_list args) +{ + char *buf = NULL; + char *bprm_info = NULL; + const char *header = NULL; + char *realpath = NULL; + const char *symlink = NULL; + int pos; + const char *domainname = r->domain->domainname->name; + header = tomoyo_print_header(r); + if (!header) + return NULL; + /* +10 is for '\n' etc. and '\0'. 
*/ + len += strlen(domainname) + strlen(header) + 10; + if (r->ee) { + struct file *file = r->ee->bprm->file; + realpath = tomoyo_realpath_from_path(&file->f_path); + bprm_info = tomoyo_print_bprm(r->ee->bprm, &r->ee->dump); + if (!realpath || !bprm_info) + goto out; + /* +80 is for " exec={ realpath=\"%s\" argc=%d envc=%d %s }" */ + len += strlen(realpath) + 80 + strlen(bprm_info); + } else if (r->obj && r->obj->symlink_target) { + symlink = r->obj->symlink_target->name; + /* +18 is for " symlink.target=\"%s\"" */ + len += 18 + strlen(symlink); + } + len = tomoyo_round2(len); + buf = kzalloc(len, GFP_NOFS); + if (!buf) + goto out; + len--; + pos = snprintf(buf, len, "%s", header); + if (realpath) { + struct linux_binprm *bprm = r->ee->bprm; + pos += snprintf(buf + pos, len - pos, + " exec={ realpath=\"%s\" argc=%d envc=%d %s }", + realpath, bprm->argc, bprm->envc, bprm_info); + } else if (symlink) + pos += snprintf(buf + pos, len - pos, " symlink.target=\"%s\"", + symlink); + pos += snprintf(buf + pos, len - pos, "\n%s\n", domainname); + vsnprintf(buf + pos, len - pos, fmt, args); +out: + kfree(realpath); + kfree(bprm_info); + kfree(header); + return buf; +} + +/* Wait queue for /sys/kernel/security/tomoyo/audit. */ +static DECLARE_WAIT_QUEUE_HEAD(tomoyo_log_wait); + +/* Structure for audit log. */ +struct tomoyo_log { + struct list_head list; + char *log; + int size; +}; + +/* The list for "struct tomoyo_log". */ +static LIST_HEAD(tomoyo_log); + +/* Lock for "struct list_head tomoyo_log". */ +static DEFINE_SPINLOCK(tomoyo_log_lock); + +/* Length of "struct list_head tomoyo_log". */ +static unsigned int tomoyo_log_count; + +/** + * tomoyo_get_audit - Get audit mode. + * + * @ns: Pointer to "struct tomoyo_policy_namespace". + * @profile: Profile number. + * @index: Index number of functionality. + * @is_granted: True if granted log, false otherwise. + * + * Returns true if this request should be audited, false otherwise. + */ +static bool tomoyo_get_audit(const struct tomoyo_policy_namespace *ns, + const u8 profile, const u8 index, + const bool is_granted) +{ + u8 mode; + const u8 category = tomoyo_index2category[index] + + TOMOYO_MAX_MAC_INDEX; + struct tomoyo_profile *p; + if (!tomoyo_policy_loaded) + return false; + p = tomoyo_profile(ns, profile); + if (tomoyo_log_count >= p->pref[TOMOYO_PREF_MAX_AUDIT_LOG]) + return false; + mode = p->config[index]; + if (mode == TOMOYO_CONFIG_USE_DEFAULT) + mode = p->config[category]; + if (mode == TOMOYO_CONFIG_USE_DEFAULT) + mode = p->default_config; + if (is_granted) + return mode & TOMOYO_CONFIG_WANT_GRANT_LOG; + return mode & TOMOYO_CONFIG_WANT_REJECT_LOG; +} + +/** + * tomoyo_write_log2 - Write an audit log. + * + * @r: Pointer to "struct tomoyo_request_info". + * @len: Buffer size needed for @fmt and @args. + * @fmt: The printf()'s format string. + * @args: va_list structure for @fmt. + * + * Returns nothing. + */ +void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt, + va_list args) +{ + char *buf; + struct tomoyo_log *entry; + bool quota_exceeded = false; + if (!tomoyo_get_audit(r->domain->ns, r->profile, r->type, r->granted)) + goto out; + buf = tomoyo_init_log(r, len, fmt, args); + if (!buf) + goto out; + entry = kzalloc(sizeof(*entry), GFP_NOFS); + if (!entry) { + kfree(buf); + goto out; + } + entry->log = buf; + len = tomoyo_round2(strlen(buf) + 1); + /* + * The entry->size is used for memory quota checks. + * Don't go beyond strlen(entry->log).
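+ * tomoyo_round2() rounds the size up to a power of two, approximating what the underlying allocator actually reserves.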
+ */ + entry->size = len + tomoyo_round2(sizeof(*entry)); + spin_lock(&tomoyo_log_lock); + if (tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT] && + tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] + entry->size >= + tomoyo_memory_quota[TOMOYO_MEMORY_AUDIT]) { + quota_exceeded = true; + } else { + tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] += entry->size; + list_add_tail(&entry->list, &tomoyo_log); + tomoyo_log_count++; + } + spin_unlock(&tomoyo_log_lock); + if (quota_exceeded) { + kfree(buf); + kfree(entry); + goto out; + } + wake_up(&tomoyo_log_wait); +out: + return; +} + +/** + * tomoyo_write_log - Write an audit log. + * + * @r: Pointer to "struct tomoyo_request_info". + * @fmt: The printf()'s format string, followed by parameters. + * + * Returns nothing. + */ +void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...) +{ + va_list args; + int len; + va_start(args, fmt); + len = vsnprintf((char *) &len, 1, fmt, args) + 1; + va_end(args); + va_start(args, fmt); + tomoyo_write_log2(r, len, fmt, args); + va_end(args); +} + +/** + * tomoyo_read_log - Read an audit log. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ +void tomoyo_read_log(struct tomoyo_io_buffer *head) +{ + struct tomoyo_log *ptr = NULL; + if (head->r.w_pos) + return; + kfree(head->read_buf); + head->read_buf = NULL; + spin_lock(&tomoyo_log_lock); + if (!list_empty(&tomoyo_log)) { + ptr = list_entry(tomoyo_log.next, typeof(*ptr), list); + list_del(&ptr->list); + tomoyo_log_count--; + tomoyo_memory_used[TOMOYO_MEMORY_AUDIT] -= ptr->size; + } + spin_unlock(&tomoyo_log_lock); + if (ptr) { + head->read_buf = ptr->log; + head->r.w[head->r.w_pos++] = head->read_buf; + kfree(ptr); + } +} + +/** + * tomoyo_poll_log - Wait for an audit log. + * + * @file: Pointer to "struct file". + * @wait: Pointer to "poll_table". + * + * Returns POLLIN | POLLRDNORM when ready to read an audit log. + */ +int tomoyo_poll_log(struct file *file, poll_table *wait) +{ + if (tomoyo_log_count) + return POLLIN | POLLRDNORM; + poll_wait(file, &tomoyo_log_wait, wait); + if (tomoyo_log_count) + return POLLIN | POLLRDNORM; + return 0; +} diff --git a/security/tomoyo/common.c b/security/tomoyo/common.c index a0d09e56874b..c8439cf2a448 100644 --- a/security/tomoyo/common.c +++ b/security/tomoyo/common.c @@ -1,9 +1,7 @@ /* * security/tomoyo/common.c * - * Common functions for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/uaccess.h> @@ -11,54 +9,131 @@ #include <linux/security.h> #include "common.h" -static struct tomoyo_profile tomoyo_default_profile = { - .learning = &tomoyo_default_profile.preference, - .permissive = &tomoyo_default_profile.preference, - .enforcing = &tomoyo_default_profile.preference, - .preference.enforcing_verbose = true, - .preference.learning_max_entry = 2048, - .preference.learning_verbose = false, - .preference.permissive_verbose = true +/* String table for operation mode. */ +const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE] = { + [TOMOYO_CONFIG_DISABLED] = "disabled", + [TOMOYO_CONFIG_LEARNING] = "learning", + [TOMOYO_CONFIG_PERMISSIVE] = "permissive", + [TOMOYO_CONFIG_ENFORCING] = "enforcing" }; -/* Profile version. Currently only 20090903 is defined. 
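tomoyo_write_log() above computes the needed buffer size with a measuring vsnprintf() pass before formatting for real; the kernel probes into a 1-byte dummy, while C99 userspace code may pass NULL/0. A self-contained sketch of the same two-pass idiom:

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

/* Two-pass formatting: the first vsnprintf() only measures, the
 * second fills a right-sized buffer. Note the va_list must be
 * restarted between the two passes. */
static char *format_message(const char *fmt, ...)
{
        va_list args;
        int len;
        char *buf;

        va_start(args, fmt);
        len = vsnprintf(NULL, 0, fmt, args) + 1;        /* +1 for '\0' */
        va_end(args);
        buf = malloc(len);
        if (!buf)
                return NULL;
        va_start(args, fmt);
        vsnprintf(buf, len, fmt, args);
        va_end(args);
        return buf;
}

int main(void)
{
        char *msg = format_message("pid=%d comm=%s", 1234, "bash");

        if (msg) {
                puts(msg);
                free(msg);
        }
        return 0;
}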
*/ -static unsigned int tomoyo_profile_version; +/* String table for /sys/kernel/security/tomoyo/profile */ +const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX + + TOMOYO_MAX_MAC_CATEGORY_INDEX] = { + [TOMOYO_MAC_FILE_EXECUTE] = "execute", + [TOMOYO_MAC_FILE_OPEN] = "open", + [TOMOYO_MAC_FILE_CREATE] = "create", + [TOMOYO_MAC_FILE_UNLINK] = "unlink", + [TOMOYO_MAC_FILE_GETATTR] = "getattr", + [TOMOYO_MAC_FILE_MKDIR] = "mkdir", + [TOMOYO_MAC_FILE_RMDIR] = "rmdir", + [TOMOYO_MAC_FILE_MKFIFO] = "mkfifo", + [TOMOYO_MAC_FILE_MKSOCK] = "mksock", + [TOMOYO_MAC_FILE_TRUNCATE] = "truncate", + [TOMOYO_MAC_FILE_SYMLINK] = "symlink", + [TOMOYO_MAC_FILE_MKBLOCK] = "mkblock", + [TOMOYO_MAC_FILE_MKCHAR] = "mkchar", + [TOMOYO_MAC_FILE_LINK] = "link", + [TOMOYO_MAC_FILE_RENAME] = "rename", + [TOMOYO_MAC_FILE_CHMOD] = "chmod", + [TOMOYO_MAC_FILE_CHOWN] = "chown", + [TOMOYO_MAC_FILE_CHGRP] = "chgrp", + [TOMOYO_MAC_FILE_IOCTL] = "ioctl", + [TOMOYO_MAC_FILE_CHROOT] = "chroot", + [TOMOYO_MAC_FILE_MOUNT] = "mount", + [TOMOYO_MAC_FILE_UMOUNT] = "unmount", + [TOMOYO_MAC_FILE_PIVOT_ROOT] = "pivot_root", + [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file", +}; -/* Profile table. Memory is allocated as needed. */ -static struct tomoyo_profile *tomoyo_profile_ptr[TOMOYO_MAX_PROFILES]; +/* String table for conditions. */ +const char * const tomoyo_condition_keyword[TOMOYO_MAX_CONDITION_KEYWORD] = { + [TOMOYO_TASK_UID] = "task.uid", + [TOMOYO_TASK_EUID] = "task.euid", + [TOMOYO_TASK_SUID] = "task.suid", + [TOMOYO_TASK_FSUID] = "task.fsuid", + [TOMOYO_TASK_GID] = "task.gid", + [TOMOYO_TASK_EGID] = "task.egid", + [TOMOYO_TASK_SGID] = "task.sgid", + [TOMOYO_TASK_FSGID] = "task.fsgid", + [TOMOYO_TASK_PID] = "task.pid", + [TOMOYO_TASK_PPID] = "task.ppid", + [TOMOYO_EXEC_ARGC] = "exec.argc", + [TOMOYO_EXEC_ENVC] = "exec.envc", + [TOMOYO_TYPE_IS_SOCKET] = "socket", + [TOMOYO_TYPE_IS_SYMLINK] = "symlink", + [TOMOYO_TYPE_IS_FILE] = "file", + [TOMOYO_TYPE_IS_BLOCK_DEV] = "block", + [TOMOYO_TYPE_IS_DIRECTORY] = "directory", + [TOMOYO_TYPE_IS_CHAR_DEV] = "char", + [TOMOYO_TYPE_IS_FIFO] = "fifo", + [TOMOYO_MODE_SETUID] = "setuid", + [TOMOYO_MODE_SETGID] = "setgid", + [TOMOYO_MODE_STICKY] = "sticky", + [TOMOYO_MODE_OWNER_READ] = "owner_read", + [TOMOYO_MODE_OWNER_WRITE] = "owner_write", + [TOMOYO_MODE_OWNER_EXECUTE] = "owner_execute", + [TOMOYO_MODE_GROUP_READ] = "group_read", + [TOMOYO_MODE_GROUP_WRITE] = "group_write", + [TOMOYO_MODE_GROUP_EXECUTE] = "group_execute", + [TOMOYO_MODE_OTHERS_READ] = "others_read", + [TOMOYO_MODE_OTHERS_WRITE] = "others_write", + [TOMOYO_MODE_OTHERS_EXECUTE] = "others_execute", + [TOMOYO_EXEC_REALPATH] = "exec.realpath", + [TOMOYO_SYMLINK_TARGET] = "symlink.target", + [TOMOYO_PATH1_UID] = "path1.uid", + [TOMOYO_PATH1_GID] = "path1.gid", + [TOMOYO_PATH1_INO] = "path1.ino", + [TOMOYO_PATH1_MAJOR] = "path1.major", + [TOMOYO_PATH1_MINOR] = "path1.minor", + [TOMOYO_PATH1_PERM] = "path1.perm", + [TOMOYO_PATH1_TYPE] = "path1.type", + [TOMOYO_PATH1_DEV_MAJOR] = "path1.dev_major", + [TOMOYO_PATH1_DEV_MINOR] = "path1.dev_minor", + [TOMOYO_PATH2_UID] = "path2.uid", + [TOMOYO_PATH2_GID] = "path2.gid", + [TOMOYO_PATH2_INO] = "path2.ino", + [TOMOYO_PATH2_MAJOR] = "path2.major", + [TOMOYO_PATH2_MINOR] = "path2.minor", + [TOMOYO_PATH2_PERM] = "path2.perm", + [TOMOYO_PATH2_TYPE] = "path2.type", + [TOMOYO_PATH2_DEV_MAJOR] = "path2.dev_major", + [TOMOYO_PATH2_DEV_MINOR] = "path2.dev_minor", + [TOMOYO_PATH1_PARENT_UID] = "path1.parent.uid", + [TOMOYO_PATH1_PARENT_GID] = "path1.parent.gid", + 
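The tables above map enum indices to keyword strings through designated initializers, and the parsing side walks them back from text to index. A compact sketch of that pairing, using a trimmed hypothetical enum rather than TOMOYO's full set:

#include <stdio.h>
#include <string.h>

/* Keyword table indexed by enum; designated initializers keep the
 * table correct even if enum order changes. */
enum cond_index {
        COND_TASK_UID,
        COND_TASK_EUID,
        COND_TASK_PID,
        COND_MAX
};

static const char * const cond_keyword[COND_MAX] = {
        [COND_TASK_UID]  = "task.uid",
        [COND_TASK_EUID] = "task.euid",
        [COND_TASK_PID]  = "task.pid",
};

/* Reverse lookup: turn parsed text back into an index. */
static int keyword_to_index(const char *word)
{
        int i;

        for (i = 0; i < COND_MAX; i++)
                if (!strcmp(word, cond_keyword[i]))
                        return i;
        return -1;      /* Unknown keyword. */
}

int main(void)
{
        printf("%d\n", keyword_to_index("task.euid"));  /* prints 1 */
        return 0;
}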
[TOMOYO_PATH1_PARENT_INO] = "path1.parent.ino", + [TOMOYO_PATH1_PARENT_PERM] = "path1.parent.perm", + [TOMOYO_PATH2_PARENT_UID] = "path2.parent.uid", + [TOMOYO_PATH2_PARENT_GID] = "path2.parent.gid", + [TOMOYO_PATH2_PARENT_INO] = "path2.parent.ino", + [TOMOYO_PATH2_PARENT_PERM] = "path2.parent.perm", +}; -/* String table for functionality that takes 4 modes. */ -static const char *tomoyo_mode[4] = { - "disabled", "learning", "permissive", "enforcing" +/* String table for PREFERENCE keyword. */ +static const char * const tomoyo_pref_keywords[TOMOYO_MAX_PREF] = { + [TOMOYO_PREF_MAX_AUDIT_LOG] = "max_audit_log", + [TOMOYO_PREF_MAX_LEARNING_ENTRY] = "max_learning_entry", }; -/* String table for /sys/kernel/security/tomoyo/profile */ -static const char *tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX - + TOMOYO_MAX_MAC_CATEGORY_INDEX] = { - [TOMOYO_MAC_FILE_EXECUTE] = "file::execute", - [TOMOYO_MAC_FILE_OPEN] = "file::open", - [TOMOYO_MAC_FILE_CREATE] = "file::create", - [TOMOYO_MAC_FILE_UNLINK] = "file::unlink", - [TOMOYO_MAC_FILE_MKDIR] = "file::mkdir", - [TOMOYO_MAC_FILE_RMDIR] = "file::rmdir", - [TOMOYO_MAC_FILE_MKFIFO] = "file::mkfifo", - [TOMOYO_MAC_FILE_MKSOCK] = "file::mksock", - [TOMOYO_MAC_FILE_TRUNCATE] = "file::truncate", - [TOMOYO_MAC_FILE_SYMLINK] = "file::symlink", - [TOMOYO_MAC_FILE_REWRITE] = "file::rewrite", - [TOMOYO_MAC_FILE_MKBLOCK] = "file::mkblock", - [TOMOYO_MAC_FILE_MKCHAR] = "file::mkchar", - [TOMOYO_MAC_FILE_LINK] = "file::link", - [TOMOYO_MAC_FILE_RENAME] = "file::rename", - [TOMOYO_MAC_FILE_CHMOD] = "file::chmod", - [TOMOYO_MAC_FILE_CHOWN] = "file::chown", - [TOMOYO_MAC_FILE_CHGRP] = "file::chgrp", - [TOMOYO_MAC_FILE_IOCTL] = "file::ioctl", - [TOMOYO_MAC_FILE_CHROOT] = "file::chroot", - [TOMOYO_MAC_FILE_MOUNT] = "file::mount", - [TOMOYO_MAC_FILE_UMOUNT] = "file::umount", - [TOMOYO_MAC_FILE_PIVOT_ROOT] = "file::pivot_root", - [TOMOYO_MAX_MAC_INDEX + TOMOYO_MAC_CATEGORY_FILE] = "file", +/* String table for path operation. */ +const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = { + [TOMOYO_TYPE_EXECUTE] = "execute", + [TOMOYO_TYPE_READ] = "read", + [TOMOYO_TYPE_WRITE] = "write", + [TOMOYO_TYPE_APPEND] = "append", + [TOMOYO_TYPE_UNLINK] = "unlink", + [TOMOYO_TYPE_GETATTR] = "getattr", + [TOMOYO_TYPE_RMDIR] = "rmdir", + [TOMOYO_TYPE_TRUNCATE] = "truncate", + [TOMOYO_TYPE_SYMLINK] = "symlink", + [TOMOYO_TYPE_CHROOT] = "chroot", + [TOMOYO_TYPE_UMOUNT] = "unmount", +}; + +/* String table for categories. */ +static const char * const tomoyo_category_keywords +[TOMOYO_MAX_MAC_CATEGORY_INDEX] = { + [TOMOYO_MAC_CATEGORY_FILE] = "file", }; /* Permit policy management by non-root user? */ @@ -71,11 +146,20 @@ static bool tomoyo_manage_by_non_root; * * @value: Bool value. */ -static const char *tomoyo_yesno(const unsigned int value) +const char *tomoyo_yesno(const unsigned int value) { return value ? "yes" : "no"; } +/** + * tomoyo_addprintf - strncat()-like-snprintf(). + * + * @buffer: Buffer to write to. Must be '\0'-terminated. + * @len: Size of @buffer. + * @fmt: The printf()'s format string, followed by parameters. + * + * Returns nothing. + */ static void tomoyo_addprintf(char *buffer, int len, const char *fmt, ...) 
{ va_list args; @@ -96,7 +180,7 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head) { while (head->r.w_pos) { const char *w = head->r.w[0]; - int len = strlen(w); + size_t len = strlen(w); if (len) { if (len > head->read_user_buf_avail) len = head->read_user_buf_avail; @@ -111,7 +195,7 @@ static bool tomoyo_flush(struct tomoyo_io_buffer *head) head->r.w[0] = w; if (*w) return false; - /* Add '\0' for query. */ + /* Add '\0' for audit logs and query. */ if (head->poll) { if (!head->read_user_buf_avail || copy_to_user(head->read_user_buf, "", 1)) @@ -155,8 +239,8 @@ static void tomoyo_set_string(struct tomoyo_io_buffer *head, const char *string) void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) { va_list args; - int len; - int pos = head->r.avail; + size_t len; + size_t pos = head->r.avail; int size = head->readbuf_size - pos; if (size <= 0) return; @@ -171,11 +255,25 @@ void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) tomoyo_set_string(head, head->read_buf + pos); } +/** + * tomoyo_set_space - Put a space to "struct tomoyo_io_buffer" structure. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ static void tomoyo_set_space(struct tomoyo_io_buffer *head) { tomoyo_set_string(head, " "); } +/** + * tomoyo_set_lf - Put a line feed to "struct tomoyo_io_buffer" structure. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ static bool tomoyo_set_lf(struct tomoyo_io_buffer *head) { tomoyo_set_string(head, "\n"); @@ -183,6 +281,62 @@ static bool tomoyo_set_lf(struct tomoyo_io_buffer *head) } /** + * tomoyo_set_slash - Put a slash to "struct tomoyo_io_buffer" structure. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ +static void tomoyo_set_slash(struct tomoyo_io_buffer *head) +{ + tomoyo_set_string(head, "/"); +} + +/* List of namespaces. */ +LIST_HEAD(tomoyo_namespace_list); +/* True if namespace other than tomoyo_kernel_namespace is defined. */ +static bool tomoyo_namespace_enabled; + +/** + * tomoyo_init_policy_namespace - Initialize namespace. + * + * @ns: Pointer to "struct tomoyo_policy_namespace". + * + * Returns nothing. + */ +void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns) +{ + unsigned int idx; + for (idx = 0; idx < TOMOYO_MAX_ACL_GROUPS; idx++) + INIT_LIST_HEAD(&ns->acl_group[idx]); + for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++) + INIT_LIST_HEAD(&ns->group_list[idx]); + for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++) + INIT_LIST_HEAD(&ns->policy_list[idx]); + ns->profile_version = 20100903; + tomoyo_namespace_enabled = !list_empty(&tomoyo_namespace_list); + list_add_tail_rcu(&ns->namespace_list, &tomoyo_namespace_list); +} + +/** + * tomoyo_print_namespace - Print namespace header. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ +static void tomoyo_print_namespace(struct tomoyo_io_buffer *head) +{ + if (!tomoyo_namespace_enabled) + return; + tomoyo_set_string(head, + container_of(head->r.ns, + struct tomoyo_policy_namespace, + namespace_list)->name); + tomoyo_set_space(head); +} + +/** * tomoyo_print_name_union - Print a tomoyo_name_union. * * @head: Pointer to "struct tomoyo_io_buffer".
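tomoyo_addprintf() is described above as a strncat()-like snprintf(); its body falls outside this hunk, so the following userspace sketch is an assumption about that behaviour: append formatted text at the current end of a '\0'-terminated buffer without ever writing past the buffer's total size.

#include <stdarg.h>
#include <stdio.h>
#include <string.h>

/* Assumed behaviour of a strncat()-like snprintf(): find the current
 * end of the string, then format into the remaining space only. */
static void addprintf(char *buffer, size_t len, const char *fmt, ...)
{
        va_list args;
        size_t pos = strlen(buffer);

        if (pos >= len)
                return;         /* Buffer already full. */
        va_start(args, fmt);
        vsnprintf(buffer + pos, len - pos, fmt, args);
        va_end(args);
}

int main(void)
{
        char line[64] = "path1.perm";

        addprintf(line, sizeof(line), "=0%o", 0644u);
        puts(line);     /* path1.perm=0644 */
        return 0;
}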
@@ -192,7 +346,7 @@ static void tomoyo_print_name_union(struct tomoyo_io_buffer *head, const struct tomoyo_name_union *ptr) { tomoyo_set_space(head); - if (ptr->is_group) { + if (ptr->group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->group->group_name->name); } else { @@ -201,24 +355,46 @@ static void tomoyo_print_name_union(struct tomoyo_io_buffer *head, } /** - * tomoyo_print_number_union - Print a tomoyo_number_union. + * tomoyo_print_name_union_quoted - Print a tomoyo_name_union with a quote. * - * @head: Pointer to "struct tomoyo_io_buffer". - * @ptr: Pointer to "struct tomoyo_number_union". + * @head: Pointer to "struct tomoyo_io_buffer". + * @ptr: Pointer to "struct tomoyo_name_union". + * + * Returns nothing. */ -static void tomoyo_print_number_union(struct tomoyo_io_buffer *head, - const struct tomoyo_number_union *ptr) +static void tomoyo_print_name_union_quoted(struct tomoyo_io_buffer *head, + const struct tomoyo_name_union *ptr) { - tomoyo_set_space(head); - if (ptr->is_group) { + if (ptr->group) { + tomoyo_set_string(head, "@"); + tomoyo_set_string(head, ptr->group->group_name->name); + } else { + tomoyo_set_string(head, "\""); + tomoyo_set_string(head, ptr->filename->name); + tomoyo_set_string(head, "\""); + } +} + +/** + * tomoyo_print_number_union_nospace - Print a tomoyo_number_union without a space. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @ptr: Pointer to "struct tomoyo_number_union". + * + * Returns nothing. + */ +static void tomoyo_print_number_union_nospace +(struct tomoyo_io_buffer *head, const struct tomoyo_number_union *ptr) +{ + if (ptr->group) { tomoyo_set_string(head, "@"); tomoyo_set_string(head, ptr->group->group_name->name); } else { int i; unsigned long min = ptr->values[0]; const unsigned long max = ptr->values[1]; - u8 min_type = ptr->min_type; - const u8 max_type = ptr->max_type; + u8 min_type = ptr->value_type[0]; + const u8 max_type = ptr->value_type[1]; char buffer[128]; buffer[0] = '\0'; for (i = 0; i < 2; i++) { @@ -232,8 +408,8 @@ static void tomoyo_print_number_union(struct tomoyo_io_buffer *head, "0%lo", min); break; default: - tomoyo_addprintf(buffer, sizeof(buffer), - "%lu", min); + tomoyo_addprintf(buffer, sizeof(buffer), "%lu", + min); break; } if (min == max && min_type == max_type) @@ -247,35 +423,53 @@ static void tomoyo_print_number_union(struct tomoyo_io_buffer *head, } /** + * tomoyo_print_number_union - Print a tomoyo_number_union. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @ptr: Pointer to "struct tomoyo_number_union". + * + * Returns nothing. + */ +static void tomoyo_print_number_union(struct tomoyo_io_buffer *head, + const struct tomoyo_number_union *ptr) +{ + tomoyo_set_space(head); + tomoyo_print_number_union_nospace(head, ptr); +} + +/** * tomoyo_assign_profile - Create a new profile. * + * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number to create. * * Returns pointer to "struct tomoyo_profile" on success, NULL otherwise. 
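tomoyo_print_number_union_nospace() above remembers, per bound, whether a value was written as hex, octal, or decimal, and prints it back in the same radix, collapsing "min-max" to a single value when both bounds and radixes agree. The same logic as a standalone sketch; the enum names are illustrative.

#include <stdio.h>

enum value_type { VALUE_DEC, VALUE_OCT, VALUE_HEX };

/* Print one bound in the radix it was originally written in. */
static void print_bound(char *buf, size_t len, unsigned long v, int type)
{
        switch (type) {
        case VALUE_HEX:
                snprintf(buf, len, "0x%lX", v);
                break;
        case VALUE_OCT:
                snprintf(buf, len, "0%lo", v);
                break;
        default:
                snprintf(buf, len, "%lu", v);
                break;
        }
}

static void print_range(unsigned long min, int min_type,
                        unsigned long max, int max_type)
{
        char lo[32], hi[32];

        print_bound(lo, sizeof(lo), min, min_type);
        if (min == max && min_type == max_type) {
                printf("%s\n", lo);     /* Single value, no range. */
                return;
        }
        print_bound(hi, sizeof(hi), max, max_type);
        printf("%s-%s\n", lo, hi);
}

int main(void)
{
        print_range(0644, VALUE_OCT, 0644, VALUE_OCT);  /* 0644 */
        print_range(0, VALUE_DEC, 0xFF, VALUE_HEX);     /* 0-0xFF */
        return 0;
}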
*/ -static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile) +static struct tomoyo_profile *tomoyo_assign_profile +(struct tomoyo_policy_namespace *ns, const unsigned int profile) { struct tomoyo_profile *ptr; struct tomoyo_profile *entry; if (profile >= TOMOYO_MAX_PROFILES) return NULL; - ptr = tomoyo_profile_ptr[profile]; + ptr = ns->profile_ptr[profile]; if (ptr) return ptr; entry = kzalloc(sizeof(*entry), GFP_NOFS); if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; - ptr = tomoyo_profile_ptr[profile]; + ptr = ns->profile_ptr[profile]; if (!ptr && tomoyo_memory_ok(entry)) { ptr = entry; - ptr->learning = &tomoyo_default_profile.preference; - ptr->permissive = &tomoyo_default_profile.preference; - ptr->enforcing = &tomoyo_default_profile.preference; - ptr->default_config = TOMOYO_CONFIG_DISABLED; + ptr->default_config = TOMOYO_CONFIG_DISABLED | + TOMOYO_CONFIG_WANT_GRANT_LOG | + TOMOYO_CONFIG_WANT_REJECT_LOG; memset(ptr->config, TOMOYO_CONFIG_USE_DEFAULT, sizeof(ptr->config)); + ptr->pref[TOMOYO_PREF_MAX_AUDIT_LOG] = 1024; + ptr->pref[TOMOYO_PREF_MAX_LEARNING_ENTRY] = 2048; mb(); /* Avoid out-of-order execution. */ - tomoyo_profile_ptr[profile] = ptr; + ns->profile_ptr[profile] = ptr; entry = NULL; } mutex_unlock(&tomoyo_policy_lock); @@ -287,19 +481,29 @@ static struct tomoyo_profile *tomoyo_assign_profile(const unsigned int profile) /** * tomoyo_profile - Find a profile. * + * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number to find. * * Returns pointer to "struct tomoyo_profile". */ -struct tomoyo_profile *tomoyo_profile(const u8 profile) +struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns, + const u8 profile) { - struct tomoyo_profile *ptr = tomoyo_profile_ptr[profile]; - if (!tomoyo_policy_loaded) - return &tomoyo_default_profile; - BUG_ON(!ptr); + static struct tomoyo_profile tomoyo_null_profile; + struct tomoyo_profile *ptr = ns->profile_ptr[profile]; + if (!ptr) + ptr = &tomoyo_null_profile; return ptr; } +/** + * tomoyo_find_yesno - Find values for specified keyword. + * + * @string: String to check. + * @find: Name of keyword. + * + * Returns 1 if "@find=yes" was found, 0 if "@find=no" was found, -1 otherwise. + */ static s8 tomoyo_find_yesno(const char *string, const char *find) { const char *cp = strstr(string, find); @@ -313,18 +517,15 @@ static s8 tomoyo_find_yesno(const char *string, const char *find) return -1; } -static void tomoyo_set_bool(bool *b, const char *string, const char *find) -{ - switch (tomoyo_find_yesno(string, find)) { - case 1: - *b = true; - break; - case 0: - *b = false; - break; - } -} - +/** + * tomoyo_set_uint - Set value for specified preference. + * + * @i: Pointer to "unsigned int". + * @string: String to check. + * @find: Name of keyword. + * + * Returns nothing. 
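tomoyo_find_yesno() scans a "key=value" string for "find=yes" or "find=no"; the middle of its body is elided by the hunk context, so this userspace copy fills it in the obvious way consistent with the visible prologue and "return -1" tail.

#include <stdio.h>
#include <string.h>

/* Returns 1 for "find=yes", 0 for "find=no", -1 if absent. */
static int find_yesno(const char *string, const char *find)
{
        const char *cp = strstr(string, find);

        if (cp) {
                cp += strlen(find);
                if (!strncmp(cp, "=yes", 4))
                        return 1;
                if (!strncmp(cp, "=no", 3))
                        return 0;
        }
        return -1;
}

int main(void)
{
        const char *line = "mode=enforcing grant_log=no reject_log=yes";

        printf("%d %d %d\n", find_yesno(line, "grant_log"),
               find_yesno(line, "reject_log"), find_yesno(line, "verbose"));
        return 0;       /* prints: 0 1 -1 */
}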
+ */ static void tomoyo_set_uint(unsigned int *i, const char *string, const char *find) { @@ -333,51 +534,16 @@ static void tomoyo_set_uint(unsigned int *i, const char *string, sscanf(cp + strlen(find), "=%u", i); } -static void tomoyo_set_pref(const char *name, const char *value, - const bool use_default, - struct tomoyo_profile *profile) -{ - struct tomoyo_preference **pref; - bool *verbose; - if (!strcmp(name, "enforcing")) { - if (use_default) { - pref = &profile->enforcing; - goto set_default; - } - profile->enforcing = &profile->preference; - verbose = &profile->preference.enforcing_verbose; - goto set_verbose; - } - if (!strcmp(name, "permissive")) { - if (use_default) { - pref = &profile->permissive; - goto set_default; - } - profile->permissive = &profile->preference; - verbose = &profile->preference.permissive_verbose; - goto set_verbose; - } - if (!strcmp(name, "learning")) { - if (use_default) { - pref = &profile->learning; - goto set_default; - } - profile->learning = &profile->preference; - tomoyo_set_uint(&profile->preference.learning_max_entry, value, - "max_entry"); - verbose = &profile->preference.learning_verbose; - goto set_verbose; - } - return; - set_default: - *pref = &tomoyo_default_profile.preference; - return; - set_verbose: - tomoyo_set_bool(verbose, value, "verbose"); -} - +/** + * tomoyo_set_mode - Set mode for specified profile. + * + * @name: Name of functionality. + * @value: Mode for @name. + * @profile: Pointer to "struct tomoyo_profile". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_set_mode(char *name, const char *value, - const bool use_default, struct tomoyo_profile *profile) { u8 i; @@ -389,7 +555,17 @@ static int tomoyo_set_mode(char *name, const char *value, config = 0; for (i = 0; i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX; i++) { + int len = 0; + if (i < TOMOYO_MAX_MAC_INDEX) { + const u8 c = tomoyo_index2category[i]; + const char *category = + tomoyo_category_keywords[c]; + len = strlen(category); + if (strncmp(name, category, len) || + name[len++] != ':' || name[len++] != ':') + continue; + } + if (strcmp(name + len, tomoyo_mac_keywords[i])) continue; config = profile->config[i]; break; @@ -399,7 +575,7 @@ static int tomoyo_set_mode(char *name, const char *value, } else { return -EINVAL; } - if (use_default) { + if (strstr(value, "use_default")) { config = TOMOYO_CONFIG_USE_DEFAULT; } else { u8 mode; @@ -410,6 +586,24 @@ static int tomoyo_set_mode(char *name, const char *value, * 'config' from 'TOMOYO_CONFIG_USE_DEFAULT'.
*/ config = (config & ~7) | mode; + if (config != TOMOYO_CONFIG_USE_DEFAULT) { + switch (tomoyo_find_yesno(value, "grant_log")) { + case 1: + config |= TOMOYO_CONFIG_WANT_GRANT_LOG; + break; + case 0: + config &= ~TOMOYO_CONFIG_WANT_GRANT_LOG; + break; + } + switch (tomoyo_find_yesno(value, "reject_log")) { + case 1: + config |= TOMOYO_CONFIG_WANT_REJECT_LOG; + break; + case 0: + config &= ~TOMOYO_CONFIG_WANT_REJECT_LOG; + break; + } + } } if (i < TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX) profile->config[i] = config; @@ -429,34 +623,22 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head) { char *data = head->write_buf; unsigned int i; - bool use_default = false; char *cp; struct tomoyo_profile *profile; - if (sscanf(data, "PROFILE_VERSION=%u", &tomoyo_profile_version) == 1) + if (sscanf(data, "PROFILE_VERSION=%u", &head->w.ns->profile_version) + == 1) return 0; i = simple_strtoul(data, &cp, 10); - if (data == cp) { - profile = &tomoyo_default_profile; - } else { - if (*cp != '-') - return -EINVAL; - data = cp + 1; - profile = tomoyo_assign_profile(i); - if (!profile) - return -EINVAL; - } + if (*cp != '-') + return -EINVAL; + data = cp + 1; + profile = tomoyo_assign_profile(head->w.ns, i); + if (!profile) + return -EINVAL; cp = strchr(data, '='); if (!cp) return -EINVAL; *cp++ = '\0'; - if (profile != &tomoyo_default_profile) - use_default = strstr(cp, "use_default") != NULL; - if (tomoyo_str_starts(&data, "PREFERENCE::")) { - tomoyo_set_pref(data, cp, use_default, profile); - return 0; - } - if (profile == &tomoyo_default_profile) - return -EINVAL; if (!strcmp(data, "COMMENT")) { static DEFINE_SPINLOCK(lock); const struct tomoyo_path_info *new_comment @@ -471,77 +653,62 @@ static int tomoyo_write_profile(struct tomoyo_io_buffer *head) tomoyo_put_name(old_comment); return 0; } - return tomoyo_set_mode(data, cp, use_default, profile); -} - -static void tomoyo_print_preference(struct tomoyo_io_buffer *head, - const int idx) -{ - struct tomoyo_preference *pref = &tomoyo_default_profile.preference; - const struct tomoyo_profile *profile = idx >= 0 ? - tomoyo_profile_ptr[idx] : NULL; - char buffer[16] = ""; - if (profile) { - buffer[sizeof(buffer) - 1] = '\0'; - snprintf(buffer, sizeof(buffer) - 1, "%u-", idx); - } - if (profile) { - pref = profile->learning; - if (pref == &tomoyo_default_profile.preference) - goto skip1; - } - tomoyo_io_printf(head, "%sPREFERENCE::%s={ " - "verbose=%s max_entry=%u }\n", - buffer, "learning", - tomoyo_yesno(pref->learning_verbose), - pref->learning_max_entry); - skip1: - if (profile) { - pref = profile->permissive; - if (pref == &tomoyo_default_profile.preference) - goto skip2; - } - tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n", - buffer, "permissive", - tomoyo_yesno(pref->permissive_verbose)); - skip2: - if (profile) { - pref = profile->enforcing; - if (pref == &tomoyo_default_profile.preference) - return; + if (!strcmp(data, "PREFERENCE")) { + for (i = 0; i < TOMOYO_MAX_PREF; i++) + tomoyo_set_uint(&profile->pref[i], cp, + tomoyo_pref_keywords[i]); + return 0; } - tomoyo_io_printf(head, "%sPREFERENCE::%s={ verbose=%s }\n", - buffer, "enforcing", - tomoyo_yesno(pref->enforcing_verbose)); + return tomoyo_set_mode(data, cp, profile); } +/** + * tomoyo_print_config - Print mode for specified functionality. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @config: Mode for that functionality. + * + * Returns nothing. + * + * Caller prints functionality's name. 
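The update above keeps the operation mode in the low three bits of a config byte, via (config & ~7) | mode, and the grant/reject audit preferences in separate flag bits. A sketch of that layout follows; the exact bit positions here are assumptions, only the masking idiom is taken from the hunk.

#include <stdio.h>

enum {
        MODE_MASK       = 7,            /* low 3 bits: disabled..enforcing */
        WANT_GRANT_LOG  = 1 << 3,       /* illustrative position */
        WANT_REJECT_LOG = 1 << 4,       /* illustrative position */
};

/* Replace the mode bits while keeping the audit-log bits intact. */
static unsigned char set_mode(unsigned char config, unsigned char mode)
{
        return (config & ~MODE_MASK) | mode;
}

int main(void)
{
        unsigned char config = WANT_GRANT_LOG | WANT_REJECT_LOG;

        config = set_mode(config, 3);   /* 3 == enforcing in the table */
        printf("mode=%u grant=%d reject=%d\n", config & MODE_MASK,
               !!(config & WANT_GRANT_LOG), !!(config & WANT_REJECT_LOG));
        return 0;
}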
+ */ static void tomoyo_print_config(struct tomoyo_io_buffer *head, const u8 config) { - tomoyo_io_printf(head, "={ mode=%s }\n", tomoyo_mode[config & 3]); + tomoyo_io_printf(head, "={ mode=%s grant_log=%s reject_log=%s }\n", + tomoyo_mode[config & 3], + tomoyo_yesno(config & TOMOYO_CONFIG_WANT_GRANT_LOG), + tomoyo_yesno(config & TOMOYO_CONFIG_WANT_REJECT_LOG)); } /** * tomoyo_read_profile - Read profile table. * * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. */ static void tomoyo_read_profile(struct tomoyo_io_buffer *head) { u8 index; + struct tomoyo_policy_namespace *ns = + container_of(head->r.ns, typeof(*ns), namespace_list); const struct tomoyo_profile *profile; + if (head->r.eof) + return; next: index = head->r.index; - profile = tomoyo_profile_ptr[index]; + profile = ns->profile_ptr[index]; switch (head->r.step) { case 0: - tomoyo_io_printf(head, "PROFILE_VERSION=%s\n", "20090903"); - tomoyo_print_preference(head, -1); + tomoyo_print_namespace(head); + tomoyo_io_printf(head, "PROFILE_VERSION=%u\n", + ns->profile_version); head->r.step++; break; case 1: for ( ; head->r.index < TOMOYO_MAX_PROFILES; head->r.index++) - if (tomoyo_profile_ptr[head->r.index]) + if (ns->profile_ptr[head->r.index]) break; if (head->r.index == TOMOYO_MAX_PROFILES) return; @@ -549,16 +716,25 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head) break; case 2: { + u8 i; const struct tomoyo_path_info *comment = profile->comment; + tomoyo_print_namespace(head); tomoyo_io_printf(head, "%u-COMMENT=", index); tomoyo_set_string(head, comment ? comment->name : ""); tomoyo_set_lf(head); + tomoyo_io_printf(head, "%u-PREFERENCE={ ", index); + for (i = 0; i < TOMOYO_MAX_PREF; i++) + tomoyo_io_printf(head, "%s=%u ", + tomoyo_pref_keywords[i], + profile->pref[i]); + tomoyo_set_string(head, "}\n"); head->r.step++; } break; case 3: { + tomoyo_print_namespace(head); tomoyo_io_printf(head, "%u-%s", index, "CONFIG"); tomoyo_print_config(head, profile->default_config); head->r.bit = 0; @@ -572,15 +748,22 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head) const u8 config = profile->config[i]; if (config == TOMOYO_CONFIG_USE_DEFAULT) continue; - tomoyo_io_printf(head, "%u-%s%s", index, "CONFIG::", - tomoyo_mac_keywords[i]); + tomoyo_print_namespace(head); + if (i < TOMOYO_MAX_MAC_INDEX) + tomoyo_io_printf(head, "%u-CONFIG::%s::%s", + index, + tomoyo_category_keywords + [tomoyo_index2category[i]], + tomoyo_mac_keywords[i]); + else + tomoyo_io_printf(head, "%u-CONFIG::%s", index, + tomoyo_mac_keywords[i]); tomoyo_print_config(head, config); head->r.bit++; break; } if (head->r.bit == TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX) { - tomoyo_print_preference(head, index); head->r.index++; head->r.step = 1; } @@ -590,6 +773,14 @@ static void tomoyo_read_profile(struct tomoyo_io_buffer *head) goto next; } +/** + * tomoyo_same_manager - Check for duplicated "struct tomoyo_manager" entry. + * + * @a: Pointer to "struct tomoyo_acl_head". + * @b: Pointer to "struct tomoyo_acl_head". + * + * Returns true if @a == @b, false otherwise. + */ static bool tomoyo_same_manager(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { @@ -611,8 +802,13 @@ static int tomoyo_update_manager_entry(const char *manager, const bool is_delete) { struct tomoyo_manager e = { }; - int error; - + struct tomoyo_acl_param param = { + /* .ns = &tomoyo_kernel_namespace, */ + .is_delete = is_delete, + .list = &tomoyo_kernel_namespace. + policy_list[TOMOYO_ID_MANAGER], + }; + int error = is_delete ? 
-ENOENT : -ENOMEM; if (tomoyo_domain_def(manager)) { if (!tomoyo_correct_domain(manager)) return -EINVAL; @@ -622,12 +818,11 @@ static int tomoyo_update_manager_entry(const char *manager, return -EINVAL; } e.manager = tomoyo_get_name(manager); - if (!e.manager) - return -ENOMEM; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list[TOMOYO_ID_MANAGER], - tomoyo_same_manager); - tomoyo_put_name(e.manager); + if (e.manager) { + error = tomoyo_update_policy(&e.head, sizeof(e), ¶m, + tomoyo_same_manager); + tomoyo_put_name(e.manager); + } return error; } @@ -643,13 +838,12 @@ static int tomoyo_update_manager_entry(const char *manager, static int tomoyo_write_manager(struct tomoyo_io_buffer *head) { char *data = head->write_buf; - bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE); if (!strcmp(data, "manage_by_non_root")) { - tomoyo_manage_by_non_root = !is_delete; + tomoyo_manage_by_non_root = !head->w.is_delete; return 0; } - return tomoyo_update_manager_entry(data, is_delete); + return tomoyo_update_manager_entry(data, head->w.is_delete); } /** @@ -663,8 +857,8 @@ static void tomoyo_read_manager(struct tomoyo_io_buffer *head) { if (head->r.eof) return; - list_for_each_cookie(head->r.acl, - &tomoyo_policy_list[TOMOYO_ID_MANAGER]) { + list_for_each_cookie(head->r.acl, &tomoyo_kernel_namespace. + policy_list[TOMOYO_ID_MANAGER]) { struct tomoyo_manager *ptr = list_entry(head->r.acl, typeof(*ptr), head.list); if (ptr->head.is_deleted) @@ -697,8 +891,8 @@ static bool tomoyo_manager(void) return true; if (!tomoyo_manage_by_non_root && (task->cred->uid || task->cred->euid)) return false; - list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER], - head.list) { + list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace. + policy_list[TOMOYO_ID_MANAGER], head.list) { if (!ptr->head.is_deleted && ptr->is_domain && !tomoyo_pathcmp(domainname, ptr->manager)) { found = true; @@ -710,8 +904,8 @@ static bool tomoyo_manager(void) exe = tomoyo_get_exe(); if (!exe) return false; - list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_MANAGER], - head.list) { + list_for_each_entry_rcu(ptr, &tomoyo_kernel_namespace. + policy_list[TOMOYO_ID_MANAGER], head.list) { if (!ptr->head.is_deleted && !ptr->is_domain && !strcmp(exe, ptr->manager->name)) { found = true; @@ -732,7 +926,7 @@ static bool tomoyo_manager(void) } /** - * tomoyo_select_one - Parse select command. + * tomoyo_select_domain - Parse select command. * * @head: Pointer to "struct tomoyo_io_buffer". * @data: String to parse. @@ -741,16 +935,15 @@ static bool tomoyo_manager(void) * * Caller holds tomoyo_read_lock(). */ -static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data) +static bool tomoyo_select_domain(struct tomoyo_io_buffer *head, + const char *data) { unsigned int pid; struct tomoyo_domain_info *domain = NULL; bool global_pid = false; - - if (!strcmp(data, "allow_execute")) { - head->r.print_execute_only = true; - return true; - } + if (strncmp(data, "select ", 7)) + return false; + data += 7; if (sscanf(data, "pid=%u", &pid) == 1 || (global_pid = true, sscanf(data, "global-pid=%u", &pid) == 1)) { struct task_struct *p; @@ -769,7 +962,7 @@ static bool tomoyo_select_one(struct tomoyo_io_buffer *head, const char *data) domain = tomoyo_find_domain(data + 7); } else return false; - head->write_var1 = domain; + head->w.domain = domain; /* Accessing read_buf is safe because head->io_sem is held. */ if (!head->read_buf) return true; /* Do nothing if open(O_WRONLY). 
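tomoyo_select_domain() above accepts three argument forms after the "select " prefix: pid=<n>, global-pid=<n>, and domain=<name>. A standalone parser sketch for that syntax; the handlers here just print what they matched instead of resolving a domain.

#include <stdio.h>
#include <string.h>

static int parse_select(const char *data)
{
        unsigned int pid;

        if (strncmp(data, "select ", 7))
                return 0;       /* Not a select command. */
        data += 7;
        if (sscanf(data, "pid=%u", &pid) == 1)
                printf("select by pid %u\n", pid);
        else if (sscanf(data, "global-pid=%u", &pid) == 1)
                printf("select by global pid %u\n", pid);
        else if (!strncmp(data, "domain=", 7))
                printf("select by domainname '%s'\n", data + 7);
        else
                return 0;       /* Unknown argument form. */
        return 1;
}

int main(void)
{
        parse_select("select pid=42");
        parse_select("select domain=<kernel> /usr/sbin/sshd");
        return 0;
}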
*/ @@ -821,20 +1014,47 @@ static int tomoyo_delete_domain(char *domainname) /** * tomoyo_write_domain2 - Write domain policy. * - * @head: Pointer to "struct tomoyo_io_buffer". + * @ns: Pointer to "struct tomoyo_policy_namespace". + * @list: Pointer to "struct list_head". + * @data: Policy to be interpreted. + * @is_delete: True if it is a delete request. * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain, +static int tomoyo_write_domain2(struct tomoyo_policy_namespace *ns, + struct list_head *list, char *data, const bool is_delete) { - if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_ALLOW_MOUNT)) - return tomoyo_write_mount(data, domain, is_delete); - return tomoyo_write_file(data, domain, is_delete); + struct tomoyo_acl_param param = { + .ns = ns, + .list = list, + .data = data, + .is_delete = is_delete, + }; + static const struct { + const char *keyword; + int (*write) (struct tomoyo_acl_param *); + } tomoyo_callback[1] = { + { "file ", tomoyo_write_file }, + }; + u8 i; + for (i = 0; i < 1; i++) { + if (!tomoyo_str_starts(¶m.data, + tomoyo_callback[i].keyword)) + continue; + return tomoyo_callback[i].write(¶m); + } + return -EINVAL; } +/* String table for domain flags. */ +const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS] = { + [TOMOYO_DIF_QUOTA_WARNED] = "quota_exceeded\n", + [TOMOYO_DIF_TRANSITION_FAILED] = "transition_failed\n", +}; + /** * tomoyo_write_domain - Write domain policy. * @@ -847,69 +1067,198 @@ static int tomoyo_write_domain2(char *data, struct tomoyo_domain_info *domain, static int tomoyo_write_domain(struct tomoyo_io_buffer *head) { char *data = head->write_buf; - struct tomoyo_domain_info *domain = head->write_var1; - bool is_delete = false; - bool is_select = false; + struct tomoyo_policy_namespace *ns; + struct tomoyo_domain_info *domain = head->w.domain; + const bool is_delete = head->w.is_delete; + bool is_select = !is_delete && tomoyo_str_starts(&data, "select "); unsigned int profile; - - if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE)) - is_delete = true; - else if (tomoyo_str_starts(&data, TOMOYO_KEYWORD_SELECT)) - is_select = true; - if (is_select && tomoyo_select_one(head, data)) - return 0; - /* Don't allow updating policies by non manager programs. 
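tomoyo_write_domain2() above dispatches on a leading keyword through a table of { keyword, handler } pairs; only "file " exists in this hunk. A userspace sketch of the same dispatch, where "misc " is a made-up second entry added only to show how the table grows.

#include <stdio.h>
#include <string.h>

static int write_file(const char *data)
{
        printf("file acl: %s\n", data);
        return 0;
}

static int write_misc(const char *data)
{
        printf("misc acl: %s\n", data);
        return 0;
}

static const struct {
        const char *keyword;
        int (*write)(const char *);
} callbacks[] = {
        { "file ", write_file },
        { "misc ", write_misc },        /* hypothetical extra entry */
};

/* Strip the matching keyword and hand the rest to its writer. */
static int write_domain_line(const char *data)
{
        size_t i;

        for (i = 0; i < sizeof(callbacks) / sizeof(callbacks[0]); i++) {
                size_t len = strlen(callbacks[i].keyword);

                if (!strncmp(data, callbacks[i].keyword, len))
                        return callbacks[i].write(data + len);
        }
        return -22;     /* -EINVAL: no keyword matched. */
}

int main(void)
{
        return write_domain_line("file read /etc/fstab");
}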
*/ - if (!tomoyo_manager()) - return -EPERM; - if (tomoyo_domain_def(data)) { + if (*data == '<') { domain = NULL; if (is_delete) tomoyo_delete_domain(data); else if (is_select) domain = tomoyo_find_domain(data); else - domain = tomoyo_assign_domain(data, 0); - head->write_var1 = domain; + domain = tomoyo_assign_domain(data, false); + head->w.domain = domain; return 0; } if (!domain) return -EINVAL; - - if (sscanf(data, TOMOYO_KEYWORD_USE_PROFILE "%u", &profile) == 1 + ns = domain->ns; + if (sscanf(data, "use_profile %u", &profile) == 1 && profile < TOMOYO_MAX_PROFILES) { - if (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded) + if (!tomoyo_policy_loaded || ns->profile_ptr[profile]) domain->profile = (u8) profile; return 0; } - if (!strcmp(data, TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ)) { - domain->ignore_global_allow_read = !is_delete; + if (sscanf(data, "use_group %u\n", &profile) == 1 + && profile < TOMOYO_MAX_ACL_GROUPS) { + if (!is_delete) + domain->group = (u8) profile; return 0; } - if (!strcmp(data, TOMOYO_KEYWORD_QUOTA_EXCEEDED)) { - domain->quota_warned = !is_delete; - return 0; - } - if (!strcmp(data, TOMOYO_KEYWORD_TRANSITION_FAILED)) { - domain->transition_failed = !is_delete; + for (profile = 0; profile < TOMOYO_MAX_DOMAIN_INFO_FLAGS; profile++) { + const char *cp = tomoyo_dif[profile]; + if (strncmp(data, cp, strlen(cp) - 1)) + continue; + domain->flags[profile] = !is_delete; return 0; } - return tomoyo_write_domain2(data, domain, is_delete); + return tomoyo_write_domain2(ns, &domain->acl_info_list, data, + is_delete); } /** - * tomoyo_fns - Find next set bit. + * tomoyo_print_condition - Print condition part. * - * @perm: 8 bits value. - * @bit: First bit to find. + * @head: Pointer to "struct tomoyo_io_buffer". + * @cond: Pointer to "struct tomoyo_condition". * - * Returns next on-bit on success, 8 otherwise. + * Returns true on success, false otherwise. */ -static u8 tomoyo_fns(const u8 perm, u8 bit) +static bool tomoyo_print_condition(struct tomoyo_io_buffer *head, + const struct tomoyo_condition *cond) { - for ( ; bit < 8; bit++) - if (perm & (1 << bit)) + switch (head->r.cond_step) { + case 0: + head->r.cond_index = 0; + head->r.cond_step++; + /* fall through */ + case 1: + { + const u16 condc = cond->condc; + const struct tomoyo_condition_element *condp = + (typeof(condp)) (cond + 1); + const struct tomoyo_number_union *numbers_p = + (typeof(numbers_p)) (condp + condc); + const struct tomoyo_name_union *names_p = + (typeof(names_p)) + (numbers_p + cond->numbers_count); + const struct tomoyo_argv *argv = + (typeof(argv)) (names_p + cond->names_count); + const struct tomoyo_envp *envp = + (typeof(envp)) (argv + cond->argc); + u16 skip; + for (skip = 0; skip < head->r.cond_index; skip++) { + const u8 left = condp->left; + const u8 right = condp->right; + condp++; + switch (left) { + case TOMOYO_ARGV_ENTRY: + argv++; + continue; + case TOMOYO_ENVP_ENTRY: + envp++; + continue; + case TOMOYO_NUMBER_UNION: + numbers_p++; + break; + } + switch (right) { + case TOMOYO_NAME_UNION: + names_p++; + break; + case TOMOYO_NUMBER_UNION: + numbers_p++; + break; + } + } + while (head->r.cond_index < condc) { + const u8 match = condp->equals; + const u8 left = condp->left; + const u8 right = condp->right; + if (!tomoyo_flush(head)) + return false; + condp++; + head->r.cond_index++; + tomoyo_set_space(head); + switch (left) { + case TOMOYO_ARGV_ENTRY: + tomoyo_io_printf(head, + "exec.argv[%lu]%s=\"", + argv->index, argv-> + is_not ? "!" 
: ""); + tomoyo_set_string(head, + argv->value->name); + tomoyo_set_string(head, "\""); + argv++; + continue; + case TOMOYO_ENVP_ENTRY: + tomoyo_set_string(head, + "exec.envp[\""); + tomoyo_set_string(head, + envp->name->name); + tomoyo_io_printf(head, "\"]%s=", envp-> + is_not ? "!" : ""); + if (envp->value) { + tomoyo_set_string(head, "\""); + tomoyo_set_string(head, envp-> + value->name); + tomoyo_set_string(head, "\""); + } else { + tomoyo_set_string(head, + "NULL"); + } + envp++; + continue; + case TOMOYO_NUMBER_UNION: + tomoyo_print_number_union_nospace + (head, numbers_p++); + break; + default: + tomoyo_set_string(head, + tomoyo_condition_keyword[left]); + break; + } + tomoyo_set_string(head, match ? "=" : "!="); + switch (right) { + case TOMOYO_NAME_UNION: + tomoyo_print_name_union_quoted + (head, names_p++); + break; + case TOMOYO_NUMBER_UNION: + tomoyo_print_number_union_nospace + (head, numbers_p++); + break; + default: + tomoyo_set_string(head, + tomoyo_condition_keyword[right]); + break; + } + } + } + head->r.cond_step++; + /* fall through */ + case 2: + if (!tomoyo_flush(head)) break; - return bit; + head->r.cond_step++; + /* fall through */ + case 3: + tomoyo_set_lf(head); + return true; + } + return false; +} + +/** + * tomoyo_set_group - Print "acl_group " header keyword and category name. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @category: Category name. + * + * Returns nothing. + */ +static void tomoyo_set_group(struct tomoyo_io_buffer *head, + const char *category) +{ + if (head->type == TOMOYO_EXCEPTIONPOLICY) { + tomoyo_print_namespace(head); + tomoyo_io_printf(head, "acl_group %u ", + head->r.acl_group_index); + } + tomoyo_set_string(head, category); } /** @@ -924,63 +1273,96 @@ static bool tomoyo_print_entry(struct tomoyo_io_buffer *head, struct tomoyo_acl_info *acl) { const u8 acl_type = acl->type; + bool first = true; u8 bit; + if (head->r.print_cond_part) + goto print_cond_part; if (acl->is_deleted) return true; - next: - bit = head->r.bit; if (!tomoyo_flush(head)) return false; else if (acl_type == TOMOYO_TYPE_PATH_ACL) { struct tomoyo_path_acl *ptr = container_of(acl, typeof(*ptr), head); const u16 perm = ptr->perm; - for ( ; bit < TOMOYO_MAX_PATH_OPERATION; bit++) { + for (bit = 0; bit < TOMOYO_MAX_PATH_OPERATION; bit++) { if (!(perm & (1 << bit))) continue; - if (head->r.print_execute_only && + if (head->r.print_transition_related_only && bit != TOMOYO_TYPE_EXECUTE) continue; - /* Print "read/write" instead of "read" and "write". 
*/ - if ((bit == TOMOYO_TYPE_READ || - bit == TOMOYO_TYPE_WRITE) - && (perm & (1 << TOMOYO_TYPE_READ_WRITE))) - continue; - break; + if (first) { + tomoyo_set_group(head, "file "); + first = false; + } else { + tomoyo_set_slash(head); + } + tomoyo_set_string(head, tomoyo_path_keyword[bit]); } - if (bit >= TOMOYO_MAX_PATH_OPERATION) - goto done; - tomoyo_io_printf(head, "allow_%s", tomoyo_path_keyword[bit]); + if (first) + return true; tomoyo_print_name_union(head, &ptr->name); - } else if (head->r.print_execute_only) { + } else if (head->r.print_transition_related_only) { return true; } else if (acl_type == TOMOYO_TYPE_PATH2_ACL) { struct tomoyo_path2_acl *ptr = container_of(acl, typeof(*ptr), head); - bit = tomoyo_fns(ptr->perm, bit); - if (bit >= TOMOYO_MAX_PATH2_OPERATION) - goto done; - tomoyo_io_printf(head, "allow_%s", tomoyo_path2_keyword[bit]); + const u8 perm = ptr->perm; + for (bit = 0; bit < TOMOYO_MAX_PATH2_OPERATION; bit++) { + if (!(perm & (1 << bit))) + continue; + if (first) { + tomoyo_set_group(head, "file "); + first = false; + } else { + tomoyo_set_slash(head); + } + tomoyo_set_string(head, tomoyo_mac_keywords + [tomoyo_pp2mac[bit]]); + } + if (first) + return true; tomoyo_print_name_union(head, &ptr->name1); tomoyo_print_name_union(head, &ptr->name2); } else if (acl_type == TOMOYO_TYPE_PATH_NUMBER_ACL) { struct tomoyo_path_number_acl *ptr = container_of(acl, typeof(*ptr), head); - bit = tomoyo_fns(ptr->perm, bit); - if (bit >= TOMOYO_MAX_PATH_NUMBER_OPERATION) - goto done; - tomoyo_io_printf(head, "allow_%s", - tomoyo_path_number_keyword[bit]); + const u8 perm = ptr->perm; + for (bit = 0; bit < TOMOYO_MAX_PATH_NUMBER_OPERATION; bit++) { + if (!(perm & (1 << bit))) + continue; + if (first) { + tomoyo_set_group(head, "file "); + first = false; + } else { + tomoyo_set_slash(head); + } + tomoyo_set_string(head, tomoyo_mac_keywords + [tomoyo_pn2mac[bit]]); + } + if (first) + return true; tomoyo_print_name_union(head, &ptr->name); tomoyo_print_number_union(head, &ptr->number); } else if (acl_type == TOMOYO_TYPE_MKDEV_ACL) { struct tomoyo_mkdev_acl *ptr = container_of(acl, typeof(*ptr), head); - bit = tomoyo_fns(ptr->perm, bit); - if (bit >= TOMOYO_MAX_MKDEV_OPERATION) - goto done; - tomoyo_io_printf(head, "allow_%s", tomoyo_mkdev_keyword[bit]); + const u8 perm = ptr->perm; + for (bit = 0; bit < TOMOYO_MAX_MKDEV_OPERATION; bit++) { + if (!(perm & (1 << bit))) + continue; + if (first) { + tomoyo_set_group(head, "file "); + first = false; + } else { + tomoyo_set_slash(head); + } + tomoyo_set_string(head, tomoyo_mac_keywords + [tomoyo_pnnn2mac[bit]]); + } + if (first) + return true; tomoyo_print_name_union(head, &ptr->name); tomoyo_print_number_union(head, &ptr->mode); tomoyo_print_number_union(head, &ptr->major); @@ -988,35 +1370,41 @@ static bool tomoyo_print_entry(struct tomoyo_io_buffer *head, } else if (acl_type == TOMOYO_TYPE_MOUNT_ACL) { struct tomoyo_mount_acl *ptr = container_of(acl, typeof(*ptr), head); - tomoyo_io_printf(head, "allow_mount"); + tomoyo_set_group(head, "file mount"); tomoyo_print_name_union(head, &ptr->dev_name); tomoyo_print_name_union(head, &ptr->dir_name); tomoyo_print_name_union(head, &ptr->fs_type); tomoyo_print_number_union(head, &ptr->flags); } - head->r.bit = bit + 1; - tomoyo_io_printf(head, "\n"); - if (acl_type != TOMOYO_TYPE_MOUNT_ACL) - goto next; - done: - head->r.bit = 0; + if (acl->cond) { + head->r.print_cond_part = true; + head->r.cond_step = 0; + if (!tomoyo_flush(head)) + return false; +print_cond_part: + if 
(!tomoyo_print_condition(head, acl->cond)) + return false; + head->r.print_cond_part = false; + } else { + tomoyo_set_lf(head); + } return true; } /** * tomoyo_read_domain2 - Read domain policy. * - * @head: Pointer to "struct tomoyo_io_buffer". - * @domain: Pointer to "struct tomoyo_domain_info". + * @head: Pointer to "struct tomoyo_io_buffer". + * @list: Pointer to "struct list_head". * * Caller holds tomoyo_read_lock(). * * Returns true on success, false otherwise. */ static bool tomoyo_read_domain2(struct tomoyo_io_buffer *head, - struct tomoyo_domain_info *domain) + struct list_head *list) { - list_for_each_cookie(head->r.acl, &domain->acl_info_list) { + list_for_each_cookie(head->r.acl, list) { struct tomoyo_acl_info *ptr = list_entry(head->r.acl, typeof(*ptr), list); if (!tomoyo_print_entry(head, ptr)) @@ -1041,6 +1429,7 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head) struct tomoyo_domain_info *domain = list_entry(head->r.domain, typeof(*domain), list); switch (head->r.step) { + u8 i; case 0: if (domain->is_deleted && !head->r.print_this_domain_only) @@ -1048,22 +1437,18 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head) /* Print domainname and flags. */ tomoyo_set_string(head, domain->domainname->name); tomoyo_set_lf(head); - tomoyo_io_printf(head, - TOMOYO_KEYWORD_USE_PROFILE "%u\n", + tomoyo_io_printf(head, "use_profile %u\n", domain->profile); - if (domain->quota_warned) - tomoyo_set_string(head, "quota_exceeded\n"); - if (domain->transition_failed) - tomoyo_set_string(head, "transition_failed\n"); - if (domain->ignore_global_allow_read) - tomoyo_set_string(head, - TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ - "\n"); + tomoyo_io_printf(head, "use_group %u\n", + domain->group); + for (i = 0; i < TOMOYO_MAX_DOMAIN_INFO_FLAGS; i++) + if (domain->flags[i]) + tomoyo_set_string(head, tomoyo_dif[i]); head->r.step++; tomoyo_set_lf(head); /* fall through */ case 1: - if (!tomoyo_read_domain2(head, domain)) + if (!tomoyo_read_domain2(head, &domain->acl_info_list)) return; head->r.step++; if (!tomoyo_set_lf(head)) @@ -1080,73 +1465,6 @@ static void tomoyo_read_domain(struct tomoyo_io_buffer *head) } /** - * tomoyo_write_domain_profile - Assign profile for specified domain. - * - * @head: Pointer to "struct tomoyo_io_buffer". - * - * Returns 0 on success, -EINVAL otherwise. - * - * This is equivalent to doing - * - * ( echo "select " $domainname; echo "use_profile " $profile ) | - * /usr/sbin/tomoyo-loadpolicy -d - * - * Caller holds tomoyo_read_lock(). - */ -static int tomoyo_write_domain_profile(struct tomoyo_io_buffer *head) -{ - char *data = head->write_buf; - char *cp = strchr(data, ' '); - struct tomoyo_domain_info *domain; - unsigned long profile; - - if (!cp) - return -EINVAL; - *cp = '\0'; - domain = tomoyo_find_domain(cp + 1); - if (strict_strtoul(data, 10, &profile)) - return -EINVAL; - if (domain && profile < TOMOYO_MAX_PROFILES - && (tomoyo_profile_ptr[profile] || !tomoyo_policy_loaded)) - domain->profile = (u8) profile; - return 0; -} - -/** - * tomoyo_read_domain_profile - Read only domainname and profile. - * - * @head: Pointer to "struct tomoyo_io_buffer". - * - * Returns list of profile number and domainname pairs. - * - * This is equivalent to doing - * - * grep -A 1 '^<kernel>' /sys/kernel/security/tomoyo/domain_policy | - * awk ' { if ( domainname == "" ) { if ( $1 == "<kernel>" ) - * domainname = $0; } else if ( $1 == "use_profile" ) { - * print $2 " " domainname; domainname = ""; } } ; ' - * - * Caller holds tomoyo_read_lock(). 
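list_for_each_cookie() lets these read routines resume a long listing across multiple read() calls. The macro itself is defined outside this hunk, so the following is only a plausible userspace analogue in which the reader state remembers the last node it emitted and a later call continues from there.

#include <stdio.h>

struct node {
        struct node *next;
        const char *name;
};

struct reader {
        struct node *cookie;    /* last node handed out, NULL = start */
};

/* Emit up to 'budget' entries; return how many were emitted. */
static int read_some(struct reader *r, struct node *head, int budget)
{
        struct node *n = r->cookie ? r->cookie->next : head;
        int emitted = 0;

        for (; n && emitted < budget; n = n->next, emitted++) {
                printf("%s\n", n->name);
                r->cookie = n;  /* Remember where to resume. */
        }
        return emitted;
}

int main(void)
{
        struct node c = { NULL, "domain C" };
        struct node b = { &c, "domain B" };
        struct node a = { &b, "domain A" };
        struct reader r = { NULL };

        read_some(&r, &a, 2);   /* A, B */
        read_some(&r, &a, 2);   /* C    */
        return 0;
}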
- */ -static void tomoyo_read_domain_profile(struct tomoyo_io_buffer *head) -{ - if (head->r.eof) - return; - list_for_each_cookie(head->r.domain, &tomoyo_domain_list) { - struct tomoyo_domain_info *domain = - list_entry(head->r.domain, typeof(*domain), list); - if (domain->is_deleted) - continue; - if (!tomoyo_flush(head)) - return; - tomoyo_io_printf(head, "%u ", domain->profile); - tomoyo_set_string(head, domain->domainname->name); - tomoyo_set_lf(head); - } - head->r.eof = true; -} - -/** * tomoyo_write_pid: Specify PID to obtain domainname. * * @head: Pointer to "struct tomoyo_io_buffer". @@ -1204,18 +1522,20 @@ static void tomoyo_read_pid(struct tomoyo_io_buffer *head) tomoyo_set_string(head, domain->domainname->name); } +/* String table for domain transition control keywords. */ static const char *tomoyo_transition_type[TOMOYO_MAX_TRANSITION_TYPE] = { - [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] - = TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN, - [TOMOYO_TRANSITION_CONTROL_INITIALIZE] - = TOMOYO_KEYWORD_INITIALIZE_DOMAIN, - [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = TOMOYO_KEYWORD_NO_KEEP_DOMAIN, - [TOMOYO_TRANSITION_CONTROL_KEEP] = TOMOYO_KEYWORD_KEEP_DOMAIN + [TOMOYO_TRANSITION_CONTROL_NO_RESET] = "no_reset_domain ", + [TOMOYO_TRANSITION_CONTROL_RESET] = "reset_domain ", + [TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE] = "no_initialize_domain ", + [TOMOYO_TRANSITION_CONTROL_INITIALIZE] = "initialize_domain ", + [TOMOYO_TRANSITION_CONTROL_NO_KEEP] = "no_keep_domain ", + [TOMOYO_TRANSITION_CONTROL_KEEP] = "keep_domain ", }; +/* String table for grouping keywords. */ static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = { - [TOMOYO_PATH_GROUP] = TOMOYO_KEYWORD_PATH_GROUP, - [TOMOYO_NUMBER_GROUP] = TOMOYO_KEYWORD_NUMBER_GROUP + [TOMOYO_PATH_GROUP] = "path_group ", + [TOMOYO_NUMBER_GROUP] = "number_group ", }; /** @@ -1229,29 +1549,30 @@ static const char *tomoyo_group_name[TOMOYO_MAX_GROUP] = { */ static int tomoyo_write_exception(struct tomoyo_io_buffer *head) { - char *data = head->write_buf; - bool is_delete = tomoyo_str_starts(&data, TOMOYO_KEYWORD_DELETE); - u8 i; - static const struct { - const char *keyword; - int (*write) (char *, const bool); - } tomoyo_callback[4] = { - { TOMOYO_KEYWORD_AGGREGATOR, tomoyo_write_aggregator }, - { TOMOYO_KEYWORD_FILE_PATTERN, tomoyo_write_pattern }, - { TOMOYO_KEYWORD_DENY_REWRITE, tomoyo_write_no_rewrite }, - { TOMOYO_KEYWORD_ALLOW_READ, tomoyo_write_globally_readable }, + const bool is_delete = head->w.is_delete; + struct tomoyo_acl_param param = { + .ns = head->w.ns, + .is_delete = is_delete, + .data = head->write_buf, }; - + u8 i; + if (tomoyo_str_starts(¶m.data, "aggregator ")) + return tomoyo_write_aggregator(¶m); for (i = 0; i < TOMOYO_MAX_TRANSITION_TYPE; i++) - if (tomoyo_str_starts(&data, tomoyo_transition_type[i])) - return tomoyo_write_transition_control(data, is_delete, - i); - for (i = 0; i < 4; i++) - if (tomoyo_str_starts(&data, tomoyo_callback[i].keyword)) - return tomoyo_callback[i].write(data, is_delete); + if (tomoyo_str_starts(¶m.data, tomoyo_transition_type[i])) + return tomoyo_write_transition_control(¶m, i); for (i = 0; i < TOMOYO_MAX_GROUP; i++) - if (tomoyo_str_starts(&data, tomoyo_group_name[i])) - return tomoyo_write_group(data, is_delete, i); + if (tomoyo_str_starts(¶m.data, tomoyo_group_name[i])) + return tomoyo_write_group(¶m, i); + if (tomoyo_str_starts(¶m.data, "acl_group ")) { + unsigned int group; + char *data; + group = simple_strtoul(param.data, &data, 10); + if (group < TOMOYO_MAX_ACL_GROUPS && *data++ == ' ') + return 
tomoyo_write_domain2 + (head->w.ns, &head->w.ns->acl_group[group], + data, is_delete); + } return -EINVAL; } @@ -1267,9 +1588,12 @@ static int tomoyo_write_exception(struct tomoyo_io_buffer *head) */ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx) { - list_for_each_cookie(head->r.group, &tomoyo_group_list[idx]) { + struct tomoyo_policy_namespace *ns = + container_of(head->r.ns, typeof(*ns), namespace_list); + struct list_head *list = &ns->group_list[idx]; + list_for_each_cookie(head->r.group, list) { struct tomoyo_group *group = - list_entry(head->r.group, typeof(*group), list); + list_entry(head->r.group, typeof(*group), head.list); list_for_each_cookie(head->r.acl, &group->member_list) { struct tomoyo_acl_head *ptr = list_entry(head->r.acl, typeof(*ptr), list); @@ -1277,6 +1601,7 @@ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx) continue; if (!tomoyo_flush(head)) return false; + tomoyo_print_namespace(head); tomoyo_set_string(head, tomoyo_group_name[idx]); tomoyo_set_string(head, group->group_name->name); if (idx == TOMOYO_PATH_GROUP) { @@ -1310,7 +1635,10 @@ static bool tomoyo_read_group(struct tomoyo_io_buffer *head, const int idx) */ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx) { - list_for_each_cookie(head->r.acl, &tomoyo_policy_list[idx]) { + struct tomoyo_policy_namespace *ns = + container_of(head->r.ns, typeof(*ns), namespace_list); + struct list_head *list = &ns->policy_list[idx]; + list_for_each_cookie(head->r.acl, list) { struct tomoyo_acl_head *acl = container_of(head->r.acl, typeof(*acl), list); if (acl->is_deleted) @@ -1322,35 +1650,23 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx) { struct tomoyo_transition_control *ptr = container_of(acl, typeof(*ptr), head); - tomoyo_set_string(head, - tomoyo_transition_type + tomoyo_print_namespace(head); + tomoyo_set_string(head, tomoyo_transition_type [ptr->type]); - if (ptr->program) - tomoyo_set_string(head, - ptr->program->name); - if (ptr->program && ptr->domainname) - tomoyo_set_string(head, " from "); - if (ptr->domainname) - tomoyo_set_string(head, - ptr->domainname-> - name); - } - break; - case TOMOYO_ID_GLOBALLY_READABLE: - { - struct tomoyo_readable_file *ptr = - container_of(acl, typeof(*ptr), head); - tomoyo_set_string(head, - TOMOYO_KEYWORD_ALLOW_READ); - tomoyo_set_string(head, ptr->filename->name); + tomoyo_set_string(head, ptr->program ? + ptr->program->name : "any"); + tomoyo_set_string(head, " from "); + tomoyo_set_string(head, ptr->domainname ? 
+ ptr->domainname->name : + "any"); } break; case TOMOYO_ID_AGGREGATOR: { struct tomoyo_aggregator *ptr = container_of(acl, typeof(*ptr), head); - tomoyo_set_string(head, - TOMOYO_KEYWORD_AGGREGATOR); + tomoyo_print_namespace(head); + tomoyo_set_string(head, "aggregator "); tomoyo_set_string(head, ptr->original_name->name); tomoyo_set_space(head); @@ -1358,24 +1674,6 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx) ptr->aggregated_name->name); } break; - case TOMOYO_ID_PATTERN: - { - struct tomoyo_no_pattern *ptr = - container_of(acl, typeof(*ptr), head); - tomoyo_set_string(head, - TOMOYO_KEYWORD_FILE_PATTERN); - tomoyo_set_string(head, ptr->pattern->name); - } - break; - case TOMOYO_ID_NO_REWRITE: - { - struct tomoyo_no_rewrite *ptr = - container_of(acl, typeof(*ptr), head); - tomoyo_set_string(head, - TOMOYO_KEYWORD_DENY_REWRITE); - tomoyo_set_string(head, ptr->pattern->name); - } - break; default: continue; } @@ -1394,6 +1692,8 @@ static bool tomoyo_read_policy(struct tomoyo_io_buffer *head, const int idx) */ static void tomoyo_read_exception(struct tomoyo_io_buffer *head) { + struct tomoyo_policy_namespace *ns = + container_of(head->r.ns, typeof(*ns), namespace_list); if (head->r.eof) return; while (head->r.step < TOMOYO_MAX_POLICY && @@ -1406,95 +1706,40 @@ static void tomoyo_read_exception(struct tomoyo_io_buffer *head) head->r.step++; if (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP) return; + while (head->r.step < TOMOYO_MAX_POLICY + TOMOYO_MAX_GROUP + + TOMOYO_MAX_ACL_GROUPS) { + head->r.acl_group_index = head->r.step - TOMOYO_MAX_POLICY + - TOMOYO_MAX_GROUP; + if (!tomoyo_read_domain2(head, &ns->acl_group + [head->r.acl_group_index])) + return; + head->r.step++; + } head->r.eof = true; } -/** - * tomoyo_print_header - Get header line of audit log. - * - * @r: Pointer to "struct tomoyo_request_info". - * - * Returns string representation. - * - * This function uses kmalloc(), so caller must kfree() if this function - * didn't return NULL. - */ -static char *tomoyo_print_header(struct tomoyo_request_info *r) -{ - struct timeval tv; - const pid_t gpid = task_pid_nr(current); - static const int tomoyo_buffer_len = 4096; - char *buffer = kmalloc(tomoyo_buffer_len, GFP_NOFS); - pid_t ppid; - if (!buffer) - return NULL; - do_gettimeofday(&tv); - rcu_read_lock(); - ppid = task_tgid_vnr(current->real_parent); - rcu_read_unlock(); - snprintf(buffer, tomoyo_buffer_len - 1, - "#timestamp=%lu profile=%u mode=%s (global-pid=%u)" - " task={ pid=%u ppid=%u uid=%u gid=%u euid=%u" - " egid=%u suid=%u sgid=%u fsuid=%u fsgid=%u }", - tv.tv_sec, r->profile, tomoyo_mode[r->mode], gpid, - task_tgid_vnr(current), ppid, - current_uid(), current_gid(), current_euid(), - current_egid(), current_suid(), current_sgid(), - current_fsuid(), current_fsgid()); - return buffer; -} - -/** - * tomoyo_init_audit_log - Allocate buffer for audit logs. - * - * @len: Required size. - * @r: Pointer to "struct tomoyo_request_info". - * - * Returns pointer to allocated memory. - * - * The @len is updated to add the header lines' size on success. - * - * This function uses kzalloc(), so caller must kfree() if this function - * didn't return NULL. 
- */ -static char *tomoyo_init_audit_log(int *len, struct tomoyo_request_info *r) -{ - char *buf = NULL; - const char *header; - const char *domainname; - if (!r->domain) - r->domain = tomoyo_domain(); - domainname = r->domain->domainname->name; - header = tomoyo_print_header(r); - if (!header) - return NULL; - *len += strlen(domainname) + strlen(header) + 10; - buf = kzalloc(*len, GFP_NOFS); - if (buf) - snprintf(buf, (*len) - 1, "%s\n%s\n", header, domainname); - kfree(header); - return buf; -} - -/* Wait queue for tomoyo_query_list. */ +/* Wait queue for kernel -> userspace notification. */ static DECLARE_WAIT_QUEUE_HEAD(tomoyo_query_wait); - -/* Lock for manipulating tomoyo_query_list. */ -static DEFINE_SPINLOCK(tomoyo_query_list_lock); +/* Wait queue for userspace -> kernel notification. */ +static DECLARE_WAIT_QUEUE_HEAD(tomoyo_answer_wait); /* Structure for query. */ struct tomoyo_query { struct list_head list; char *query; - int query_len; + size_t query_len; unsigned int serial; - int timer; - int answer; + u8 timer; + u8 answer; + u8 retry; }; /* The list for "struct tomoyo_query". */ static LIST_HEAD(tomoyo_query_list); +/* Lock for manipulating tomoyo_query_list. */ +static DEFINE_SPINLOCK(tomoyo_query_list_lock); + /* * Number of "struct file" referring /sys/kernel/security/tomoyo/query * interface. @@ -1502,10 +1747,82 @@ static LIST_HEAD(tomoyo_query_list); static atomic_t tomoyo_query_observers = ATOMIC_INIT(0); /** + * tomoyo_truncate - Truncate a line. + * + * @str: String to truncate. + * + * Returns length of truncated @str. + */ +static int tomoyo_truncate(char *str) +{ + char *start = str; + while (*(unsigned char *) str > (unsigned char) ' ') + str++; + *str = '\0'; + return strlen(start) + 1; +} + +/** + * tomoyo_add_entry - Add an ACL to current thread's domain. Used by learning mode. + * + * @domain: Pointer to "struct tomoyo_domain_info". + * @header: Lines containing ACL. + * + * Returns nothing. + */ +static void tomoyo_add_entry(struct tomoyo_domain_info *domain, char *header) +{ + char *buffer; + char *realpath = NULL; + char *argv0 = NULL; + char *symlink = NULL; + char *cp = strchr(header, '\n'); + int len; + if (!cp) + return; + cp = strchr(cp + 1, '\n'); + if (!cp) + return; + *cp++ = '\0'; + len = strlen(cp) + 1; + /* strstr() will return NULL if ordering is wrong. */ + if (*cp == 'f') { + argv0 = strstr(header, " argv[]={ \""); + if (argv0) { + argv0 += 10; + len += tomoyo_truncate(argv0) + 14; + } + realpath = strstr(header, " exec={ realpath=\""); + if (realpath) { + realpath += 8; + len += tomoyo_truncate(realpath) + 6; + } + symlink = strstr(header, " symlink.target=\""); + if (symlink) + len += tomoyo_truncate(symlink + 1) + 1; + } + buffer = kmalloc(len, GFP_NOFS); + if (!buffer) + return; + snprintf(buffer, len - 1, "%s", cp); + if (realpath) + tomoyo_addprintf(buffer, len, " exec.%s", realpath); + if (argv0) + tomoyo_addprintf(buffer, len, " exec.argv[0]=%s", argv0); + if (symlink) + tomoyo_addprintf(buffer, len, "%s", symlink); + tomoyo_normalize_line(buffer); + if (!tomoyo_write_domain2(domain->ns, &domain->acl_info_list, buffer, + false)) + tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES); + kfree(buffer); +} + +/** * tomoyo_supervisor - Ask for the supervisor's decision. * - * @r: Pointer to "struct tomoyo_request_info". - * @fmt: The printf()'s format string, followed by parameters. + * @r: Pointer to "struct tomoyo_request_info". + * @fmt: The printf()'s format string, followed by parameters. 
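 *
 * A typical call site (hypothetical; the directive and variable names are
 * only illustrative) would look like:
 *
 *     if (error == -EPERM)
 *             error = tomoyo_supervisor(r, "file read %s\n",
 *                                       filename->name);
 *
 * The same formatted line serves both as the audit log entry and, when the
 * request was rejected with observers present, as the query message shown
 * to the userspace daemon.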
* * Returns 0 if the supervisor decided to permit the access request which * violated the policy in enforcing mode, TOMOYO_RETRY_REQUEST if the @@ -1515,88 +1832,79 @@ static atomic_t tomoyo_query_observers = ATOMIC_INIT(0); int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) { va_list args; - int error = -EPERM; - int pos; + int error; int len; static unsigned int tomoyo_serial; - struct tomoyo_query *entry = NULL; + struct tomoyo_query entry = { }; bool quota_exceeded = false; - char *header; + va_start(args, fmt); + len = vsnprintf((char *) &len, 1, fmt, args) + 1; + va_end(args); + /* Write /sys/kernel/security/tomoyo/audit. */ + va_start(args, fmt); + tomoyo_write_log2(r, len, fmt, args); + va_end(args); + /* Nothing more to do if granted. */ + if (r->granted) + return 0; + if (r->mode) + tomoyo_update_stat(r->mode); switch (r->mode) { - char *buffer; + case TOMOYO_CONFIG_ENFORCING: + error = -EPERM; + if (atomic_read(&tomoyo_query_observers)) + break; + goto out; case TOMOYO_CONFIG_LEARNING: - if (!tomoyo_domain_quota_is_ok(r)) - return 0; - va_start(args, fmt); - len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 4; - va_end(args); - buffer = kmalloc(len, GFP_NOFS); - if (!buffer) - return 0; - va_start(args, fmt); - vsnprintf(buffer, len - 1, fmt, args); - va_end(args); - tomoyo_normalize_line(buffer); - tomoyo_write_domain2(buffer, r->domain, false); - kfree(buffer); + error = 0; + /* Check max_learning_entry parameter. */ + if (tomoyo_domain_quota_is_ok(r)) + break; /* fall through */ - case TOMOYO_CONFIG_PERMISSIVE: + default: return 0; } - if (!r->domain) - r->domain = tomoyo_domain(); - if (!atomic_read(&tomoyo_query_observers)) - return -EPERM; + /* Get message. */ va_start(args, fmt); - len = vsnprintf((char *) &pos, sizeof(pos) - 1, fmt, args) + 32; + entry.query = tomoyo_init_log(r, len, fmt, args); va_end(args); - header = tomoyo_init_audit_log(&len, r); - if (!header) + if (!entry.query) goto out; - entry = kzalloc(sizeof(*entry), GFP_NOFS); - if (!entry) + entry.query_len = strlen(entry.query) + 1; + if (!error) { + tomoyo_add_entry(r->domain, entry.query); goto out; - entry->query = kzalloc(len, GFP_NOFS); - if (!entry->query) - goto out; - len = ksize(entry->query); + } + len = tomoyo_round2(entry.query_len); spin_lock(&tomoyo_query_list_lock); - if (tomoyo_quota_for_query && tomoyo_query_memory_size + len + - sizeof(*entry) >= tomoyo_quota_for_query) { + if (tomoyo_memory_quota[TOMOYO_MEMORY_QUERY] && + tomoyo_memory_used[TOMOYO_MEMORY_QUERY] + len + >= tomoyo_memory_quota[TOMOYO_MEMORY_QUERY]) { quota_exceeded = true; } else { - tomoyo_query_memory_size += len + sizeof(*entry); - entry->serial = tomoyo_serial++; + entry.serial = tomoyo_serial++; + entry.retry = r->retry; + tomoyo_memory_used[TOMOYO_MEMORY_QUERY] += len; + list_add_tail(&entry.list, &tomoyo_query_list); } spin_unlock(&tomoyo_query_list_lock); if (quota_exceeded) goto out; - pos = snprintf(entry->query, len - 1, "Q%u-%hu\n%s", - entry->serial, r->retry, header); - kfree(header); - header = NULL; - va_start(args, fmt); - vsnprintf(entry->query + pos, len - 1 - pos, fmt, args); - entry->query_len = strlen(entry->query) + 1; - va_end(args); - spin_lock(&tomoyo_query_list_lock); - list_add_tail(&entry->list, &tomoyo_query_list); - spin_unlock(&tomoyo_query_list_lock); /* Give 10 seconds for supervisor's opinion. 
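 * (In the rewritten loop that follows, each wait_event_interruptible_timeout()
 * call sleeps for at most HZ jiffies, i.e. one second, and entry.timer counts
 * ten such rounds, so the overall budget stays roughly ten seconds. The wait
 * also ends early once entry.answer is set or the last observer closes the
 * query interface.)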
*/ - for (entry->timer = 0; - atomic_read(&tomoyo_query_observers) && entry->timer < 100; - entry->timer++) { - wake_up(&tomoyo_query_wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule_timeout(HZ / 10); - if (entry->answer) + while (entry.timer < 10) { + wake_up_all(&tomoyo_query_wait); + if (wait_event_interruptible_timeout + (tomoyo_answer_wait, entry.answer || + !atomic_read(&tomoyo_query_observers), HZ)) break; + else + entry.timer++; } spin_lock(&tomoyo_query_list_lock); - list_del(&entry->list); - tomoyo_query_memory_size -= len + sizeof(*entry); + list_del(&entry.list); + tomoyo_memory_used[TOMOYO_MEMORY_QUERY] -= len; spin_unlock(&tomoyo_query_list_lock); - switch (entry->answer) { + switch (entry.answer) { case 3: /* Asked to retry by administrator. */ error = TOMOYO_RETRY_REQUEST; r->retry++; @@ -1605,18 +1913,12 @@ int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) /* Granted by administrator. */ error = 0; break; - case 0: - /* Timed out. */ - break; default: - /* Rejected by administrator. */ + /* Timed out or rejected by administrator. */ break; } - out: - if (entry) - kfree(entry->query); - kfree(entry); - kfree(header); +out: + kfree(entry.query); return error; } @@ -1663,8 +1965,8 @@ static int tomoyo_poll_query(struct file *file, poll_table *wait) static void tomoyo_read_query(struct tomoyo_io_buffer *head) { struct list_head *tmp; - int pos = 0; - int len = 0; + unsigned int pos = 0; + size_t len = 0; char *buf; if (head->r.w_pos) return; @@ -1687,7 +1989,7 @@ static void tomoyo_read_query(struct tomoyo_io_buffer *head) head->r.query_index = 0; return; } - buf = kzalloc(len, GFP_NOFS); + buf = kzalloc(len + 32, GFP_NOFS); if (!buf) return; pos = 0; @@ -1703,7 +2005,8 @@ static void tomoyo_read_query(struct tomoyo_io_buffer *head) * can change, but I don't care. */ if (len == ptr->query_len) - memmove(buf, ptr->query, len); + snprintf(buf, len + 31, "Q%u-%hu\n%s", ptr->serial, + ptr->retry, ptr->query); break; } spin_unlock(&tomoyo_query_list_lock); @@ -1760,7 +2063,7 @@ static int tomoyo_write_answer(struct tomoyo_io_buffer *head) static void tomoyo_read_version(struct tomoyo_io_buffer *head) { if (!head->r.eof) { - tomoyo_io_printf(head, "2.3.0"); + tomoyo_io_printf(head, "2.4.0"); head->r.eof = true; } } @@ -1785,15 +2088,111 @@ static void tomoyo_read_self_domain(struct tomoyo_io_buffer *head) } } +/* String table for /sys/kernel/security/tomoyo/stat interface. */ +static const char * const tomoyo_policy_headers[TOMOYO_MAX_POLICY_STAT] = { + [TOMOYO_STAT_POLICY_UPDATES] = "update:", + [TOMOYO_STAT_POLICY_LEARNING] = "violation in learning mode:", + [TOMOYO_STAT_POLICY_PERMISSIVE] = "violation in permissive mode:", + [TOMOYO_STAT_POLICY_ENFORCING] = "violation in enforcing mode:", +}; + +/* String table for /sys/kernel/security/tomoyo/stat interface. */ +static const char * const tomoyo_memory_headers[TOMOYO_MAX_MEMORY_STAT] = { + [TOMOYO_MEMORY_POLICY] = "policy:", + [TOMOYO_MEMORY_AUDIT] = "audit log:", + [TOMOYO_MEMORY_QUERY] = "query message:", +}; + +/* Timestamp counter for last updated. */ +static unsigned int tomoyo_stat_updated[TOMOYO_MAX_POLICY_STAT]; +/* Counter for number of updates. */ +static unsigned int tomoyo_stat_modified[TOMOYO_MAX_POLICY_STAT]; + +/** + * tomoyo_update_stat - Update statistic counters. + * + * @index: Index for policy type. + * + * Returns nothing. 
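 *
 * Example (as used elsewhere in this patch):
 *
 *     tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES);
 *
 * bumps the "update:" counter reported via
 * /sys/kernel/security/tomoyo/stat and records when it last changed.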
+ */ +void tomoyo_update_stat(const u8 index) +{ + struct timeval tv; + do_gettimeofday(&tv); + /* + * I don't use atomic operations because race condition is not fatal. + */ + tomoyo_stat_updated[index]++; + tomoyo_stat_modified[index] = tv.tv_sec; +} + +/** + * tomoyo_read_stat - Read statistic data. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ +static void tomoyo_read_stat(struct tomoyo_io_buffer *head) +{ + u8 i; + unsigned int total = 0; + if (head->r.eof) + return; + for (i = 0; i < TOMOYO_MAX_POLICY_STAT; i++) { + tomoyo_io_printf(head, "Policy %-30s %10u", + tomoyo_policy_headers[i], + tomoyo_stat_updated[i]); + if (tomoyo_stat_modified[i]) { + struct tomoyo_time stamp; + tomoyo_convert_time(tomoyo_stat_modified[i], &stamp); + tomoyo_io_printf(head, " (Last: %04u/%02u/%02u " + "%02u:%02u:%02u)", + stamp.year, stamp.month, stamp.day, + stamp.hour, stamp.min, stamp.sec); + } + tomoyo_set_lf(head); + } + for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) { + unsigned int used = tomoyo_memory_used[i]; + total += used; + tomoyo_io_printf(head, "Memory used by %-22s %10u", + tomoyo_memory_headers[i], used); + used = tomoyo_memory_quota[i]; + if (used) + tomoyo_io_printf(head, " (Quota: %10u)", used); + tomoyo_set_lf(head); + } + tomoyo_io_printf(head, "Total memory used: %10u\n", + total); + head->r.eof = true; +} + +/** + * tomoyo_write_stat - Set memory quota. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns 0. + */ +static int tomoyo_write_stat(struct tomoyo_io_buffer *head) +{ + char *data = head->write_buf; + u8 i; + if (tomoyo_str_starts(&data, "Memory used by ")) + for (i = 0; i < TOMOYO_MAX_MEMORY_STAT; i++) + if (tomoyo_str_starts(&data, tomoyo_memory_headers[i])) + sscanf(data, "%u", &tomoyo_memory_quota[i]); + return 0; +} + /** * tomoyo_open_control - open() for /sys/kernel/security/tomoyo/ interface. * * @type: Type of interface. * @file: Pointer to "struct file". * - * Associates policy handler and returns 0 on success, -ENOMEM otherwise. - * - * Caller acquires tomoyo_read_lock(). + * Returns 0 on success, negative value otherwise. 
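 *
 * A minimal sketch of a securityfs open handler dispatching here
 * (hypothetical wrapper; recovering @type from inode->i_private is an
 * assumption, not something this patch defines):
 *
 *     static int tomoyo_open(struct inode *inode, struct file *file)
 *     {
 *             const u8 type = (unsigned long) inode->i_private;
 *             return tomoyo_open_control(type, file);
 *     }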
 */ int tomoyo_open_control(const u8 type, struct file *file) { @@ -1814,15 +2213,15 @@ int tomoyo_open_control(const u8 type, struct file *file) head->write = tomoyo_write_exception; head->read = tomoyo_read_exception; break; + case TOMOYO_AUDIT: + /* /sys/kernel/security/tomoyo/audit */ + head->poll = tomoyo_poll_log; + head->read = tomoyo_read_log; + break; case TOMOYO_SELFDOMAIN: /* /sys/kernel/security/tomoyo/self_domain */ head->read = tomoyo_read_self_domain; break; - case TOMOYO_DOMAIN_STATUS: - /* /sys/kernel/security/tomoyo/.domain_status */ - head->write = tomoyo_write_domain_profile; - head->read = tomoyo_read_domain_profile; - break; case TOMOYO_PROCESS_STATUS: /* /sys/kernel/security/tomoyo/.process_status */ head->write = tomoyo_write_pid; @@ -1833,11 +2232,11 @@ int tomoyo_open_control(const u8 type, struct file *file) head->read = tomoyo_read_version; head->readbuf_size = 128; break; - case TOMOYO_MEMINFO: - /* /sys/kernel/security/tomoyo/meminfo */ - head->write = tomoyo_write_memory_quota; - head->read = tomoyo_read_memory_counter; - head->readbuf_size = 512; + case TOMOYO_STAT: + /* /sys/kernel/security/tomoyo/stat */ + head->write = tomoyo_write_stat; + head->read = tomoyo_read_stat; + head->readbuf_size = 1024; break; case TOMOYO_PROFILE: /* /sys/kernel/security/tomoyo/profile */ @@ -1887,26 +2286,16 @@ int tomoyo_open_control(const u8 type, struct file *file) return -ENOMEM; } } - if (type != TOMOYO_QUERY) - head->reader_idx = tomoyo_read_lock(); - file->private_data = head; - /* - * Call the handler now if the file is - * /sys/kernel/security/tomoyo/self_domain - * so that the user can use - * cat < /sys/kernel/security/tomoyo/self_domain" - * to know the current process's domainname. - */ - if (type == TOMOYO_SELFDOMAIN) - tomoyo_read_control(file, NULL, 0); /* * If the file is /sys/kernel/security/tomoyo/query , increment the * observer counter. * The observer counter is used by tomoyo_supervisor() to see if * there is some process monitoring /sys/kernel/security/tomoyo/query. */ - else if (type == TOMOYO_QUERY) + if (type == TOMOYO_QUERY) atomic_inc(&tomoyo_query_observers); + file->private_data = head; + tomoyo_notify_gc(head, true); return 0; } @@ -1917,7 +2306,8 @@ int tomoyo_open_control(const u8 type, struct file *file) * @wait: Pointer to "poll_table". * * Waits for read readiness. - * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd . + * /sys/kernel/security/tomoyo/query is handled by /usr/sbin/tomoyo-queryd and + * /sys/kernel/security/tomoyo/audit is handled by /usr/sbin/tomoyo-auditd. */ int tomoyo_poll_control(struct file *file, poll_table *wait) { @@ -1928,21 +2318,58 @@ int tomoyo_poll_control(struct file *file, poll_table *wait) } /** + * tomoyo_set_namespace_cursor - Set namespace to read. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns nothing. + */ +static inline void tomoyo_set_namespace_cursor(struct tomoyo_io_buffer *head) +{ + struct list_head *ns; + if (head->type != TOMOYO_EXCEPTIONPOLICY && + head->type != TOMOYO_PROFILE) + return; + /* + * If this is the first read, or reading previous namespace finished + * and has more namespaces to read, update the namespace cursor. + */ + ns = head->r.ns; + if (!ns || (head->r.eof && ns->next != &tomoyo_namespace_list)) { + /* Clearing is OK because tomoyo_flush() returned true. */ + memset(&head->r, 0, sizeof(head->r)); + head->r.ns = ns ? ns->next : tomoyo_namespace_list.next; + } +} + +/** + * tomoyo_has_more_namespace - Check for unread namespaces.
+ * + * @head: Pointer to "struct tomoyo_io_buffer". + * + * Returns true if we have more entries to print, false otherwise. + */ +static inline bool tomoyo_has_more_namespace(struct tomoyo_io_buffer *head) +{ + return (head->type == TOMOYO_EXCEPTIONPOLICY || + head->type == TOMOYO_PROFILE) && head->r.eof && + head->r.ns->next != &tomoyo_namespace_list; +} + +/** * tomoyo_read_control - read() for /sys/kernel/security/tomoyo/ interface. * - * @file: Pointer to "struct file". + * @head: Pointer to "struct tomoyo_io_buffer". * @buffer: Pointer to buffer to write to. * @buffer_len: Size of @buffer. * * Returns bytes read on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). */ -int tomoyo_read_control(struct file *file, char __user *buffer, - const int buffer_len) +ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer, + const int buffer_len) { int len; - struct tomoyo_io_buffer *head = file->private_data; + int idx; if (!head->read) return -ENOSYS; @@ -1950,64 +2377,156 @@ int tomoyo_read_control(struct file *file, char __user *buffer, return -EINTR; head->read_user_buf = buffer; head->read_user_buf_avail = buffer_len; + idx = tomoyo_read_lock(); if (tomoyo_flush(head)) /* Call the policy handler. */ - head->read(head); - tomoyo_flush(head); + do { + tomoyo_set_namespace_cursor(head); + head->read(head); + } while (tomoyo_flush(head) && + tomoyo_has_more_namespace(head)); + tomoyo_read_unlock(idx); len = head->read_user_buf - buffer; mutex_unlock(&head->io_sem); return len; } /** + * tomoyo_parse_policy - Parse a policy line. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @line: Line to parse. + * + * Returns 0 on success, negative value otherwise. + * + * Caller holds tomoyo_read_lock(). + */ +static int tomoyo_parse_policy(struct tomoyo_io_buffer *head, char *line) +{ + /* Delete request? */ + head->w.is_delete = !strncmp(line, "delete ", 7); + if (head->w.is_delete) + memmove(line, line + 7, strlen(line + 7) + 1); + /* Selecting namespace to update. */ + if (head->type == TOMOYO_EXCEPTIONPOLICY || + head->type == TOMOYO_PROFILE) { + if (*line == '<') { + char *cp = strchr(line, ' '); + if (cp) { + *cp++ = '\0'; + head->w.ns = tomoyo_assign_namespace(line); + memmove(line, cp, strlen(cp) + 1); + } else + head->w.ns = NULL; + } else + head->w.ns = &tomoyo_kernel_namespace; + /* Don't allow updating if namespace is invalid. */ + if (!head->w.ns) + return -ENOENT; + } + /* Do the update. */ + return head->write(head); } + +/** * tomoyo_write_control - write() for /sys/kernel/security/tomoyo/ interface. * - * @file: Pointer to "struct file". + * @head: Pointer to "struct tomoyo_io_buffer". * @buffer: Pointer to buffer to read from. * @buffer_len: Size of @buffer. * * Returns @buffer_len on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). */ -int tomoyo_write_control(struct file *file, const char __user *buffer, - const int buffer_len) +ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, + const char __user *buffer, const int buffer_len) { - struct tomoyo_io_buffer *head = file->private_data; int error = buffer_len; - int avail_len = buffer_len; + size_t avail_len = buffer_len; char *cp0 = head->write_buf; - + int idx; if (!head->write) return -ENOSYS; if (!access_ok(VERIFY_READ, buffer, buffer_len)) return -EFAULT; - /* Don't allow updating policies by non manager programs.
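 * (This blanket check is dropped below: the rewritten tomoyo_write_control()
 * decides per line instead, exempting TOMOYO_PROCESS_STATUS writes, domain
 * selection lines and "select transition_only" before falling back to the
 * tomoyo_manager() test.)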
*/ - if (head->write != tomoyo_write_pid && - head->write != tomoyo_write_domain && !tomoyo_manager()) - return -EPERM; if (mutex_lock_interruptible(&head->io_sem)) return -EINTR; + idx = tomoyo_read_lock(); /* Read a line and dispatch it to the policy handler. */ while (avail_len > 0) { char c; - if (head->write_avail >= head->writebuf_size - 1) { - error = -ENOMEM; - break; - } else if (get_user(c, buffer)) { + if (head->w.avail >= head->writebuf_size - 1) { + const int len = head->writebuf_size * 2; + char *cp = kzalloc(len, GFP_NOFS); + if (!cp) { + error = -ENOMEM; + break; + } + memmove(cp, cp0, head->w.avail); + kfree(cp0); + head->write_buf = cp; + cp0 = cp; + head->writebuf_size = len; + } + if (get_user(c, buffer)) { error = -EFAULT; break; } buffer++; avail_len--; - cp0[head->write_avail++] = c; + cp0[head->w.avail++] = c; if (c != '\n') continue; - cp0[head->write_avail - 1] = '\0'; - head->write_avail = 0; + cp0[head->w.avail - 1] = '\0'; + head->w.avail = 0; tomoyo_normalize_line(cp0); - head->write(head); + if (!strcmp(cp0, "reset")) { + head->w.ns = &tomoyo_kernel_namespace; + head->w.domain = NULL; + memset(&head->r, 0, sizeof(head->r)); + continue; + } + /* Don't allow updating policies by non manager programs. */ + switch (head->type) { + case TOMOYO_PROCESS_STATUS: + /* This does not write anything. */ + break; + case TOMOYO_DOMAINPOLICY: + if (tomoyo_select_domain(head, cp0)) + continue; + /* fall through */ + case TOMOYO_EXCEPTIONPOLICY: + if (!strcmp(cp0, "select transition_only")) { + head->r.print_transition_related_only = true; + continue; + } + /* fall through */ + default: + if (!tomoyo_manager()) { + error = -EPERM; + goto out; + } + } + switch (tomoyo_parse_policy(head, cp0)) { + case -EPERM: + error = -EPERM; + goto out; + case 0: + switch (head->type) { + case TOMOYO_DOMAINPOLICY: + case TOMOYO_EXCEPTIONPOLICY: + case TOMOYO_STAT: + case TOMOYO_PROFILE: + case TOMOYO_MANAGER: + tomoyo_update_stat(TOMOYO_STAT_POLICY_UPDATES); + break; + default: + break; + } + break; + } } +out: + tomoyo_read_unlock(idx); mutex_unlock(&head->io_sem); return error; } @@ -2015,35 +2534,20 @@ int tomoyo_write_control(struct file *file, const char __user *buffer, /** * tomoyo_close_control - close() for /sys/kernel/security/tomoyo/ interface. * - * @file: Pointer to "struct file". - * - * Releases memory and returns 0. + * @head: Pointer to "struct tomoyo_io_buffer". * - * Caller looses tomoyo_read_lock(). + * Returns 0. */ -int tomoyo_close_control(struct file *file) +int tomoyo_close_control(struct tomoyo_io_buffer *head) { - struct tomoyo_io_buffer *head = file->private_data; - const bool is_write = !!head->write_buf; - /* * If the file is /sys/kernel/security/tomoyo/query , decrement the * observer counter. */ - if (head->type == TOMOYO_QUERY) - atomic_dec(&tomoyo_query_observers); - else - tomoyo_read_unlock(head->reader_idx); - /* Release memory used for policy I/O. */ - kfree(head->read_buf); - head->read_buf = NULL; - kfree(head->write_buf); - head->write_buf = NULL; - kfree(head); - head = NULL; - file->private_data = NULL; - if (is_write) - tomoyo_run_gc(); + if (head->type == TOMOYO_QUERY && + atomic_dec_and_test(&tomoyo_query_observers)) + wake_up_all(&tomoyo_answer_wait); + tomoyo_notify_gc(head, false); return 0; } @@ -2055,27 +2559,90 @@ void tomoyo_check_profile(void) struct tomoyo_domain_info *domain; const int idx = tomoyo_read_lock(); tomoyo_policy_loaded = true; - /* Check all profiles currently assigned to domains are defined. 
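 * (The rewritten loop below performs this check per namespace: the domain's
 * ns->profile_version must be 20100903 and ns->profile_ptr[profile] must be
 * non-NULL, otherwise the kernel points at the TOMOYO 2.4 userland tools and
 * panics.)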
*/ + printk(KERN_INFO "TOMOYO: 2.4.0\n"); list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) { const u8 profile = domain->profile; - if (tomoyo_profile_ptr[profile]) + const struct tomoyo_policy_namespace *ns = domain->ns; + if (ns->profile_version != 20100903) + printk(KERN_ERR + "Profile version %u is not supported.\n", + ns->profile_version); + else if (!ns->profile_ptr[profile]) + printk(KERN_ERR + "Profile %u (used by '%s') is not defined.\n", + profile, domain->domainname->name); + else continue; - printk(KERN_ERR "You need to define profile %u before using it.\n", - profile); - printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " + printk(KERN_ERR + "Userland tools for TOMOYO 2.4 must be installed and " + "policy must be initialized.\n"); + printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.4/ " "for more information.\n"); - panic("Profile %u (used by '%s') not defined.\n", - profile, domain->domainname->name); + panic("STOP!"); } tomoyo_read_unlock(idx); - if (tomoyo_profile_version != 20090903) { - printk(KERN_ERR "You need to install userland programs for " - "TOMOYO 2.3 and initialize policy configuration.\n"); - printk(KERN_ERR "Please see http://tomoyo.sourceforge.jp/2.3/ " - "for more information.\n"); - panic("Profile version %u is not supported.\n", - tomoyo_profile_version); - } - printk(KERN_INFO "TOMOYO: 2.3.0\n"); printk(KERN_INFO "Mandatory Access Control activated.\n"); } + +/** + * tomoyo_load_builtin_policy - Load built-in policy. + * + * Returns nothing. + */ +void __init tomoyo_load_builtin_policy(void) +{ + /* + * This include file is manually created and contains built-in policy + * named "tomoyo_builtin_profile", "tomoyo_builtin_exception_policy", + * "tomoyo_builtin_domain_policy", "tomoyo_builtin_manager", + * "tomoyo_builtin_stat" in the form of "static char [] __initdata". + */ +#include "builtin-policy.h" + u8 i; + const int idx = tomoyo_read_lock(); + for (i = 0; i < 5; i++) { + struct tomoyo_io_buffer head = { }; + char *start = ""; + switch (i) { + case 0: + start = tomoyo_builtin_profile; + head.type = TOMOYO_PROFILE; + head.write = tomoyo_write_profile; + break; + case 1: + start = tomoyo_builtin_exception_policy; + head.type = TOMOYO_EXCEPTIONPOLICY; + head.write = tomoyo_write_exception; + break; + case 2: + start = tomoyo_builtin_domain_policy; + head.type = TOMOYO_DOMAINPOLICY; + head.write = tomoyo_write_domain; + break; + case 3: + start = tomoyo_builtin_manager; + head.type = TOMOYO_MANAGER; + head.write = tomoyo_write_manager; + break; + case 4: + start = tomoyo_builtin_stat; + head.type = TOMOYO_STAT; + head.write = tomoyo_write_stat; + break; + } + while (1) { + char *end = strchr(start, '\n'); + if (!end) + break; + *end = '\0'; + tomoyo_normalize_line(start); + head.write_buf = start; + tomoyo_parse_policy(&head, start); + start = end + 1; + } + } + tomoyo_read_unlock(idx); +#ifdef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + tomoyo_check_profile(); +#endif +} diff --git a/security/tomoyo/common.h b/security/tomoyo/common.h index 7c66bd898782..f7fbaa66e443 100644 --- a/security/tomoyo/common.h +++ b/security/tomoyo/common.h @@ -21,7 +21,8 @@ #include <linux/list.h> #include <linux/cred.h> #include <linux/poll.h> -struct linux_binprm; +#include <linux/binfmts.h> +#include <linux/highmem.h> /********** Constants definitions. **********/ @@ -38,66 +39,149 @@ struct linux_binprm; /* Profile number is an integer between 0 and 255. */ #define TOMOYO_MAX_PROFILES 256 +/* Group number is an integer between 0 and 255. 
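 * A domain subscribes to one of these shared ACL sets through its "group"
 * field; in policy syntax this is the "use_group" keyword, e.g. a line such
 * as "use_group 0" (the exact argument form is assumed here) attaches
 * acl_group[0] of the domain's namespace.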
 */ +#define TOMOYO_MAX_ACL_GROUPS 256 + +/* Index numbers for "struct tomoyo_condition". */ +enum tomoyo_conditions_index { + TOMOYO_TASK_UID, /* current_uid() */ + TOMOYO_TASK_EUID, /* current_euid() */ + TOMOYO_TASK_SUID, /* current_suid() */ + TOMOYO_TASK_FSUID, /* current_fsuid() */ + TOMOYO_TASK_GID, /* current_gid() */ + TOMOYO_TASK_EGID, /* current_egid() */ + TOMOYO_TASK_SGID, /* current_sgid() */ + TOMOYO_TASK_FSGID, /* current_fsgid() */ + TOMOYO_TASK_PID, /* sys_getpid() */ + TOMOYO_TASK_PPID, /* sys_getppid() */ + TOMOYO_EXEC_ARGC, /* "struct linux_binprm *"->argc */ + TOMOYO_EXEC_ENVC, /* "struct linux_binprm *"->envc */ + TOMOYO_TYPE_IS_SOCKET, /* S_IFSOCK */ + TOMOYO_TYPE_IS_SYMLINK, /* S_IFLNK */ + TOMOYO_TYPE_IS_FILE, /* S_IFREG */ + TOMOYO_TYPE_IS_BLOCK_DEV, /* S_IFBLK */ + TOMOYO_TYPE_IS_DIRECTORY, /* S_IFDIR */ + TOMOYO_TYPE_IS_CHAR_DEV, /* S_IFCHR */ + TOMOYO_TYPE_IS_FIFO, /* S_IFIFO */ + TOMOYO_MODE_SETUID, /* S_ISUID */ + TOMOYO_MODE_SETGID, /* S_ISGID */ + TOMOYO_MODE_STICKY, /* S_ISVTX */ + TOMOYO_MODE_OWNER_READ, /* S_IRUSR */ + TOMOYO_MODE_OWNER_WRITE, /* S_IWUSR */ + TOMOYO_MODE_OWNER_EXECUTE, /* S_IXUSR */ + TOMOYO_MODE_GROUP_READ, /* S_IRGRP */ + TOMOYO_MODE_GROUP_WRITE, /* S_IWGRP */ + TOMOYO_MODE_GROUP_EXECUTE, /* S_IXGRP */ + TOMOYO_MODE_OTHERS_READ, /* S_IROTH */ + TOMOYO_MODE_OTHERS_WRITE, /* S_IWOTH */ + TOMOYO_MODE_OTHERS_EXECUTE, /* S_IXOTH */ + TOMOYO_EXEC_REALPATH, + TOMOYO_SYMLINK_TARGET, + TOMOYO_PATH1_UID, + TOMOYO_PATH1_GID, + TOMOYO_PATH1_INO, + TOMOYO_PATH1_MAJOR, + TOMOYO_PATH1_MINOR, + TOMOYO_PATH1_PERM, + TOMOYO_PATH1_TYPE, + TOMOYO_PATH1_DEV_MAJOR, + TOMOYO_PATH1_DEV_MINOR, + TOMOYO_PATH2_UID, + TOMOYO_PATH2_GID, + TOMOYO_PATH2_INO, + TOMOYO_PATH2_MAJOR, + TOMOYO_PATH2_MINOR, + TOMOYO_PATH2_PERM, + TOMOYO_PATH2_TYPE, + TOMOYO_PATH2_DEV_MAJOR, + TOMOYO_PATH2_DEV_MINOR, + TOMOYO_PATH1_PARENT_UID, + TOMOYO_PATH1_PARENT_GID, + TOMOYO_PATH1_PARENT_INO, + TOMOYO_PATH1_PARENT_PERM, + TOMOYO_PATH2_PARENT_UID, + TOMOYO_PATH2_PARENT_GID, + TOMOYO_PATH2_PARENT_INO, + TOMOYO_PATH2_PARENT_PERM, + TOMOYO_MAX_CONDITION_KEYWORD, + TOMOYO_NUMBER_UNION, + TOMOYO_NAME_UNION, + TOMOYO_ARGV_ENTRY, + TOMOYO_ENVP_ENTRY, +}; + + +/* Index numbers for stat(). */ +enum tomoyo_path_stat_index { + /* Do not change this order. */ + TOMOYO_PATH1, + TOMOYO_PATH1_PARENT, + TOMOYO_PATH2, + TOMOYO_PATH2_PARENT, + TOMOYO_MAX_PATH_STAT +}; + +/* Index numbers for operation mode. */ enum tomoyo_mode_index { TOMOYO_CONFIG_DISABLED, TOMOYO_CONFIG_LEARNING, TOMOYO_CONFIG_PERMISSIVE, TOMOYO_CONFIG_ENFORCING, - TOMOYO_CONFIG_USE_DEFAULT = 255 + TOMOYO_CONFIG_MAX_MODE, + TOMOYO_CONFIG_WANT_REJECT_LOG = 64, + TOMOYO_CONFIG_WANT_GRANT_LOG = 128, + TOMOYO_CONFIG_USE_DEFAULT = 255, }; +/* Index numbers for entry type. */ enum tomoyo_policy_id { TOMOYO_ID_GROUP, TOMOYO_ID_PATH_GROUP, TOMOYO_ID_NUMBER_GROUP, TOMOYO_ID_TRANSITION_CONTROL, TOMOYO_ID_AGGREGATOR, - TOMOYO_ID_GLOBALLY_READABLE, - TOMOYO_ID_PATTERN, - TOMOYO_ID_NO_REWRITE, TOMOYO_ID_MANAGER, + TOMOYO_ID_CONDITION, TOMOYO_ID_NAME, TOMOYO_ID_ACL, TOMOYO_ID_DOMAIN, TOMOYO_MAX_POLICY }; +/* Index numbers for domain's attributes. */ +enum tomoyo_domain_info_flags_index { + /* Quota warning flag. */ + TOMOYO_DIF_QUOTA_WARNED, + /* + * This domain was unable to create a new domain at + * tomoyo_find_next_domain() because the name of the domain to be + * created was too long or it could not allocate memory. + * More than one process continued execve() without domain transition.
+ */ + TOMOYO_DIF_TRANSITION_FAILED, + TOMOYO_MAX_DOMAIN_INFO_FLAGS +}; + +/* Index numbers for group entries. */ enum tomoyo_group_id { TOMOYO_PATH_GROUP, TOMOYO_NUMBER_GROUP, TOMOYO_MAX_GROUP }; -/* Keywords for ACLs. */ -#define TOMOYO_KEYWORD_AGGREGATOR "aggregator " -#define TOMOYO_KEYWORD_ALLOW_MOUNT "allow_mount " -#define TOMOYO_KEYWORD_ALLOW_READ "allow_read " -#define TOMOYO_KEYWORD_DELETE "delete " -#define TOMOYO_KEYWORD_DENY_REWRITE "deny_rewrite " -#define TOMOYO_KEYWORD_FILE_PATTERN "file_pattern " -#define TOMOYO_KEYWORD_INITIALIZE_DOMAIN "initialize_domain " -#define TOMOYO_KEYWORD_KEEP_DOMAIN "keep_domain " -#define TOMOYO_KEYWORD_NO_INITIALIZE_DOMAIN "no_initialize_domain " -#define TOMOYO_KEYWORD_NO_KEEP_DOMAIN "no_keep_domain " -#define TOMOYO_KEYWORD_PATH_GROUP "path_group " -#define TOMOYO_KEYWORD_NUMBER_GROUP "number_group " -#define TOMOYO_KEYWORD_SELECT "select " -#define TOMOYO_KEYWORD_USE_PROFILE "use_profile " -#define TOMOYO_KEYWORD_IGNORE_GLOBAL_ALLOW_READ "ignore_global_allow_read" -#define TOMOYO_KEYWORD_QUOTA_EXCEEDED "quota_exceeded" -#define TOMOYO_KEYWORD_TRANSITION_FAILED "transition_failed" -/* A domain definition starts with <kernel>. */ -#define TOMOYO_ROOT_NAME "<kernel>" -#define TOMOYO_ROOT_NAME_LEN (sizeof(TOMOYO_ROOT_NAME) - 1) - -/* Value type definition. */ -#define TOMOYO_VALUE_TYPE_INVALID 0 -#define TOMOYO_VALUE_TYPE_DECIMAL 1 -#define TOMOYO_VALUE_TYPE_OCTAL 2 -#define TOMOYO_VALUE_TYPE_HEXADECIMAL 3 +/* Index numbers for type of numeric values. */ +enum tomoyo_value_type { + TOMOYO_VALUE_TYPE_INVALID, + TOMOYO_VALUE_TYPE_DECIMAL, + TOMOYO_VALUE_TYPE_OCTAL, + TOMOYO_VALUE_TYPE_HEXADECIMAL, +}; +/* Index numbers for domain transition control keywords. */ enum tomoyo_transition_type { /* Do not change this order, */ + TOMOYO_TRANSITION_CONTROL_NO_RESET, + TOMOYO_TRANSITION_CONTROL_RESET, TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE, TOMOYO_TRANSITION_CONTROL_INITIALIZE, TOMOYO_TRANSITION_CONTROL_NO_KEEP, @@ -114,35 +198,29 @@ enum tomoyo_acl_entry_type_index { TOMOYO_TYPE_MOUNT_ACL, }; -/* Index numbers for File Controls. */ - -/* - * TOMOYO_TYPE_READ_WRITE is special. TOMOYO_TYPE_READ_WRITE is automatically - * set if both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are set. - * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically set if - * TOMOYO_TYPE_READ_WRITE is set. - * TOMOYO_TYPE_READ_WRITE is automatically cleared if either TOMOYO_TYPE_READ - * or TOMOYO_TYPE_WRITE is cleared. - * Both TOMOYO_TYPE_READ and TOMOYO_TYPE_WRITE are automatically cleared if - * TOMOYO_TYPE_READ_WRITE is cleared. - */ - +/* Index numbers for access controls with one pathname. */ enum tomoyo_path_acl_index { - TOMOYO_TYPE_READ_WRITE, TOMOYO_TYPE_EXECUTE, TOMOYO_TYPE_READ, TOMOYO_TYPE_WRITE, + TOMOYO_TYPE_APPEND, TOMOYO_TYPE_UNLINK, + TOMOYO_TYPE_GETATTR, TOMOYO_TYPE_RMDIR, TOMOYO_TYPE_TRUNCATE, TOMOYO_TYPE_SYMLINK, - TOMOYO_TYPE_REWRITE, TOMOYO_TYPE_CHROOT, TOMOYO_TYPE_UMOUNT, TOMOYO_MAX_PATH_OPERATION }; -#define TOMOYO_RW_MASK ((1 << TOMOYO_TYPE_READ) | (1 << TOMOYO_TYPE_WRITE)) +/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ +enum tomoyo_memory_stat_type { + TOMOYO_MEMORY_POLICY, + TOMOYO_MEMORY_AUDIT, + TOMOYO_MEMORY_QUERY, + TOMOYO_MAX_MEMORY_STAT +}; enum tomoyo_mkdev_acl_index { TOMOYO_TYPE_MKBLOCK, @@ -150,6 +228,7 @@ enum tomoyo_mkdev_acl_index { TOMOYO_MAX_MKDEV_OPERATION }; +/* Index numbers for access controls with two pathnames. 
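 * These back the two-pathname directives, e.g. a policy line such as
 * "file rename /old/name /new/name" (the paths are only illustrative).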
*/ enum tomoyo_path2_acl_index { TOMOYO_TYPE_LINK, TOMOYO_TYPE_RENAME, @@ -157,6 +236,7 @@ enum tomoyo_path2_acl_index { TOMOYO_MAX_PATH2_OPERATION }; +/* Index numbers for access controls with one pathname and one number. */ enum tomoyo_path_number_acl_index { TOMOYO_TYPE_CREATE, TOMOYO_TYPE_MKDIR, @@ -169,31 +249,45 @@ enum tomoyo_path_number_acl_index { TOMOYO_MAX_PATH_NUMBER_OPERATION }; +/* Index numbers for /sys/kernel/security/tomoyo/ interfaces. */ enum tomoyo_securityfs_interface_index { TOMOYO_DOMAINPOLICY, TOMOYO_EXCEPTIONPOLICY, - TOMOYO_DOMAIN_STATUS, TOMOYO_PROCESS_STATUS, - TOMOYO_MEMINFO, + TOMOYO_STAT, TOMOYO_SELFDOMAIN, + TOMOYO_AUDIT, TOMOYO_VERSION, TOMOYO_PROFILE, TOMOYO_QUERY, TOMOYO_MANAGER }; +/* Index numbers for special mount operations. */ +enum tomoyo_special_mount { + TOMOYO_MOUNT_BIND, /* mount --bind /source /dest */ + TOMOYO_MOUNT_MOVE, /* mount --move /old /new */ + TOMOYO_MOUNT_REMOUNT, /* mount -o remount /dir */ + TOMOYO_MOUNT_MAKE_UNBINDABLE, /* mount --make-unbindable /dir */ + TOMOYO_MOUNT_MAKE_PRIVATE, /* mount --make-private /dir */ + TOMOYO_MOUNT_MAKE_SLAVE, /* mount --make-slave /dir */ + TOMOYO_MOUNT_MAKE_SHARED, /* mount --make-shared /dir */ + TOMOYO_MAX_SPECIAL_MOUNT +}; + +/* Index numbers for functionality. */ enum tomoyo_mac_index { TOMOYO_MAC_FILE_EXECUTE, TOMOYO_MAC_FILE_OPEN, TOMOYO_MAC_FILE_CREATE, TOMOYO_MAC_FILE_UNLINK, + TOMOYO_MAC_FILE_GETATTR, TOMOYO_MAC_FILE_MKDIR, TOMOYO_MAC_FILE_RMDIR, TOMOYO_MAC_FILE_MKFIFO, TOMOYO_MAC_FILE_MKSOCK, TOMOYO_MAC_FILE_TRUNCATE, TOMOYO_MAC_FILE_SYMLINK, - TOMOYO_MAC_FILE_REWRITE, TOMOYO_MAC_FILE_MKBLOCK, TOMOYO_MAC_FILE_MKCHAR, TOMOYO_MAC_FILE_LINK, @@ -209,38 +303,66 @@ enum tomoyo_mac_index { TOMOYO_MAX_MAC_INDEX }; +/* Index numbers for category of functionality. */ enum tomoyo_mac_category_index { TOMOYO_MAC_CATEGORY_FILE, TOMOYO_MAX_MAC_CATEGORY_INDEX }; -#define TOMOYO_RETRY_REQUEST 1 /* Retry this request. */ - -/********** Structure definitions. **********/ - /* - * tomoyo_acl_head is a structure which is used for holding elements not in - * domain policy. - * It has following fields. + * Retry this request. Returned by tomoyo_supervisor() if policy violation has + * occurred in enforcing mode and the userspace daemon decided to retry. * - * (1) "list" which is linked to tomoyo_policy_list[] . - * (2) "is_deleted" is a bool which is true if marked as deleted, false - * otherwise. + * We must choose a positive value in order to distinguish "granted" (which is + * 0) and "rejected" (which is a negative value) and "retry". */ +#define TOMOYO_RETRY_REQUEST 1 + +/* Index numbers for /sys/kernel/security/tomoyo/stat interface. */ +enum tomoyo_policy_stat_type { + /* Do not change this order. */ + TOMOYO_STAT_POLICY_UPDATES, + TOMOYO_STAT_POLICY_LEARNING, /* == TOMOYO_CONFIG_LEARNING */ + TOMOYO_STAT_POLICY_PERMISSIVE, /* == TOMOYO_CONFIG_PERMISSIVE */ + TOMOYO_STAT_POLICY_ENFORCING, /* == TOMOYO_CONFIG_ENFORCING */ + TOMOYO_MAX_POLICY_STAT +}; + +/* Index numbers for profile's PREFERENCE values. */ +enum tomoyo_pref_index { + TOMOYO_PREF_MAX_AUDIT_LOG, + TOMOYO_PREF_MAX_LEARNING_ENTRY, + TOMOYO_MAX_PREF +}; + +/********** Structure definitions. **********/ + +/* Common header for holding ACL entries. */ struct tomoyo_acl_head { struct list_head list; bool is_deleted; } __packed; -/* - * tomoyo_request_info is a structure which is used for holding - * - * (1) Domain information of current process. - * (2) How many retries are made for this request. - * (3) Profile number used for this request. 
- * (4) Access control mode of the profile. - */ +/* Common header for shared entries. */ +struct tomoyo_shared_acl_head { + struct list_head list; + atomic_t users; +} __packed; + +struct tomoyo_policy_namespace; + +/* Structure for request info. */ struct tomoyo_request_info { + /* + * For holding parameters specific to operations which deal files. + * NULL if not dealing files. + */ + struct tomoyo_obj_info *obj; + /* + * For holding parameters specific to execve() request. + * NULL if not dealing do_execve(). + */ + struct tomoyo_execve *ee; struct tomoyo_domain_info *domain; /* For holding parameters. */ union { @@ -248,11 +370,13 @@ struct tomoyo_request_info { const struct tomoyo_path_info *filename; /* For using wildcards at tomoyo_find_next_domain(). */ const struct tomoyo_path_info *matched_path; + /* One of values in "enum tomoyo_path_acl_index". */ u8 operation; } path; struct { const struct tomoyo_path_info *filename1; const struct tomoyo_path_info *filename2; + /* One of values in "enum tomoyo_path2_acl_index". */ u8 operation; } path2; struct { @@ -260,11 +384,16 @@ struct tomoyo_request_info { unsigned int mode; unsigned int major; unsigned int minor; + /* One of values in "enum tomoyo_mkdev_acl_index". */ u8 operation; } mkdev; struct { const struct tomoyo_path_info *filename; unsigned long number; + /* + * One of values in + * "enum tomoyo_path_number_acl_index". + */ u8 operation; } path_number; struct { @@ -283,26 +412,7 @@ struct tomoyo_request_info { u8 type; }; -/* - * tomoyo_path_info is a structure which is used for holding a string data - * used by TOMOYO. - * This structure has several fields for supporting pattern matching. - * - * (1) "name" is the '\0' terminated string data. - * (2) "hash" is full_name_hash(name, strlen(name)). - * This allows tomoyo_pathcmp() to compare by hash before actually compare - * using strcmp(). - * (3) "const_len" is the length of the initial segment of "name" which - * consists entirely of non wildcard characters. In other words, the length - * which we can compare two strings using strncmp(). - * (4) "is_dir" is a bool which is true if "name" ends with "/", - * false otherwise. - * TOMOYO distinguishes directory and non-directory. A directory ends with - * "/" and non-directory does not end with "/". - * (5) "is_patterned" is a bool which is true if "name" contains wildcard - * characters, false otherwise. This allows TOMOYO to use "hash" and - * strcmp() for string comparison if "is_patterned" is false. - */ +/* Structure for holding a token. */ struct tomoyo_path_info { const char *name; u32 hash; /* = full_name_hash(name, strlen(name)) */ @@ -311,36 +421,32 @@ struct tomoyo_path_info { bool is_patterned; /* = tomoyo_path_contains_pattern(name) */ }; -/* - * tomoyo_name is a structure which is used for linking - * "struct tomoyo_path_info" into tomoyo_name_list . - */ +/* Structure for holding string data. */ struct tomoyo_name { - struct list_head list; - atomic_t users; + struct tomoyo_shared_acl_head head; struct tomoyo_path_info entry; }; +/* Structure for holding a word. */ struct tomoyo_name_union { + /* Either @filename or @group is NULL. */ const struct tomoyo_path_info *filename; struct tomoyo_group *group; - u8 is_group; }; +/* Structure for holding a number. */ struct tomoyo_number_union { unsigned long values[2]; - struct tomoyo_group *group; - u8 min_type; - u8 max_type; - u8 is_group; + struct tomoyo_group *group; /* Maybe NULL. */ + /* One of values in "enum tomoyo_value_type". 
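 * One slot per end of the min-max range; e.g. (assuming conventional C
 * prefix rules in the parser) "0644" would be recorded as
 * TOMOYO_VALUE_TYPE_OCTAL and "0x1f" as TOMOYO_VALUE_TYPE_HEXADECIMAL, so
 * the value can be printed back in the notation it was written in.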
*/ + u8 value_type[2]; }; /* Structure for "path_group"/"number_group" directive. */ struct tomoyo_group { - struct list_head list; + struct tomoyo_shared_acl_head head; const struct tomoyo_path_info *group_name; struct list_head member_list; - atomic_t users; }; /* Structure for "path_group" directive. */ @@ -355,130 +461,158 @@ struct tomoyo_number_group { struct tomoyo_number_union number; }; -/* - * tomoyo_acl_info is a structure which is used for holding - * - * (1) "list" which is linked to the ->acl_info_list of - * "struct tomoyo_domain_info" - * (2) "is_deleted" is a bool which is true if this domain is marked as - * "deleted", false otherwise. - * (3) "type" which tells type of the entry. - * - * Packing "struct tomoyo_acl_info" allows - * "struct tomoyo_path_acl" to embed "u16" and "struct tomoyo_path2_acl" - * "struct tomoyo_path_number_acl" "struct tomoyo_mkdev_acl" to embed - * "u8" without enlarging their structure size. - */ +/* Subset of "struct stat". Used by conditional ACL and audit logs. */ +struct tomoyo_mini_stat { + uid_t uid; + gid_t gid; + ino_t ino; + mode_t mode; + dev_t dev; + dev_t rdev; +}; + +/* Structure for dumping argv[] and envp[] of "struct linux_binprm". */ +struct tomoyo_page_dump { + struct page *page; /* Previously dumped page. */ + char *data; /* Contents of "page". Size is PAGE_SIZE. */ +}; + +/* Structure for attribute checks in addition to pathname checks. */ +struct tomoyo_obj_info { + /* + * True if tomoyo_get_attributes() was already called, false otherwise. + */ + bool validate_done; + /* True if @stat[] is valid. */ + bool stat_valid[TOMOYO_MAX_PATH_STAT]; + /* First pathname. Initialized with { NULL, NULL } if no path. */ + struct path path1; + /* Second pathname. Initialized with { NULL, NULL } if no path. */ + struct path path2; + /* + * Information on @path1, @path1's parent directory, @path2, @path2's + * parent directory. + */ + struct tomoyo_mini_stat stat[TOMOYO_MAX_PATH_STAT]; + /* + * Content of symbolic link to be created. NULL for operations other + * than symlink(). + */ + struct tomoyo_path_info *symlink_target; +}; + +/* Structure for argv[]. */ +struct tomoyo_argv { + unsigned long index; + const struct tomoyo_path_info *value; + bool is_not; +}; + +/* Structure for envp[]. */ +struct tomoyo_envp { + const struct tomoyo_path_info *name; + const struct tomoyo_path_info *value; + bool is_not; +}; + +/* Structure for execve() operation. */ +struct tomoyo_execve { + struct tomoyo_request_info r; + struct tomoyo_obj_info obj; + struct linux_binprm *bprm; + /* For dumping argv[] and envp[]. */ + struct tomoyo_page_dump dump; + /* For temporary use. */ + char *tmp; /* Size is TOMOYO_EXEC_TMPSIZE bytes */ +}; + +/* Structure for entries which follows "struct tomoyo_condition". */ +struct tomoyo_condition_element { + /* + * Left hand operand. A "struct tomoyo_argv" for TOMOYO_ARGV_ENTRY, a + * "struct tomoyo_envp" for TOMOYO_ENVP_ENTRY is attached to the tail + * of the array of this struct. + */ + u8 left; + /* + * Right hand operand. A "struct tomoyo_number_union" for + * TOMOYO_NUMBER_UNION, a "struct tomoyo_name_union" for + * TOMOYO_NAME_UNION is attached to the tail of the array of this + * struct. + */ + u8 right; + /* Equation operator. True if equals or overlaps, false otherwise. */ + bool equals; +}; + +/* Structure for optional arguments. */ +struct tomoyo_condition { + struct tomoyo_shared_acl_head head; + u32 size; /* Memory size allocated for this entry. */ + u16 condc; /* Number of conditions in this struct. 
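 * (i.e. how many "struct tomoyo_condition_element" entries follow this
 * header inside the single allocation whose layout is described below)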
*/ + u16 numbers_count; /* Number of "struct tomoyo_number_union values". */ + u16 names_count; /* Number of "struct tomoyo_name_union names". */ + u16 argc; /* Number of "struct tomoyo_argv". */ + u16 envc; /* Number of "struct tomoyo_envp". */ + /* + * struct tomoyo_condition_element condition[condc]; + * struct tomoyo_number_union values[numbers_count]; + * struct tomoyo_name_union names[names_count]; + * struct tomoyo_argv argv[argc]; + * struct tomoyo_envp envp[envc]; + */ +}; + +/* Common header for individual entries. */ struct tomoyo_acl_info { struct list_head list; + struct tomoyo_condition *cond; /* Maybe NULL. */ bool is_deleted; - u8 type; /* = one of values in "enum tomoyo_acl_entry_type_index". */ + u8 type; /* One of values in "enum tomoyo_acl_entry_type_index". */ } __packed; -/* - * tomoyo_domain_info is a structure which is used for holding permissions - * (e.g. "allow_read /lib/libc-2.5.so") given to each domain. - * It has following fields. - * - * (1) "list" which is linked to tomoyo_domain_list . - * (2) "acl_info_list" which is linked to "struct tomoyo_acl_info". - * (3) "domainname" which holds the name of the domain. - * (4) "profile" which remembers profile number assigned to this domain. - * (5) "is_deleted" is a bool which is true if this domain is marked as - * "deleted", false otherwise. - * (6) "quota_warned" is a bool which is used for suppressing warning message - * when learning mode learned too much entries. - * (7) "ignore_global_allow_read" is a bool which is true if this domain - * should ignore "allow_read" directive in exception policy. - * (8) "transition_failed" is a bool which is set to true when this domain was - * unable to create a new domain at tomoyo_find_next_domain() because the - * name of the domain to be created was too long or it could not allocate - * memory. If set to true, more than one process continued execve() - * without domain transition. - * (9) "users" is an atomic_t that holds how many "struct cred"->security - * are referring this "struct tomoyo_domain_info". If is_deleted == true - * and users == 0, this struct will be kfree()d upon next garbage - * collection. - * - * A domain's lifecycle is an analogy of files on / directory. - * Multiple domains with the same domainname cannot be created (as with - * creating files with the same filename fails with -EEXIST). - * If a process reached a domain, that process can reside in that domain after - * that domain is marked as "deleted" (as with a process can access an already - * open()ed file after that file was unlink()ed). - */ +/* Structure for domain information. */ struct tomoyo_domain_info { struct list_head list; struct list_head acl_info_list; /* Name of this domain. Never NULL. */ const struct tomoyo_path_info *domainname; + /* Namespace for this domain. Never NULL. */ + struct tomoyo_policy_namespace *ns; u8 profile; /* Profile number to use. */ + u8 group; /* Group number to use. */ bool is_deleted; /* Delete flag. */ - bool quota_warned; /* Quota warnning flag. */ - bool ignore_global_allow_read; /* Ignore "allow_read" flag. */ - bool transition_failed; /* Domain transition failed flag. */ + bool flags[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; atomic_t users; /* Number of referring credentials. */ }; /* - * tomoyo_path_acl is a structure which is used for holding an - * entry with one pathname operation (e.g. open(), mkdir()). - * It has following fields. - * - * (1) "head" which is a "struct tomoyo_acl_info". - * (2) "perm" which is a bitmask of permitted operations. 
- * (3) "name" is the pathname. - * - * Directives held by this structure are "allow_read/write", "allow_execute", - * "allow_read", "allow_write", "allow_unlink", "allow_rmdir", - * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_chroot" and - * "allow_unmount". + * Structure for "file execute", "file read", "file write", "file append", + * "file unlink", "file getattr", "file rmdir", "file truncate", + * "file symlink", "file chroot" and "file unmount" directive. */ struct tomoyo_path_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_ACL */ - u16 perm; + u16 perm; /* Bitmask of values in "enum tomoyo_path_acl_index". */ struct tomoyo_name_union name; }; /* - * tomoyo_path_number_acl is a structure which is used for holding an - * entry with one pathname and one number operation. - * It has following fields. - * - * (1) "head" which is a "struct tomoyo_acl_info". - * (2) "perm" which is a bitmask of permitted operations. - * (3) "name" is the pathname. - * (4) "number" is the numeric value. - * - * Directives held by this structure are "allow_create", "allow_mkdir", - * "allow_ioctl", "allow_mkfifo", "allow_mksock", "allow_chmod", "allow_chown" - * and "allow_chgrp". - * + * Structure for "file create", "file mkdir", "file mkfifo", "file mksock", + * "file ioctl", "file chmod", "file chown" and "file chgrp" directive. */ struct tomoyo_path_number_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH_NUMBER_ACL */ + /* Bitmask of values in "enum tomoyo_path_number_acl_index". */ u8 perm; struct tomoyo_name_union name; struct tomoyo_number_union number; }; -/* - * tomoyo_mkdev_acl is a structure which is used for holding an - * entry with one pathname and three numbers operation. - * It has following fields. - * - * (1) "head" which is a "struct tomoyo_acl_info". - * (2) "perm" which is a bitmask of permitted operations. - * (3) "mode" is the create mode. - * (4) "major" is the major number of device node. - * (5) "minor" is the minor number of device node. - * - * Directives held by this structure are "allow_mkchar", "allow_mkblock". - * - */ +/* Structure for "file mkblock" and "file mkchar" directive. */ struct tomoyo_mkdev_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MKDEV_ACL */ - u8 perm; + u8 perm; /* Bitmask of values in "enum tomoyo_mkdev_acl_index". */ struct tomoyo_name_union name; struct tomoyo_number_union mode; struct tomoyo_number_union major; @@ -486,38 +620,16 @@ struct tomoyo_mkdev_acl { }; /* - * tomoyo_path2_acl is a structure which is used for holding an - * entry with two pathnames operation (i.e. link(), rename() and pivot_root()). - * It has following fields. - * - * (1) "head" which is a "struct tomoyo_acl_info". - * (2) "perm" which is a bitmask of permitted operations. - * (3) "name1" is the source/old pathname. - * (4) "name2" is the destination/new pathname. - * - * Directives held by this structure are "allow_rename", "allow_link" and - * "allow_pivot_root". + * Structure for "file rename", "file link" and "file pivot_root" directive. */ struct tomoyo_path2_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_PATH2_ACL */ - u8 perm; + u8 perm; /* Bitmask of values in "enum tomoyo_path2_acl_index". */ struct tomoyo_name_union name1; struct tomoyo_name_union name2; }; -/* - * tomoyo_mount_acl is a structure which is used for holding an - * entry for mount operation. - * It has following fields. - * - * (1) "head" which is a "struct tomoyo_acl_info". - * (2) "dev_name" is the device name. 
- * (3) "dir_name" is the mount point. - * (4) "fs_type" is the filesystem type. - * (5) "flags" is the mount flags. - * - * Directive held by this structure is "allow_mount". - */ +/* Structure for "file mount" directive. */ struct tomoyo_mount_acl { struct tomoyo_acl_info head; /* type = TOMOYO_TYPE_MOUNT_ACL */ struct tomoyo_name_union dev_name; @@ -526,7 +638,15 @@ struct tomoyo_mount_acl { struct tomoyo_number_union flags; }; -#define TOMOYO_MAX_IO_READ_QUEUE 32 +/* Structure for holding a line from /sys/kernel/security/tomoyo/ interface. */ +struct tomoyo_acl_param { + char *data; + struct list_head *list; + struct tomoyo_policy_namespace *ns; + bool is_delete; +}; + +#define TOMOYO_MAX_IO_READ_QUEUE 64 /* * Structure for reading/writing policy via /sys/kernel/security/tomoyo @@ -538,95 +658,55 @@ struct tomoyo_io_buffer { int (*poll) (struct file *file, poll_table *wait); /* Exclusive lock for this structure. */ struct mutex io_sem; - /* Index returned by tomoyo_read_lock(). */ - int reader_idx; char __user *read_user_buf; - int read_user_buf_avail; + size_t read_user_buf_avail; struct { + struct list_head *ns; struct list_head *domain; struct list_head *group; struct list_head *acl; - int avail; - int step; - int query_index; + size_t avail; + unsigned int step; + unsigned int query_index; u16 index; + u16 cond_index; + u8 acl_group_index; + u8 cond_step; u8 bit; u8 w_pos; bool eof; bool print_this_domain_only; - bool print_execute_only; + bool print_transition_related_only; + bool print_cond_part; const char *w[TOMOYO_MAX_IO_READ_QUEUE]; } r; - /* The position currently writing to. */ - struct tomoyo_domain_info *write_var1; + struct { + struct tomoyo_policy_namespace *ns; + /* The position currently writing to. */ + struct tomoyo_domain_info *domain; + /* Bytes available for writing. */ + size_t avail; + bool is_delete; + } w; /* Buffer for reading. */ char *read_buf; /* Size of read buffer. */ - int readbuf_size; + size_t readbuf_size; /* Buffer for writing. */ char *write_buf; - /* Bytes available for writing. */ - int write_avail; /* Size of write buffer. */ - int writebuf_size; + size_t writebuf_size; /* Type of this interface. */ - u8 type; -}; - -/* - * tomoyo_readable_file is a structure which is used for holding - * "allow_read" entries. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "filename" is a pathname which is allowed to open(O_RDONLY). - */ -struct tomoyo_readable_file { - struct tomoyo_acl_head head; - const struct tomoyo_path_info *filename; -}; - -/* - * tomoyo_no_pattern is a structure which is used for holding - * "file_pattern" entries. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "pattern" is a pathname pattern which is used for converting pathnames - * to pathname patterns during learning mode. - */ -struct tomoyo_no_pattern { - struct tomoyo_acl_head head; - const struct tomoyo_path_info *pattern; -}; - -/* - * tomoyo_no_rewrite is a structure which is used for holding - * "deny_rewrite" entries. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "pattern" is a pathname which is by default not permitted to modify - * already existing content. - */ -struct tomoyo_no_rewrite { - struct tomoyo_acl_head head; - const struct tomoyo_path_info *pattern; + enum tomoyo_securityfs_interface_index type; + /* Users counter protected by tomoyo_io_buffer_list_lock. */ + u8 users; + /* List for telling GC not to kfree() elements. 
 */ + struct list_head list; +}; /* - * tomoyo_transition_control is a structure which is used for holding - * "initialize_domain"/"no_initialize_domain"/"keep_domain"/"no_keep_domain" - * entries. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "type" is type of this entry. - * (3) "is_last_name" is a bool which is true if "domainname" is "the last - * component of a domainname", false otherwise. - * (4) "domainname" which is "a domainname" or "the last component of a - * domainname". - * (5) "program" which is a program's pathname. + * Structure for "initialize_domain"/"no_initialize_domain"/"keep_domain"/ + * "no_keep_domain" keyword. */ struct tomoyo_transition_control { struct tomoyo_acl_head head; @@ -637,32 +717,14 @@ struct tomoyo_transition_control { const struct tomoyo_path_info *program; /* Maybe NULL */ }; -/* - * tomoyo_aggregator is a structure which is used for holding - * "aggregator" entries. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "original_name" which is originally requested name. - * (3) "aggregated_name" which is name to rewrite. - */ +/* Structure for "aggregator" keyword. */ struct tomoyo_aggregator { struct tomoyo_acl_head head; const struct tomoyo_path_info *original_name; const struct tomoyo_path_info *aggregated_name; }; -/* - * tomoyo_manager is a structure which is used for holding list of - * domainnames or programs which are permitted to modify configuration via - * /sys/kernel/security/tomoyo/ interface. - * It has following fields. - * - * (1) "head" is "struct tomoyo_acl_head". - * (2) "is_domain" is a bool which is true if "manager" is a domainname, false - * otherwise. - * (3) "manager" is a domainname or a program's pathname. - */ +/* Structure for policy manager. */ struct tomoyo_manager { struct tomoyo_acl_head head; bool is_domain; /* True if manager is a domainname. */ @@ -677,6 +739,7 @@ struct tomoyo_preference { bool permissive_verbose; }; +/* Structure for /sys/kernel/security/tomoyo/profile interface. */ struct tomoyo_profile { const struct tomoyo_path_info *comment; struct tomoyo_preference *learning; @@ -685,323 +748,409 @@ struct tomoyo_profile { struct tomoyo_preference preference; u8 default_config; u8 config[TOMOYO_MAX_MAC_INDEX + TOMOYO_MAX_MAC_CATEGORY_INDEX]; + unsigned int pref[TOMOYO_MAX_PREF]; +}; + +/* Structure for representing YYYY/MM/DD hh/mm/ss. */ +struct tomoyo_time { + u16 year; + u8 month; + u8 day; + u8 hour; + u8 min; + u8 sec; +}; + +/* Structure for policy namespace. */ +struct tomoyo_policy_namespace { + /* Profile table. Memory is allocated as needed. */ + struct tomoyo_profile *profile_ptr[TOMOYO_MAX_PROFILES]; + /* List of "struct tomoyo_group". */ + struct list_head group_list[TOMOYO_MAX_GROUP]; + /* List of policy. */ + struct list_head policy_list[TOMOYO_MAX_POLICY]; + /* The global ACL referred by "use_group" keyword. */ + struct list_head acl_group[TOMOYO_MAX_ACL_GROUPS]; + /* List for connecting to tomoyo_namespace_list list. */ + struct list_head namespace_list; + /* Profile version. Currently only 20100903 is defined. */ + unsigned int profile_version; + /* Name of this namespace (e.g. "<kernel>", "</usr/sbin/httpd>" ). */ + const char *name; +}; /********** Function prototypes. **********/ -/* Check whether the given string starts with the given keyword. */ -bool tomoyo_str_starts(char **src, const char *find); -/* Get tomoyo_realpath() of current process. */ -const char *tomoyo_get_exe(void); -/* Format string.
*/ -void tomoyo_normalize_line(unsigned char *buffer); -/* Print warning or error message on console. */ -void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -/* Check all profiles currently assigned to domains are defined. */ -void tomoyo_check_profile(void); -/* Open operation for /sys/kernel/security/tomoyo/ interface. */ -int tomoyo_open_control(const u8 type, struct file *file); -/* Close /sys/kernel/security/tomoyo/ interface. */ -int tomoyo_close_control(struct file *file); -/* Poll operation for /sys/kernel/security/tomoyo/ interface. */ -int tomoyo_poll_control(struct file *file, poll_table *wait); -/* Read operation for /sys/kernel/security/tomoyo/ interface. */ -int tomoyo_read_control(struct file *file, char __user *buffer, - const int buffer_len); -/* Write operation for /sys/kernel/security/tomoyo/ interface. */ -int tomoyo_write_control(struct file *file, const char __user *buffer, - const int buffer_len); -/* Check whether the domain has too many ACL entries to hold. */ -bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r); -/* Print out of memory warning message. */ -void tomoyo_warn_oom(const char *function); -/* Check whether the given name matches the given name_union. */ -const struct tomoyo_path_info * -tomoyo_compare_name_union(const struct tomoyo_path_info *name, - const struct tomoyo_name_union *ptr); -/* Check whether the given number matches the given number_union. */ bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr); -int tomoyo_get_mode(const u8 profile, const u8 index); -void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -/* Check whether the domainname is correct. */ +bool tomoyo_condition(struct tomoyo_request_info *r, + const struct tomoyo_condition *cond); bool tomoyo_correct_domain(const unsigned char *domainname); -/* Check whether the token is correct. */ bool tomoyo_correct_path(const char *filename); bool tomoyo_correct_word(const char *string); -/* Check whether the token can be a domainname. */ bool tomoyo_domain_def(const unsigned char *buffer); -bool tomoyo_parse_name_union(const char *filename, - struct tomoyo_name_union *ptr); -/* Check whether the given filename matches the given path_group. */ -const struct tomoyo_path_info * -tomoyo_path_matches_group(const struct tomoyo_path_info *pathname, - const struct tomoyo_group *group); -/* Check whether the given value matches the given number_group. */ +bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r); +bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, + struct tomoyo_page_dump *dump); +bool tomoyo_memory_ok(void *ptr); bool tomoyo_number_matches_group(const unsigned long min, const unsigned long max, const struct tomoyo_group *group); -/* Check whether the given filename matches the given pattern. */ +bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, + struct tomoyo_name_union *ptr); +bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, + struct tomoyo_number_union *ptr); bool tomoyo_path_matches_pattern(const struct tomoyo_path_info *filename, const struct tomoyo_path_info *pattern); - -bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num); -/* Tokenize a line. */ -bool tomoyo_tokenize(char *buffer, char *w[], size_t size); -/* Write domain policy violation warning message to console? 
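
tomoyo_correct_word() and tomoyo_encode() in the block above share one encoding rule: a word may contain only printable ASCII, '\' is doubled, and every other byte is rewritten as a three-digit octal escape, so a policy line can always be split safely on spaces. A minimal userspace sketch of that rule; the same escaping loop appears inside tomoyo_scan_bprm() in condition.c further down this patch:

  #include <stdio.h>

  /* Rewrite @in into TOMOYO's escaped form; @out needs 4x the room. */
  static void encode(const unsigned char *in, char *out)
  {
          while (*in) {
                  const unsigned char c = *in++;

                  if (c == '\\') {
                          *out++ = '\\';
                          *out++ = '\\';
                  } else if (c > ' ' && c < 127) {
                          *out++ = c;        /* printable: copy as-is */
                  } else {
                          *out++ = '\\';
                          *out++ = (c >> 6) + '0';
                          *out++ = ((c >> 3) & 7) + '0';
                          *out++ = (c & 7) + '0';
                  }
          }
          *out = '\0';
  }

  int main(void)
  {
          char buf[64];

          encode((const unsigned char *)"a b\n", buf);
          printf("%s\n", buf);        /* prints a\040b\012 */
          return 0;
  }
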
*/ -bool tomoyo_verbose_mode(const struct tomoyo_domain_info *domain); -/* Fill "struct tomoyo_request_info". */ -int tomoyo_init_request_info(struct tomoyo_request_info *r, - struct tomoyo_domain_info *domain, - const u8 index); -/* Check permission for mount operation. */ -int tomoyo_mount_permission(char *dev_name, struct path *path, char *type, - unsigned long flags, void *data_page); -/* Create "aggregator" entry in exception policy. */ -int tomoyo_write_aggregator(char *data, const bool is_delete); -int tomoyo_write_transition_control(char *data, const bool is_delete, - const u8 type); -/* - * Create "allow_read/write", "allow_execute", "allow_read", "allow_write", - * "allow_create", "allow_unlink", "allow_mkdir", "allow_rmdir", - * "allow_mkfifo", "allow_mksock", "allow_mkblock", "allow_mkchar", - * "allow_truncate", "allow_symlink", "allow_rewrite", "allow_rename" and - * "allow_link" entry in domain policy. - */ -int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain, - const bool is_delete); -/* Create "allow_read" entry in exception policy. */ -int tomoyo_write_globally_readable(char *data, const bool is_delete); -/* Create "allow_mount" entry in domain policy. */ -int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain, - const bool is_delete); -/* Create "deny_rewrite" entry in exception policy. */ -int tomoyo_write_no_rewrite(char *data, const bool is_delete); -/* Create "file_pattern" entry in exception policy. */ -int tomoyo_write_pattern(char *data, const bool is_delete); -/* Create "path_group"/"number_group" entry in exception policy. */ -int tomoyo_write_group(char *data, const bool is_delete, const u8 type); -int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) - __attribute__ ((format(printf, 2, 3))); -/* Find a domain by the given name. */ -struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname); -/* Find or create a domain by the given name. */ -struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, - const u8 profile); -struct tomoyo_profile *tomoyo_profile(const u8 profile); -/* - * Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group". - */ -struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 type); - -/* Check mode for specified functionality. */ -unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain, - const u8 index); -/* Fill in "struct tomoyo_path_info" members. */ -void tomoyo_fill_path_info(struct tomoyo_path_info *ptr); -/* Run policy loader when /sbin/init starts. */ -void tomoyo_load_policy(const char *filename); - -void tomoyo_put_number_union(struct tomoyo_number_union *ptr); - -/* Convert binary string to ascii string. */ +bool tomoyo_permstr(const char *string, const char *keyword); +bool tomoyo_str_starts(char **src, const char *find); char *tomoyo_encode(const char *str); - -/* - * Returns realpath(3) of the given pathname except that - * ignores chroot'ed root and does not follow the final symlink. - */ -char *tomoyo_realpath_nofollow(const char *pathname); -/* - * Returns realpath(3) of the given pathname except that - * ignores chroot'ed root and the pathname is already solved. - */ +char *tomoyo_init_log(struct tomoyo_request_info *r, int len, const char *fmt, + va_list args); +char *tomoyo_read_token(struct tomoyo_acl_param *param); char *tomoyo_realpath_from_path(struct path *path); -/* Get patterned pathname. 
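
tomoyo_str_starts(), declared in the prototype block above, is the parser's basic "consume a keyword" primitive: it advances the caller's cursor past the keyword only on a match. A userspace sketch of that contract (the body here is a plausible reconstruction, not a copy); the policy write handlers strip a leading "delete " this way before dispatching the rest of the line:

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  static bool str_starts(char **src, const char *find)
  {
          const size_t len = strlen(find);

          if (strncmp(*src, find, len))
                  return false;
          *src += len;        /* consume the keyword */
          return true;
  }

  int main(void)
  {
          char line[] = "delete file read /etc/shadow";
          char *cp = line;
          const bool is_delete = str_starts(&cp, "delete ");

          printf("is_delete=%d rest=%s\n", is_delete, cp);
          return 0;
  }
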
*/ -const char *tomoyo_pattern(const struct tomoyo_path_info *filename); - -/* Check memory quota. */ -bool tomoyo_memory_ok(void *ptr); -void *tomoyo_commit_ok(void *data, const unsigned int size); - -/* - * Keep the given name on the RAM. - * The RAM is shared, so NEVER try to modify or kfree() the returned name. - */ +char *tomoyo_realpath_nofollow(const char *pathname); +const char *tomoyo_get_exe(void); +const char *tomoyo_yesno(const unsigned int value); +const struct tomoyo_path_info *tomoyo_compare_name_union +(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr); const struct tomoyo_path_info *tomoyo_get_name(const char *name); - -/* Check for memory usage. */ -void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head); - -/* Set memory quota. */ -int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head); - -/* Initialize mm related code. */ -void __init tomoyo_mm_init(void); -int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, - const struct tomoyo_path_info *filename); +const struct tomoyo_path_info *tomoyo_path_matches_group +(const struct tomoyo_path_info *pathname, const struct tomoyo_group *group); int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, struct path *path, const int flag); -int tomoyo_path_number_perm(const u8 operation, struct path *path, - unsigned long number); +int tomoyo_close_control(struct tomoyo_io_buffer *head); +int tomoyo_find_next_domain(struct linux_binprm *bprm); +int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, + const u8 index); +int tomoyo_init_request_info(struct tomoyo_request_info *r, + struct tomoyo_domain_info *domain, + const u8 index); int tomoyo_mkdev_perm(const u8 operation, struct path *path, const unsigned int mode, unsigned int dev); -int tomoyo_path_perm(const u8 operation, struct path *path); +int tomoyo_mount_permission(char *dev_name, struct path *path, + const char *type, unsigned long flags, + void *data_page); +int tomoyo_open_control(const u8 type, struct file *file); int tomoyo_path2_perm(const u8 operation, struct path *path1, struct path *path2); -int tomoyo_find_next_domain(struct linux_binprm *bprm); - -void tomoyo_print_ulong(char *buffer, const int buffer_len, - const unsigned long value, const u8 type); - -/* Drop refcount on tomoyo_name_union. */ -void tomoyo_put_name_union(struct tomoyo_name_union *ptr); - -/* Run garbage collector. */ -void tomoyo_run_gc(void); - -void tomoyo_memory_free(void *ptr); - +int tomoyo_path_number_perm(const u8 operation, struct path *path, + unsigned long number); +int tomoyo_path_perm(const u8 operation, struct path *path, + const char *target); +int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, + const struct tomoyo_path_info *filename); +int tomoyo_poll_control(struct file *file, poll_table *wait); +int tomoyo_poll_log(struct file *file, poll_table *wait); +int tomoyo_supervisor(struct tomoyo_request_info *r, const char *fmt, ...) 
+ __printf(2, 3); int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, - bool is_delete, struct tomoyo_domain_info *domain, - bool (*check_duplicate) (const struct tomoyo_acl_info - *, - const struct tomoyo_acl_info - *), - bool (*merge_duplicate) (struct tomoyo_acl_info *, - struct tomoyo_acl_info *, - const bool)); + struct tomoyo_acl_param *param, + bool (*check_duplicate) + (const struct tomoyo_acl_info *, + const struct tomoyo_acl_info *), + bool (*merge_duplicate) + (struct tomoyo_acl_info *, struct tomoyo_acl_info *, + const bool)); int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, - bool is_delete, struct list_head *list, - bool (*check_duplicate) (const struct tomoyo_acl_head - *, - const struct tomoyo_acl_head - *)); + struct tomoyo_acl_param *param, + bool (*check_duplicate) + (const struct tomoyo_acl_head *, + const struct tomoyo_acl_head *)); +int tomoyo_write_aggregator(struct tomoyo_acl_param *param); +int tomoyo_write_file(struct tomoyo_acl_param *param); +int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type); +int tomoyo_write_transition_control(struct tomoyo_acl_param *param, + const u8 type); +ssize_t tomoyo_read_control(struct tomoyo_io_buffer *head, char __user *buffer, + const int buffer_len); +ssize_t tomoyo_write_control(struct tomoyo_io_buffer *head, + const char __user *buffer, const int buffer_len); +struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param); +struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, + const bool transit); +struct tomoyo_domain_info *tomoyo_find_domain(const char *domainname); +struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param, + const u8 idx); +struct tomoyo_policy_namespace *tomoyo_assign_namespace +(const char *domainname); +struct tomoyo_profile *tomoyo_profile(const struct tomoyo_policy_namespace *ns, + const u8 profile); +unsigned int tomoyo_check_flags(const struct tomoyo_domain_info *domain, + const u8 index); +u8 tomoyo_parse_ulong(unsigned long *result, char **str); +void *tomoyo_commit_ok(void *data, const unsigned int size); +void __init tomoyo_load_builtin_policy(void); +void __init tomoyo_mm_init(void); void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry) (struct tomoyo_request_info *, const struct tomoyo_acl_info *)); +void tomoyo_check_profile(void); +void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp); +void tomoyo_del_condition(struct list_head *element); +void tomoyo_fill_path_info(struct tomoyo_path_info *ptr); +void tomoyo_get_attributes(struct tomoyo_obj_info *obj); +void tomoyo_init_policy_namespace(struct tomoyo_policy_namespace *ns); +void tomoyo_io_printf(struct tomoyo_io_buffer *head, const char *fmt, ...) + __printf(2, 3); +void tomoyo_load_policy(const char *filename); +void tomoyo_memory_free(void *ptr); +void tomoyo_normalize_line(unsigned char *buffer); +void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register); +void tomoyo_print_ulong(char *buffer, const int buffer_len, + const unsigned long value, const u8 type); +void tomoyo_put_name_union(struct tomoyo_name_union *ptr); +void tomoyo_put_number_union(struct tomoyo_number_union *ptr); +void tomoyo_read_log(struct tomoyo_io_buffer *head); +void tomoyo_update_stat(const u8 index); +void tomoyo_warn_oom(const char *function); +void tomoyo_write_log(struct tomoyo_request_info *r, const char *fmt, ...) 
+ __printf(2, 3); +void tomoyo_write_log2(struct tomoyo_request_info *r, int len, const char *fmt, + va_list args); /********** External variable definitions. **********/ -/* Lock for GC. */ -extern struct srcu_struct tomoyo_ss; - -/* The list for "struct tomoyo_domain_info". */ +extern bool tomoyo_policy_loaded; +extern const char * const tomoyo_condition_keyword +[TOMOYO_MAX_CONDITION_KEYWORD]; +extern const char * const tomoyo_dif[TOMOYO_MAX_DOMAIN_INFO_FLAGS]; +extern const char * const tomoyo_mac_keywords[TOMOYO_MAX_MAC_INDEX + + TOMOYO_MAX_MAC_CATEGORY_INDEX]; +extern const char * const tomoyo_mode[TOMOYO_CONFIG_MAX_MODE]; +extern const char * const tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION]; +extern const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX]; +extern const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION]; +extern const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION]; +extern const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION]; +extern struct list_head tomoyo_condition_list; extern struct list_head tomoyo_domain_list; - -extern struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY]; -extern struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP]; extern struct list_head tomoyo_name_list[TOMOYO_MAX_HASH]; - -/* Lock for protecting policy. */ +extern struct list_head tomoyo_namespace_list; extern struct mutex tomoyo_policy_lock; - -/* Has /sbin/init started? */ -extern bool tomoyo_policy_loaded; - -/* The kernel's domain. */ +extern struct srcu_struct tomoyo_ss; extern struct tomoyo_domain_info tomoyo_kernel_domain; - -extern const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION]; -extern const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION]; -extern const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION]; -extern const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION]; - -extern unsigned int tomoyo_quota_for_query; -extern unsigned int tomoyo_query_memory_size; +extern struct tomoyo_policy_namespace tomoyo_kernel_namespace; +extern unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; +extern unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; /********** Inlined functions. **********/ +/** + * tomoyo_read_lock - Take lock for protecting policy. + * + * Returns index number for tomoyo_read_unlock(). + */ static inline int tomoyo_read_lock(void) { return srcu_read_lock(&tomoyo_ss); } +/** + * tomoyo_read_unlock - Release lock for protecting policy. + * + * @idx: Index number returned by tomoyo_read_lock(). + * + * Returns nothing. + */ static inline void tomoyo_read_unlock(int idx) { srcu_read_unlock(&tomoyo_ss, idx); } -/* strcmp() for "struct tomoyo_path_info" structure. */ -static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a, - const struct tomoyo_path_info *b) +/** + * tomoyo_sys_getppid - Copy of getppid(). + * + * Returns parent process's PID. + * + * Alpha does not have getppid() defined. To be able to build this module on + * Alpha, I have to copy getppid() from kernel/timer.c. + */ +static inline pid_t tomoyo_sys_getppid(void) { - return a->hash != b->hash || strcmp(a->name, b->name); + pid_t pid; + rcu_read_lock(); + pid = task_tgid_vnr(current->real_parent); + rcu_read_unlock(); + return pid; } /** - * tomoyo_valid - Check whether the character is a valid char. + * tomoyo_sys_getpid - Copy of getpid(). * - * @c: The character to check. + * Returns current thread's PID. * - * Returns true if @c is a valid character, false otherwise. + * Alpha does not have getpid() defined. 
To be able to build this module on + * Alpha, I have to copy getpid() from kernel/timer.c. */ -static inline bool tomoyo_valid(const unsigned char c) +static inline pid_t tomoyo_sys_getpid(void) { - return c > ' ' && c < 127; + return task_tgid_vnr(current); } /** - * tomoyo_invalid - Check whether the character is an invalid char. + * tomoyo_pathcmp - strcmp() for "struct tomoyo_path_info" structure. * - * @c: The character to check. + * @a: Pointer to "struct tomoyo_path_info". + * @b: Pointer to "struct tomoyo_path_info". * - * Returns true if @c is an invalid character, false otherwise. + * Returns true if @a == @b, false otherwise. */ -static inline bool tomoyo_invalid(const unsigned char c) +static inline bool tomoyo_pathcmp(const struct tomoyo_path_info *a, + const struct tomoyo_path_info *b) { - return c && (c <= ' ' || c >= 127); + return a->hash != b->hash || strcmp(a->name, b->name); } +/** + * tomoyo_put_name - Drop reference on "struct tomoyo_name". + * + * @name: Pointer to "struct tomoyo_path_info". Maybe NULL. + * + * Returns nothing. + */ static inline void tomoyo_put_name(const struct tomoyo_path_info *name) { if (name) { struct tomoyo_name *ptr = container_of(name, typeof(*ptr), entry); - atomic_dec(&ptr->users); + atomic_dec(&ptr->head.users); } } +/** + * tomoyo_put_condition - Drop reference on "struct tomoyo_condition". + * + * @cond: Pointer to "struct tomoyo_condition". Maybe NULL. + * + * Returns nothing. + */ +static inline void tomoyo_put_condition(struct tomoyo_condition *cond) +{ + if (cond) + atomic_dec(&cond->head.users); +} + +/** + * tomoyo_put_group - Drop reference on "struct tomoyo_group". + * + * @group: Pointer to "struct tomoyo_group". Maybe NULL. + * + * Returns nothing. + */ static inline void tomoyo_put_group(struct tomoyo_group *group) { if (group) - atomic_dec(&group->users); + atomic_dec(&group->head.users); } +/** + * tomoyo_domain - Get "struct tomoyo_domain_info" for current thread. + * + * Returns pointer to "struct tomoyo_domain_info" for current thread. + */ static inline struct tomoyo_domain_info *tomoyo_domain(void) { return current_cred()->security; } +/** + * tomoyo_real_domain - Get "struct tomoyo_domain_info" for specified thread. + * + * @task: Pointer to "struct task_struct". + * + * Returns pointer to "struct tomoyo_security" for specified thread. + */ static inline struct tomoyo_domain_info *tomoyo_real_domain(struct task_struct *task) { return task_cred_xxx(task, security); } -static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *p1, - const struct tomoyo_acl_info *p2) +/** + * tomoyo_same_name_union - Check for duplicated "struct tomoyo_name_union" entry. + * + * @a: Pointer to "struct tomoyo_name_union". + * @b: Pointer to "struct tomoyo_name_union". + * + * Returns true if @a == @b, false otherwise. + */ +static inline bool tomoyo_same_name_union +(const struct tomoyo_name_union *a, const struct tomoyo_name_union *b) { - return p1->type == p2->type; + return a->filename == b->filename && a->group == b->group; } -static inline bool tomoyo_same_name_union -(const struct tomoyo_name_union *p1, const struct tomoyo_name_union *p2) +/** + * tomoyo_same_number_union - Check for duplicated "struct tomoyo_number_union" entry. + * + * @a: Pointer to "struct tomoyo_number_union". + * @b: Pointer to "struct tomoyo_number_union". + * + * Returns true if @a == @b, false otherwise. 
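
One caution about tomoyo_pathcmp() above: despite the kernel-doc's "Returns true if @a == @b", the body keeps strcmp()-style polarity and yields true when the two entries differ (the precomputed hash short-circuits most mismatches before strcmp() runs). Callers rely on that polarity; tomoyo_scan_transition() in domain.c below writes "if (ptr->program && tomoyo_pathcmp(ptr->program, program)) continue;" to skip mismatches. A userspace sketch:

  #include <stdbool.h>
  #include <stdio.h>
  #include <string.h>

  struct path_info_demo {        /* name plus precomputed hash */
          const char *name;
          unsigned int hash;
  };

  /* Same shape as tomoyo_pathcmp(): true when the entries DIFFER. */
  static bool pathcmp(const struct path_info_demo *a,
                      const struct path_info_demo *b)
  {
          return a->hash != b->hash || strcmp(a->name, b->name);
  }

  int main(void)
  {
          struct path_info_demo x = { "/bin/sh", 7 };
          struct path_info_demo y = { "/bin/sh", 7 };
          struct path_info_demo z = { "/bin/csh", 9 };

          printf("x vs y: %d (equal)\n", pathcmp(&x, &y));   /* 0 */
          printf("x vs z: %d (differ)\n", pathcmp(&x, &z));  /* 1 */
          return 0;
  }
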
+ */ +static inline bool tomoyo_same_number_union +(const struct tomoyo_number_union *a, const struct tomoyo_number_union *b) { - return p1->filename == p2->filename && p1->group == p2->group && - p1->is_group == p2->is_group; + return a->values[0] == b->values[0] && a->values[1] == b->values[1] && + a->group == b->group && a->value_type[0] == b->value_type[0] && + a->value_type[1] == b->value_type[1]; } -static inline bool tomoyo_same_number_union -(const struct tomoyo_number_union *p1, const struct tomoyo_number_union *p2) +/** + * tomoyo_current_namespace - Get "struct tomoyo_policy_namespace" for current thread. + * + * Returns pointer to "struct tomoyo_policy_namespace" for current thread. + */ +static inline struct tomoyo_policy_namespace *tomoyo_current_namespace(void) +{ + return tomoyo_domain()->ns; +} + +#if defined(CONFIG_SLOB) + +/** + * tomoyo_round2 - Round up to power of 2 for calculating memory usage. + * + * @size: Size to be rounded up. + * + * Returns @size. + * + * Since SLOB does not round up, this function simply returns @size. + */ +static inline int tomoyo_round2(size_t size) +{ + return size; +} + +#else + +/** + * tomoyo_round2 - Round up to power of 2 for calculating memory usage. + * + * @size: Size to be rounded up. + * + * Returns rounded size. + * + * Strictly speaking, SLAB may be able to allocate (e.g.) 96 bytes instead of + * (e.g.) 128 bytes. + */ +static inline int tomoyo_round2(size_t size) { - return p1->values[0] == p2->values[0] && p1->values[1] == p2->values[1] - && p1->group == p2->group && p1->min_type == p2->min_type && - p1->max_type == p2->max_type && p1->is_group == p2->is_group; +#if PAGE_SIZE == 4096 + size_t bsize = 32; +#else + size_t bsize = 64; +#endif + if (!size) + return 0; + while (size > bsize) + bsize <<= 1; + return bsize; } +#endif + /** * list_for_each_cookie - iterate over a list with cookie. * @pos: the &struct list_head to use as a loop cursor. diff --git a/security/tomoyo/condition.c b/security/tomoyo/condition.c new file mode 100644 index 000000000000..8a05f71eaf67 --- /dev/null +++ b/security/tomoyo/condition.c @@ -0,0 +1,1035 @@ +/* + * security/tomoyo/condition.c + * + * Copyright (C) 2005-2011 NTT DATA CORPORATION + */ + +#include "common.h" +#include <linux/slab.h> + +/* List of "struct tomoyo_condition". */ +LIST_HEAD(tomoyo_condition_list); + +/** + * tomoyo_argv - Check argv[] in "struct linux_binbrm". + * + * @index: Index number of @arg_ptr. + * @arg_ptr: Contents of argv[@index]. + * @argc: Length of @argv. + * @argv: Pointer to "struct tomoyo_argv". + * @checked: Set to true if @argv[@index] was found. + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_argv(const unsigned int index, const char *arg_ptr, + const int argc, const struct tomoyo_argv *argv, + u8 *checked) +{ + int i; + struct tomoyo_path_info arg; + arg.name = arg_ptr; + for (i = 0; i < argc; argv++, checked++, i++) { + bool result; + if (index != argv->index) + continue; + *checked = 1; + tomoyo_fill_path_info(&arg); + result = tomoyo_path_matches_pattern(&arg, argv->value); + if (argv->is_not) + result = !result; + if (!result) + return false; + } + return true; +} + +/** + * tomoyo_envp - Check envp[] in "struct linux_binbrm". + * + * @env_name: The name of environment variable. + * @env_value: The value of environment variable. + * @envc: Length of @envp. + * @envp: Pointer to "struct tomoyo_envp". + * @checked: Set to true if @envp[@env_name] was found. + * + * Returns true on success, false otherwise. 
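
tomoyo_round2(), defined a little earlier in this header, estimates what an allocation really costs by rounding the requested size up to the power-of-two slab bucket (SLOB does not round, so its variant returns the size unchanged). A userspace copy of the non-SLOB branch, assuming PAGE_SIZE == 4096:

  #include <stddef.h>
  #include <stdio.h>

  static int round2(size_t size)
  {
          size_t bsize = 32;        /* smallest bucket for 4 KiB pages */

          if (!size)
                  return 0;
          while (size > bsize)
                  bsize <<= 1;
          return bsize;
  }

  int main(void)
  {
          const size_t probes[] = { 1, 32, 33, 100, 4096 };
          size_t i;

          for (i = 0; i < sizeof(probes) / sizeof(probes[0]); i++)
                  printf("%zu -> %d\n", probes[i], round2(probes[i]));
          /* 1->32, 32->32, 33->64, 100->128, 4096->4096 */
          return 0;
  }
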
+ */ +static bool tomoyo_envp(const char *env_name, const char *env_value, + const int envc, const struct tomoyo_envp *envp, + u8 *checked) +{ + int i; + struct tomoyo_path_info name; + struct tomoyo_path_info value; + name.name = env_name; + tomoyo_fill_path_info(&name); + value.name = env_value; + tomoyo_fill_path_info(&value); + for (i = 0; i < envc; envp++, checked++, i++) { + bool result; + if (!tomoyo_path_matches_pattern(&name, envp->name)) + continue; + *checked = 1; + if (envp->value) { + result = tomoyo_path_matches_pattern(&value, + envp->value); + if (envp->is_not) + result = !result; + } else { + result = true; + if (!envp->is_not) + result = !result; + } + if (!result) + return false; + } + return true; +} + +/** + * tomoyo_scan_bprm - Scan "struct linux_binprm". + * + * @ee: Pointer to "struct tomoyo_execve". + * @argc: Length of @argc. + * @argv: Pointer to "struct tomoyo_argv". + * @envc: Length of @envp. + * @envp: Poiner to "struct tomoyo_envp". + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_scan_bprm(struct tomoyo_execve *ee, + const u16 argc, const struct tomoyo_argv *argv, + const u16 envc, const struct tomoyo_envp *envp) +{ + struct linux_binprm *bprm = ee->bprm; + struct tomoyo_page_dump *dump = &ee->dump; + char *arg_ptr = ee->tmp; + int arg_len = 0; + unsigned long pos = bprm->p; + int offset = pos % PAGE_SIZE; + int argv_count = bprm->argc; + int envp_count = bprm->envc; + bool result = true; + u8 local_checked[32]; + u8 *checked; + if (argc + envc <= sizeof(local_checked)) { + checked = local_checked; + memset(local_checked, 0, sizeof(local_checked)); + } else { + checked = kzalloc(argc + envc, GFP_NOFS); + if (!checked) + return false; + } + while (argv_count || envp_count) { + if (!tomoyo_dump_page(bprm, pos, dump)) { + result = false; + goto out; + } + pos += PAGE_SIZE - offset; + while (offset < PAGE_SIZE) { + /* Read. */ + const char *kaddr = dump->data; + const unsigned char c = kaddr[offset++]; + if (c && arg_len < TOMOYO_EXEC_TMPSIZE - 10) { + if (c == '\\') { + arg_ptr[arg_len++] = '\\'; + arg_ptr[arg_len++] = '\\'; + } else if (c > ' ' && c < 127) { + arg_ptr[arg_len++] = c; + } else { + arg_ptr[arg_len++] = '\\'; + arg_ptr[arg_len++] = (c >> 6) + '0'; + arg_ptr[arg_len++] = + ((c >> 3) & 7) + '0'; + arg_ptr[arg_len++] = (c & 7) + '0'; + } + } else { + arg_ptr[arg_len] = '\0'; + } + if (c) + continue; + /* Check. */ + if (argv_count) { + if (!tomoyo_argv(bprm->argc - argv_count, + arg_ptr, argc, argv, + checked)) { + result = false; + break; + } + argv_count--; + } else if (envp_count) { + char *cp = strchr(arg_ptr, '='); + if (cp) { + *cp = '\0'; + if (!tomoyo_envp(arg_ptr, cp + 1, + envc, envp, + checked + argc)) { + result = false; + break; + } + } + envp_count--; + } else { + break; + } + arg_len = 0; + } + offset = 0; + if (!result) + break; + } +out: + if (result) { + int i; + /* Check not-yet-checked entries. */ + for (i = 0; i < argc; i++) { + if (checked[i]) + continue; + /* + * Return true only if all unchecked indexes in + * bprm->argv[] are not matched. + */ + if (argv[i].is_not) + continue; + result = false; + break; + } + for (i = 0; i < envc; envp++, i++) { + if (checked[argc + i]) + continue; + /* + * Return true only if all unchecked environ variables + * in bprm->envp[] are either undefined or not matched. 
+ */ + if ((!envp->value && !envp->is_not) || + (envp->value && envp->is_not)) + continue; + result = false; + break; + } + } + if (checked != local_checked) + kfree(checked); + return result; +} + +/** + * tomoyo_scan_exec_realpath - Check "exec.realpath" parameter of "struct tomoyo_condition". + * + * @file: Pointer to "struct file". + * @ptr: Pointer to "struct tomoyo_name_union". + * @match: True if "exec.realpath=", false if "exec.realpath!=". + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_scan_exec_realpath(struct file *file, + const struct tomoyo_name_union *ptr, + const bool match) +{ + bool result; + struct tomoyo_path_info exe; + if (!file) + return false; + exe.name = tomoyo_realpath_from_path(&file->f_path); + if (!exe.name) + return false; + tomoyo_fill_path_info(&exe); + result = tomoyo_compare_name_union(&exe, ptr); + kfree(exe.name); + return result == match; +} + +/** + * tomoyo_get_dqword - tomoyo_get_name() for a quoted string. + * + * @start: String to save. + * + * Returns pointer to "struct tomoyo_path_info" on success, NULL otherwise. + */ +static const struct tomoyo_path_info *tomoyo_get_dqword(char *start) +{ + char *cp = start + strlen(start) - 1; + if (cp == start || *start++ != '"' || *cp != '"') + return NULL; + *cp = '\0'; + if (*start && !tomoyo_correct_word(start)) + return NULL; + return tomoyo_get_name(start); +} + +/** + * tomoyo_parse_name_union_quoted - Parse a quoted word. + * + * @param: Pointer to "struct tomoyo_acl_param". + * @ptr: Pointer to "struct tomoyo_name_union". + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_parse_name_union_quoted(struct tomoyo_acl_param *param, + struct tomoyo_name_union *ptr) +{ + char *filename = param->data; + if (*filename == '@') + return tomoyo_parse_name_union(param, ptr); + ptr->filename = tomoyo_get_dqword(filename); + return ptr->filename != NULL; +} + +/** + * tomoyo_parse_argv - Parse an argv[] condition part. + * + * @left: Lefthand value. + * @right: Righthand value. + * @argv: Pointer to "struct tomoyo_argv". + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_parse_argv(char *left, char *right, + struct tomoyo_argv *argv) +{ + if (tomoyo_parse_ulong(&argv->index, &left) != + TOMOYO_VALUE_TYPE_DECIMAL || *left++ != ']' || *left) + return false; + argv->value = tomoyo_get_dqword(right); + return argv->value != NULL; +} + +/** + * tomoyo_parse_envp - Parse an envp[] condition part. + * + * @left: Lefthand value. + * @right: Righthand value. + * @envp: Pointer to "struct tomoyo_envp". + * + * Returns true on success, false otherwise. + */ +static bool tomoyo_parse_envp(char *left, char *right, + struct tomoyo_envp *envp) +{ + const struct tomoyo_path_info *name; + const struct tomoyo_path_info *value; + char *cp = left + strlen(left) - 1; + if (*cp-- != ']' || *cp != '"') + goto out; + *cp = '\0'; + if (!tomoyo_correct_word(left)) + goto out; + name = tomoyo_get_name(left); + if (!name) + goto out; + if (!strcmp(right, "NULL")) { + value = NULL; + } else { + value = tomoyo_get_dqword(right); + if (!value) { + tomoyo_put_name(name); + goto out; + } + } + envp->name = name; + envp->value = value; + return true; +out: + return false; +} + +/** + * tomoyo_same_condition - Check for duplicated "struct tomoyo_condition" entry. + * + * @a: Pointer to "struct tomoyo_condition". + * @b: Pointer to "struct tomoyo_condition". + * + * Returns true if @a == @b, false otherwise. 
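
tomoyo_get_dqword() above accepts only a token wrapped in double quotes, which is how exec.argv[]/exec.envp[] values are kept apart from bare keywords. A userspace sketch of just the quote handling; the real function additionally validates the word and interns it through tomoyo_get_name():

  #include <stdio.h>
  #include <string.h>

  /* Return the unquoted payload of a "..." token, or NULL. */
  static char *get_dqword(char *start)
  {
          char *cp = start + strlen(start) - 1;

          if (cp == start || *start != '"' || *cp != '"')
                  return NULL;
          *cp = '\0';          /* drop the closing quote */
          return start + 1;    /* skip the opening quote */
  }

  int main(void)
  {
          char ok[] = "\"/bin/true\"";
          char bad[] = "\"unterminated";

          printf("ok:  %s\n", get_dqword(ok));           /* /bin/true */
          printf("bad: %p\n", (void *)get_dqword(bad));  /* (nil) */
          return 0;
  }
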
+ */ +static inline bool tomoyo_same_condition(const struct tomoyo_condition *a, + const struct tomoyo_condition *b) +{ + return a->size == b->size && a->condc == b->condc && + a->numbers_count == b->numbers_count && + a->names_count == b->names_count && + a->argc == b->argc && a->envc == b->envc && + !memcmp(a + 1, b + 1, a->size - sizeof(*a)); +} + +/** + * tomoyo_condition_type - Get condition type. + * + * @word: Keyword string. + * + * Returns one of values in "enum tomoyo_conditions_index" on success, + * TOMOYO_MAX_CONDITION_KEYWORD otherwise. + */ +static u8 tomoyo_condition_type(const char *word) +{ + u8 i; + for (i = 0; i < TOMOYO_MAX_CONDITION_KEYWORD; i++) { + if (!strcmp(word, tomoyo_condition_keyword[i])) + break; + } + return i; +} + +/* Define this to enable debug mode. */ +/* #define DEBUG_CONDITION */ + +#ifdef DEBUG_CONDITION +#define dprintk printk +#else +#define dprintk(...) do { } while (0) +#endif + +/** + * tomoyo_commit_condition - Commit "struct tomoyo_condition". + * + * @entry: Pointer to "struct tomoyo_condition". + * + * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise. + * + * This function merges duplicated entries. This function returns NULL if + * @entry is not duplicated but memory quota for policy has exceeded. + */ +static struct tomoyo_condition *tomoyo_commit_condition +(struct tomoyo_condition *entry) +{ + struct tomoyo_condition *ptr; + bool found = false; + if (mutex_lock_interruptible(&tomoyo_policy_lock)) { + dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__); + ptr = NULL; + found = true; + goto out; + } + list_for_each_entry_rcu(ptr, &tomoyo_condition_list, head.list) { + if (!tomoyo_same_condition(ptr, entry)) + continue; + /* Same entry found. Share this entry. */ + atomic_inc(&ptr->head.users); + found = true; + break; + } + if (!found) { + if (tomoyo_memory_ok(entry)) { + atomic_set(&entry->head.users, 1); + list_add_rcu(&entry->head.list, + &tomoyo_condition_list); + } else { + found = true; + ptr = NULL; + } + } + mutex_unlock(&tomoyo_policy_lock); +out: + if (found) { + tomoyo_del_condition(&entry->head.list); + kfree(entry); + entry = ptr; + } + return entry; +} + +/** + * tomoyo_get_condition - Parse condition part. + * + * @param: Pointer to "struct tomoyo_acl_param". + * + * Returns pointer to "struct tomoyo_condition" on success, NULL otherwise. + */ +struct tomoyo_condition *tomoyo_get_condition(struct tomoyo_acl_param *param) +{ + struct tomoyo_condition *entry = NULL; + struct tomoyo_condition_element *condp = NULL; + struct tomoyo_number_union *numbers_p = NULL; + struct tomoyo_name_union *names_p = NULL; + struct tomoyo_argv *argv = NULL; + struct tomoyo_envp *envp = NULL; + struct tomoyo_condition e = { }; + char * const start_of_string = param->data; + char * const end_of_string = start_of_string + strlen(start_of_string); + char *pos; +rerun: + pos = start_of_string; + while (1) { + u8 left = -1; + u8 right = -1; + char *left_word = pos; + char *cp; + char *right_word; + bool is_not; + if (!*left_word) + break; + /* + * Since left-hand condition does not allow use of "path_group" + * or "number_group" and environment variable's names do not + * accept '=', it is guaranteed that the original line consists + * of one or more repetition of $left$operator$right blocks + * where "$left is free from '=' and ' '" and "$operator is + * either '=' or '!='" and "$right is free from ' '". 
+ * Therefore, we can reconstruct the original line at the end + * of dry run even if we overwrite $operator with '\0'. + */ + cp = strchr(pos, ' '); + if (cp) { + *cp = '\0'; /* Will restore later. */ + pos = cp + 1; + } else { + pos = ""; + } + right_word = strchr(left_word, '='); + if (!right_word || right_word == left_word) + goto out; + is_not = *(right_word - 1) == '!'; + if (is_not) + *(right_word++ - 1) = '\0'; /* Will restore later. */ + else if (*(right_word + 1) != '=') + *right_word++ = '\0'; /* Will restore later. */ + else + goto out; + dprintk(KERN_WARNING "%u: <%s>%s=<%s>\n", __LINE__, left_word, + is_not ? "!" : "", right_word); + if (!strncmp(left_word, "exec.argv[", 10)) { + if (!argv) { + e.argc++; + e.condc++; + } else { + e.argc--; + e.condc--; + left = TOMOYO_ARGV_ENTRY; + argv->is_not = is_not; + if (!tomoyo_parse_argv(left_word + 10, + right_word, argv++)) + goto out; + } + goto store_value; + } + if (!strncmp(left_word, "exec.envp[\"", 11)) { + if (!envp) { + e.envc++; + e.condc++; + } else { + e.envc--; + e.condc--; + left = TOMOYO_ENVP_ENTRY; + envp->is_not = is_not; + if (!tomoyo_parse_envp(left_word + 11, + right_word, envp++)) + goto out; + } + goto store_value; + } + left = tomoyo_condition_type(left_word); + dprintk(KERN_WARNING "%u: <%s> left=%u\n", __LINE__, left_word, + left); + if (left == TOMOYO_MAX_CONDITION_KEYWORD) { + if (!numbers_p) { + e.numbers_count++; + } else { + e.numbers_count--; + left = TOMOYO_NUMBER_UNION; + param->data = left_word; + if (*left_word == '@' || + !tomoyo_parse_number_union(param, + numbers_p++)) + goto out; + } + } + if (!condp) + e.condc++; + else + e.condc--; + if (left == TOMOYO_EXEC_REALPATH || + left == TOMOYO_SYMLINK_TARGET) { + if (!names_p) { + e.names_count++; + } else { + e.names_count--; + right = TOMOYO_NAME_UNION; + param->data = right_word; + if (!tomoyo_parse_name_union_quoted(param, + names_p++)) + goto out; + } + goto store_value; + } + right = tomoyo_condition_type(right_word); + if (right == TOMOYO_MAX_CONDITION_KEYWORD) { + if (!numbers_p) { + e.numbers_count++; + } else { + e.numbers_count--; + right = TOMOYO_NUMBER_UNION; + param->data = right_word; + if (!tomoyo_parse_number_union(param, + numbers_p++)) + goto out; + } + } +store_value: + if (!condp) { + dprintk(KERN_WARNING "%u: dry_run left=%u right=%u " + "match=%u\n", __LINE__, left, right, !is_not); + continue; + } + condp->left = left; + condp->right = right; + condp->equals = !is_not; + dprintk(KERN_WARNING "%u: left=%u right=%u match=%u\n", + __LINE__, condp->left, condp->right, + condp->equals); + condp++; + } + dprintk(KERN_INFO "%u: cond=%u numbers=%u names=%u ac=%u ec=%u\n", + __LINE__, e.condc, e.numbers_count, e.names_count, e.argc, + e.envc); + if (entry) { + BUG_ON(e.names_count | e.numbers_count | e.argc | e.envc | + e.condc); + return tomoyo_commit_condition(entry); + } + e.size = sizeof(*entry) + + e.condc * sizeof(struct tomoyo_condition_element) + + e.numbers_count * sizeof(struct tomoyo_number_union) + + e.names_count * sizeof(struct tomoyo_name_union) + + e.argc * sizeof(struct tomoyo_argv) + + e.envc * sizeof(struct tomoyo_envp); + entry = kzalloc(e.size, GFP_NOFS); + if (!entry) + return NULL; + *entry = e; + condp = (struct tomoyo_condition_element *) (entry + 1); + numbers_p = (struct tomoyo_number_union *) (condp + e.condc); + names_p = (struct tomoyo_name_union *) (numbers_p + e.numbers_count); + argv = (struct tomoyo_argv *) (names_p + e.names_count); + envp = (struct tomoyo_envp *) (argv + e.argc); + { + bool flag = 
false; + for (pos = start_of_string; pos < end_of_string; pos++) { + if (*pos) + continue; + if (flag) /* Restore " ". */ + *pos = ' '; + else if (*(pos + 1) == '=') /* Restore "!=". */ + *pos = '!'; + else /* Restore "=". */ + *pos = '='; + flag = !flag; + } + } + goto rerun; +out: + dprintk(KERN_WARNING "%u: %s failed\n", __LINE__, __func__); + if (entry) { + tomoyo_del_condition(&entry->head.list); + kfree(entry); + } + return NULL; +} + +/** + * tomoyo_get_attributes - Revalidate "struct inode". + * + * @obj: Pointer to "struct tomoyo_obj_info". + * + * Returns nothing. + */ +void tomoyo_get_attributes(struct tomoyo_obj_info *obj) +{ + u8 i; + struct dentry *dentry = NULL; + + for (i = 0; i < TOMOYO_MAX_PATH_STAT; i++) { + struct inode *inode; + switch (i) { + case TOMOYO_PATH1: + dentry = obj->path1.dentry; + if (!dentry) + continue; + break; + case TOMOYO_PATH2: + dentry = obj->path2.dentry; + if (!dentry) + continue; + break; + default: + if (!dentry) + continue; + dentry = dget_parent(dentry); + break; + } + inode = dentry->d_inode; + if (inode) { + struct tomoyo_mini_stat *stat = &obj->stat[i]; + stat->uid = inode->i_uid; + stat->gid = inode->i_gid; + stat->ino = inode->i_ino; + stat->mode = inode->i_mode; + stat->dev = inode->i_sb->s_dev; + stat->rdev = inode->i_rdev; + obj->stat_valid[i] = true; + } + if (i & 1) /* i == TOMOYO_PATH1_PARENT || + i == TOMOYO_PATH2_PARENT */ + dput(dentry); + } +} + +/** + * tomoyo_condition - Check condition part. + * + * @r: Pointer to "struct tomoyo_request_info". + * @cond: Pointer to "struct tomoyo_condition". Maybe NULL. + * + * Returns true on success, false otherwise. + * + * Caller holds tomoyo_read_lock(). + */ +bool tomoyo_condition(struct tomoyo_request_info *r, + const struct tomoyo_condition *cond) +{ + u32 i; + unsigned long min_v[2] = { 0, 0 }; + unsigned long max_v[2] = { 0, 0 }; + const struct tomoyo_condition_element *condp; + const struct tomoyo_number_union *numbers_p; + const struct tomoyo_name_union *names_p; + const struct tomoyo_argv *argv; + const struct tomoyo_envp *envp; + struct tomoyo_obj_info *obj; + u16 condc; + u16 argc; + u16 envc; + struct linux_binprm *bprm = NULL; + if (!cond) + return true; + condc = cond->condc; + argc = cond->argc; + envc = cond->envc; + obj = r->obj; + if (r->ee) + bprm = r->ee->bprm; + if (!bprm && (argc || envc)) + return false; + condp = (struct tomoyo_condition_element *) (cond + 1); + numbers_p = (const struct tomoyo_number_union *) (condp + condc); + names_p = (const struct tomoyo_name_union *) + (numbers_p + cond->numbers_count); + argv = (const struct tomoyo_argv *) (names_p + cond->names_count); + envp = (const struct tomoyo_envp *) (argv + argc); + for (i = 0; i < condc; i++) { + const bool match = condp->equals; + const u8 left = condp->left; + const u8 right = condp->right; + bool is_bitop[2] = { false, false }; + u8 j; + condp++; + /* Check argv[] and envp[] later. */ + if (left == TOMOYO_ARGV_ENTRY || left == TOMOYO_ENVP_ENTRY) + continue; + /* Check string expressions. */ + if (right == TOMOYO_NAME_UNION) { + const struct tomoyo_name_union *ptr = names_p++; + switch (left) { + struct tomoyo_path_info *symlink; + struct tomoyo_execve *ee; + struct file *file; + case TOMOYO_SYMLINK_TARGET: + symlink = obj ? obj->symlink_target : NULL; + if (!symlink || + !tomoyo_compare_name_union(symlink, ptr) + == match) + goto out; + break; + case TOMOYO_EXEC_REALPATH: + ee = r->ee; + file = ee ? 
ee->bprm->file : NULL; + if (!tomoyo_scan_exec_realpath(file, ptr, + match)) + goto out; + break; + } + continue; + } + /* Check numeric or bit-op expressions. */ + for (j = 0; j < 2; j++) { + const u8 index = j ? right : left; + unsigned long value = 0; + switch (index) { + case TOMOYO_TASK_UID: + value = current_uid(); + break; + case TOMOYO_TASK_EUID: + value = current_euid(); + break; + case TOMOYO_TASK_SUID: + value = current_suid(); + break; + case TOMOYO_TASK_FSUID: + value = current_fsuid(); + break; + case TOMOYO_TASK_GID: + value = current_gid(); + break; + case TOMOYO_TASK_EGID: + value = current_egid(); + break; + case TOMOYO_TASK_SGID: + value = current_sgid(); + break; + case TOMOYO_TASK_FSGID: + value = current_fsgid(); + break; + case TOMOYO_TASK_PID: + value = tomoyo_sys_getpid(); + break; + case TOMOYO_TASK_PPID: + value = tomoyo_sys_getppid(); + break; + case TOMOYO_TYPE_IS_SOCKET: + value = S_IFSOCK; + break; + case TOMOYO_TYPE_IS_SYMLINK: + value = S_IFLNK; + break; + case TOMOYO_TYPE_IS_FILE: + value = S_IFREG; + break; + case TOMOYO_TYPE_IS_BLOCK_DEV: + value = S_IFBLK; + break; + case TOMOYO_TYPE_IS_DIRECTORY: + value = S_IFDIR; + break; + case TOMOYO_TYPE_IS_CHAR_DEV: + value = S_IFCHR; + break; + case TOMOYO_TYPE_IS_FIFO: + value = S_IFIFO; + break; + case TOMOYO_MODE_SETUID: + value = S_ISUID; + break; + case TOMOYO_MODE_SETGID: + value = S_ISGID; + break; + case TOMOYO_MODE_STICKY: + value = S_ISVTX; + break; + case TOMOYO_MODE_OWNER_READ: + value = S_IRUSR; + break; + case TOMOYO_MODE_OWNER_WRITE: + value = S_IWUSR; + break; + case TOMOYO_MODE_OWNER_EXECUTE: + value = S_IXUSR; + break; + case TOMOYO_MODE_GROUP_READ: + value = S_IRGRP; + break; + case TOMOYO_MODE_GROUP_WRITE: + value = S_IWGRP; + break; + case TOMOYO_MODE_GROUP_EXECUTE: + value = S_IXGRP; + break; + case TOMOYO_MODE_OTHERS_READ: + value = S_IROTH; + break; + case TOMOYO_MODE_OTHERS_WRITE: + value = S_IWOTH; + break; + case TOMOYO_MODE_OTHERS_EXECUTE: + value = S_IXOTH; + break; + case TOMOYO_EXEC_ARGC: + if (!bprm) + goto out; + value = bprm->argc; + break; + case TOMOYO_EXEC_ENVC: + if (!bprm) + goto out; + value = bprm->envc; + break; + case TOMOYO_NUMBER_UNION: + /* Fetch values later. 
*/ + break; + default: + if (!obj) + goto out; + if (!obj->validate_done) { + tomoyo_get_attributes(obj); + obj->validate_done = true; + } + { + u8 stat_index; + struct tomoyo_mini_stat *stat; + switch (index) { + case TOMOYO_PATH1_UID: + case TOMOYO_PATH1_GID: + case TOMOYO_PATH1_INO: + case TOMOYO_PATH1_MAJOR: + case TOMOYO_PATH1_MINOR: + case TOMOYO_PATH1_TYPE: + case TOMOYO_PATH1_DEV_MAJOR: + case TOMOYO_PATH1_DEV_MINOR: + case TOMOYO_PATH1_PERM: + stat_index = TOMOYO_PATH1; + break; + case TOMOYO_PATH2_UID: + case TOMOYO_PATH2_GID: + case TOMOYO_PATH2_INO: + case TOMOYO_PATH2_MAJOR: + case TOMOYO_PATH2_MINOR: + case TOMOYO_PATH2_TYPE: + case TOMOYO_PATH2_DEV_MAJOR: + case TOMOYO_PATH2_DEV_MINOR: + case TOMOYO_PATH2_PERM: + stat_index = TOMOYO_PATH2; + break; + case TOMOYO_PATH1_PARENT_UID: + case TOMOYO_PATH1_PARENT_GID: + case TOMOYO_PATH1_PARENT_INO: + case TOMOYO_PATH1_PARENT_PERM: + stat_index = + TOMOYO_PATH1_PARENT; + break; + case TOMOYO_PATH2_PARENT_UID: + case TOMOYO_PATH2_PARENT_GID: + case TOMOYO_PATH2_PARENT_INO: + case TOMOYO_PATH2_PARENT_PERM: + stat_index = + TOMOYO_PATH2_PARENT; + break; + default: + goto out; + } + if (!obj->stat_valid[stat_index]) + goto out; + stat = &obj->stat[stat_index]; + switch (index) { + case TOMOYO_PATH1_UID: + case TOMOYO_PATH2_UID: + case TOMOYO_PATH1_PARENT_UID: + case TOMOYO_PATH2_PARENT_UID: + value = stat->uid; + break; + case TOMOYO_PATH1_GID: + case TOMOYO_PATH2_GID: + case TOMOYO_PATH1_PARENT_GID: + case TOMOYO_PATH2_PARENT_GID: + value = stat->gid; + break; + case TOMOYO_PATH1_INO: + case TOMOYO_PATH2_INO: + case TOMOYO_PATH1_PARENT_INO: + case TOMOYO_PATH2_PARENT_INO: + value = stat->ino; + break; + case TOMOYO_PATH1_MAJOR: + case TOMOYO_PATH2_MAJOR: + value = MAJOR(stat->dev); + break; + case TOMOYO_PATH1_MINOR: + case TOMOYO_PATH2_MINOR: + value = MINOR(stat->dev); + break; + case TOMOYO_PATH1_TYPE: + case TOMOYO_PATH2_TYPE: + value = stat->mode & S_IFMT; + break; + case TOMOYO_PATH1_DEV_MAJOR: + case TOMOYO_PATH2_DEV_MAJOR: + value = MAJOR(stat->rdev); + break; + case TOMOYO_PATH1_DEV_MINOR: + case TOMOYO_PATH2_DEV_MINOR: + value = MINOR(stat->rdev); + break; + case TOMOYO_PATH1_PERM: + case TOMOYO_PATH2_PERM: + case TOMOYO_PATH1_PARENT_PERM: + case TOMOYO_PATH2_PARENT_PERM: + value = stat->mode & S_IALLUGO; + break; + } + } + break; + } + max_v[j] = value; + min_v[j] = value; + switch (index) { + case TOMOYO_MODE_SETUID: + case TOMOYO_MODE_SETGID: + case TOMOYO_MODE_STICKY: + case TOMOYO_MODE_OWNER_READ: + case TOMOYO_MODE_OWNER_WRITE: + case TOMOYO_MODE_OWNER_EXECUTE: + case TOMOYO_MODE_GROUP_READ: + case TOMOYO_MODE_GROUP_WRITE: + case TOMOYO_MODE_GROUP_EXECUTE: + case TOMOYO_MODE_OTHERS_READ: + case TOMOYO_MODE_OTHERS_WRITE: + case TOMOYO_MODE_OTHERS_EXECUTE: + is_bitop[j] = true; + } + } + if (left == TOMOYO_NUMBER_UNION) { + /* Fetch values now. */ + const struct tomoyo_number_union *ptr = numbers_p++; + min_v[0] = ptr->values[0]; + max_v[0] = ptr->values[1]; + } + if (right == TOMOYO_NUMBER_UNION) { + /* Fetch values now. */ + const struct tomoyo_number_union *ptr = numbers_p++; + if (ptr->group) { + if (tomoyo_number_matches_group(min_v[0], + max_v[0], + ptr->group) + == match) + continue; + } else { + if ((min_v[0] <= ptr->values[1] && + max_v[0] >= ptr->values[0]) == match) + continue; + } + goto out; + } + /* + * Bit operation is valid only when counterpart value + * represents permission. 
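
Every numeric test in tomoyo_condition() ultimately reduces to one of the two forms used just below: a closed-interval overlap check between the (min,max) pair fetched for each side, and a single-bit test when one side is a permission constant such as S_ISUID. A userspace sketch of both, with illustrative values:

  #include <stdbool.h>
  #include <stdio.h>
  #include <sys/stat.h>

  /* "task.uid=0-100" style: do the two closed intervals overlap? */
  static bool range_matches(unsigned long lmin, unsigned long lmax,
                            unsigned long rmin, unsigned long rmax)
  {
          return lmin <= rmax && lmax >= rmin;
  }

  /* "path1.perm=setuid" style: is the mode bit set? */
  static bool bit_matches(unsigned long perm, unsigned long bit)
  {
          return perm & bit;
  }

  int main(void)
  {
          /* a concrete uid of 42 against the range 0-100 */
          printf("uid in range: %d\n", range_matches(42, 42, 0, 100));
          /* mode 04755 against the setuid bit */
          printf("setuid set:   %d\n", bit_matches(04755, S_ISUID));
          return 0;
  }
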
+ */ + if (is_bitop[0] && is_bitop[1]) { + goto out; + } else if (is_bitop[0]) { + switch (right) { + case TOMOYO_PATH1_PERM: + case TOMOYO_PATH1_PARENT_PERM: + case TOMOYO_PATH2_PERM: + case TOMOYO_PATH2_PARENT_PERM: + if (!(max_v[0] & max_v[1]) == !match) + continue; + } + goto out; + } else if (is_bitop[1]) { + switch (left) { + case TOMOYO_PATH1_PERM: + case TOMOYO_PATH1_PARENT_PERM: + case TOMOYO_PATH2_PERM: + case TOMOYO_PATH2_PARENT_PERM: + if (!(max_v[0] & max_v[1]) == !match) + continue; + } + goto out; + } + /* Normal value range comparison. */ + if ((min_v[0] <= max_v[1] && max_v[0] >= min_v[1]) == match) + continue; +out: + return false; + } + /* Check argv[] and envp[] now. */ + if (r->ee && (argc || envc)) + return tomoyo_scan_bprm(r->ee, argc, argv, envc, envp); + return true; +} diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 35388408e475..cd0f92d88bb4 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -1,9 +1,7 @@ /* * security/tomoyo/domain.c * - * Domain transition functions for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" @@ -20,8 +18,7 @@ struct tomoyo_domain_info tomoyo_kernel_domain; * * @new_entry: Pointer to "struct tomoyo_acl_info". * @size: Size of @new_entry in bytes. - * @is_delete: True if it is a delete request. - * @list: Pointer to "struct list_head". + * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * * Returns 0 on success, negative value otherwise. @@ -29,25 +26,26 @@ struct tomoyo_domain_info tomoyo_kernel_domain; * Caller holds tomoyo_read_lock(). */ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, - bool is_delete, struct list_head *list, + struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_head *, const struct tomoyo_acl_head *)) { - int error = is_delete ? -ENOENT : -ENOMEM; + int error = param->is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_head *entry; + struct list_head *list = param->list; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return -ENOMEM; list_for_each_entry_rcu(entry, list, list) { if (!check_duplicate(entry, new_entry)) continue; - entry->is_deleted = is_delete; + entry->is_deleted = param->is_delete; error = 0; break; } - if (error && !is_delete) { + if (error && !param->is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { list_add_tail_rcu(&entry->list, list); @@ -59,12 +57,25 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, } /** + * tomoyo_same_acl_head - Check for duplicated "struct tomoyo_acl_info" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b, false otherwise. + */ +static inline bool tomoyo_same_acl_head(const struct tomoyo_acl_info *a, + const struct tomoyo_acl_info *b) +{ + return a->type == b->type && a->cond == b->cond; +} + +/** * tomoyo_update_domain - Update an entry for domain policy. * * @new_entry: Pointer to "struct tomoyo_acl_info". * @size: Size of @new_entry in bytes. - * @is_delete: True if it is a delete request. - * @domain: Pointer to "struct tomoyo_domain_info". + * @param: Pointer to "struct tomoyo_acl_param". * @check_duplicate: Callback function to find duplicated entry. * @merge_duplicate: Callback function to merge duplicated entry. 
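
tomoyo_update_policy() above is the single update idiom behind every exception-policy list: walk the list, and when check_duplicate() hits, merely flip is_deleted (entries are never unlinked here; reclaiming them is the garbage collector's job); only a genuinely new entry is added. A userspace sketch with a simplified singly linked list, prepending where the kernel uses list_add_tail_rcu():

  #include <stdbool.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  struct entry {
          struct entry *next;
          bool is_deleted;
          char name[32];
  };

  static struct entry *head;

  static int update_policy(const char *name, bool is_delete)
  {
          struct entry *e;

          for (e = head; e; e = e->next) {
                  if (strcmp(e->name, name))
                          continue;
                  e->is_deleted = is_delete;    /* reuse the slot */
                  return 0;
          }
          if (is_delete)
                  return -2;                    /* -ENOENT */
          e = calloc(1, sizeof(*e));
          if (!e)
                  return -12;                   /* -ENOMEM */
          snprintf(e->name, sizeof(e->name), "%s", name);
          e->next = head;
          head = e;
          return 0;
  }

  int main(void)
  {
          update_policy("/usr/sbin/sshd", false);    /* add */
          update_policy("/usr/sbin/sshd", true);     /* mark deleted */
          printf("delete missing entry: %d\n",
                 update_policy("/bin/sh", true));    /* -2 */
          return 0;
  }
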
* @@ -73,7 +84,7 @@ int tomoyo_update_policy(struct tomoyo_acl_head *new_entry, const int size, * Caller holds tomoyo_read_lock(). */ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, - bool is_delete, struct tomoyo_domain_info *domain, + struct tomoyo_acl_param *param, bool (*check_duplicate) (const struct tomoyo_acl_info *, const struct tomoyo_acl_info @@ -82,13 +93,21 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, struct tomoyo_acl_info *, const bool)) { + const bool is_delete = param->is_delete; int error = is_delete ? -ENOENT : -ENOMEM; struct tomoyo_acl_info *entry; + struct list_head * const list = param->list; + if (param->data[0]) { + new_entry->cond = tomoyo_get_condition(param); + if (!new_entry->cond) + return -EINVAL; + } if (mutex_lock_interruptible(&tomoyo_policy_lock)) - return error; - list_for_each_entry_rcu(entry, &domain->acl_info_list, list) { - if (!check_duplicate(entry, new_entry)) + goto out; + list_for_each_entry_rcu(entry, list, list) { + if (!tomoyo_same_acl_head(entry, new_entry) || + !check_duplicate(entry, new_entry)) continue; if (merge_duplicate) entry->is_deleted = merge_duplicate(entry, new_entry, @@ -101,28 +120,50 @@ int tomoyo_update_domain(struct tomoyo_acl_info *new_entry, const int size, if (error && !is_delete) { entry = tomoyo_commit_ok(new_entry, size); if (entry) { - list_add_tail_rcu(&entry->list, &domain->acl_info_list); + list_add_tail_rcu(&entry->list, list); error = 0; } } mutex_unlock(&tomoyo_policy_lock); +out: + tomoyo_put_condition(new_entry->cond); return error; } +/** + * tomoyo_check_acl - Do permission check. + * + * @r: Pointer to "struct tomoyo_request_info". + * @check_entry: Callback function to check type specific parameters. + * + * Returns 0 on success, negative value otherwise. + * + * Caller holds tomoyo_read_lock(). + */ void tomoyo_check_acl(struct tomoyo_request_info *r, bool (*check_entry) (struct tomoyo_request_info *, const struct tomoyo_acl_info *)) { const struct tomoyo_domain_info *domain = r->domain; struct tomoyo_acl_info *ptr; + bool retried = false; + const struct list_head *list = &domain->acl_info_list; - list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) { +retry: + list_for_each_entry_rcu(ptr, list, list) { if (ptr->is_deleted || ptr->type != r->param_type) continue; - if (check_entry(r, ptr)) { - r->granted = true; - return; - } + if (!check_entry(r, ptr)) + continue; + if (!tomoyo_condition(r, ptr->cond)) + continue; + r->granted = true; + return; + } + if (!retried) { + retried = true; + list = &domain->ns->acl_group[domain->group]; + goto retry; } r->granted = false; } @@ -130,24 +171,29 @@ void tomoyo_check_acl(struct tomoyo_request_info *r, /* The list for "struct tomoyo_domain_info". */ LIST_HEAD(tomoyo_domain_list); -struct list_head tomoyo_policy_list[TOMOYO_MAX_POLICY]; -struct list_head tomoyo_group_list[TOMOYO_MAX_GROUP]; - /** * tomoyo_last_word - Get last component of a domainname. * - * @domainname: Domainname to check. + * @name: Domainname to check. * * Returns the last word of @domainname. */ static const char *tomoyo_last_word(const char *name) { - const char *cp = strrchr(name, ' '); - if (cp) - return cp + 1; - return name; + const char *cp = strrchr(name, ' '); + if (cp) + return cp + 1; + return name; } +/** + * tomoyo_same_transition_control - Check for duplicated "struct tomoyo_transition_control" entry. + * + * @a: Pointer to "struct tomoyo_acl_head". + * @b: Pointer to "struct tomoyo_acl_head". 
+ * + * Returns true if @a == @b, false otherwise. + */ static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { @@ -163,30 +209,36 @@ static bool tomoyo_same_transition_control(const struct tomoyo_acl_head *a, } /** - * tomoyo_update_transition_control_entry - Update "struct tomoyo_transition_control" list. + * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list. * - * @domainname: The name of domain. Maybe NULL. - * @program: The name of program. Maybe NULL. - * @type: Type of transition. - * @is_delete: True if it is a delete request. + * @param: Pointer to "struct tomoyo_acl_param". + * @type: Type of this entry. * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_update_transition_control_entry(const char *domainname, - const char *program, - const u8 type, - const bool is_delete) +int tomoyo_write_transition_control(struct tomoyo_acl_param *param, + const u8 type) { struct tomoyo_transition_control e = { .type = type }; - int error = is_delete ? -ENOENT : -ENOMEM; - if (program) { + int error = param->is_delete ? -ENOENT : -ENOMEM; + char *program = param->data; + char *domainname = strstr(program, " from "); + if (domainname) { + *domainname = '\0'; + domainname += 6; + } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP || + type == TOMOYO_TRANSITION_CONTROL_KEEP) { + domainname = program; + program = NULL; + } + if (program && strcmp(program, "any")) { if (!tomoyo_correct_path(program)) return -EINVAL; e.program = tomoyo_get_name(program); if (!e.program) goto out; } - if (domainname) { + if (domainname && strcmp(domainname, "any")) { if (!tomoyo_correct_domain(domainname)) { if (!tomoyo_correct_path(domainname)) goto out; @@ -196,126 +248,136 @@ static int tomoyo_update_transition_control_entry(const char *domainname, if (!e.domainname) goto out; } - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list - [TOMOYO_ID_TRANSITION_CONTROL], + param->list = ¶m->ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL]; + error = tomoyo_update_policy(&e.head, sizeof(e), param, tomoyo_same_transition_control); - out: +out: tomoyo_put_name(e.domainname); tomoyo_put_name(e.program); return error; } /** - * tomoyo_write_transition_control - Write "struct tomoyo_transition_control" list. + * tomoyo_scan_transition - Try to find specific domain transition type. * - * @data: String to parse. - * @is_delete: True if it is a delete request. - * @type: Type of this entry. + * @list: Pointer to "struct list_head". + * @domainname: The name of current domain. + * @program: The name of requested program. + * @last_name: The last component of @domainname. + * @type: One of values in "enum tomoyo_transition_type". * - * Returns 0 on success, negative value otherwise. + * Returns true if found one, false otherwise. + * + * Caller holds tomoyo_read_lock(). 
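
tomoyo_write_transition_control() above now does its own operand splitting: an optional " from " separates the program part from the domain part, and either side may also be the literal "any". A userspace sketch of the split for a line such as "initialize_domain /usr/sbin/sshd from <kernel> /sbin/init" (the keyword itself was consumed earlier):

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
          char line[] = "/usr/sbin/sshd from <kernel> /sbin/init";
          char *program = line;
          char *domainname = strstr(program, " from ");

          if (domainname) {
                  *domainname = '\0';
                  domainname += 6;        /* strlen(" from ") */
          }
          printf("program: %s\n", program);
          printf("domain:  %s\n", domainname ? domainname : "(none)");
          return 0;
  }
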
*/ -int tomoyo_write_transition_control(char *data, const bool is_delete, - const u8 type) +static inline bool tomoyo_scan_transition +(const struct list_head *list, const struct tomoyo_path_info *domainname, + const struct tomoyo_path_info *program, const char *last_name, + const enum tomoyo_transition_type type) { - char *domainname = strstr(data, " from "); - if (domainname) { - *domainname = '\0'; - domainname += 6; - } else if (type == TOMOYO_TRANSITION_CONTROL_NO_KEEP || - type == TOMOYO_TRANSITION_CONTROL_KEEP) { - domainname = data; - data = NULL; + const struct tomoyo_transition_control *ptr; + list_for_each_entry_rcu(ptr, list, head.list) { + if (ptr->head.is_deleted || ptr->type != type) + continue; + if (ptr->domainname) { + if (!ptr->is_last_name) { + if (ptr->domainname != domainname) + continue; + } else { + /* + * Use direct strcmp() since this is + * unlikely used. + */ + if (strcmp(ptr->domainname->name, last_name)) + continue; + } + } + if (ptr->program && tomoyo_pathcmp(ptr->program, program)) + continue; + return true; } - return tomoyo_update_transition_control_entry(domainname, data, type, - is_delete); + return false; } /** * tomoyo_transition_type - Get domain transition type. * - * @domainname: The name of domain. - * @program: The name of program. + * @ns: Pointer to "struct tomoyo_policy_namespace". + * @domainname: The name of current domain. + * @program: The name of requested program. * - * Returns TOMOYO_TRANSITION_CONTROL_INITIALIZE if executing @program - * reinitializes domain transition, TOMOYO_TRANSITION_CONTROL_KEEP if executing - * @program suppresses domain transition, others otherwise. + * Returns TOMOYO_TRANSITION_CONTROL_TRANSIT if executing @program causes + * domain transition across namespaces, TOMOYO_TRANSITION_CONTROL_INITIALIZE if + * executing @program reinitializes domain transition within that namespace, + * TOMOYO_TRANSITION_CONTROL_KEEP if executing @program stays at @domainname , + * others otherwise. * * Caller holds tomoyo_read_lock(). */ -static u8 tomoyo_transition_type(const struct tomoyo_path_info *domainname, - const struct tomoyo_path_info *program) +static enum tomoyo_transition_type tomoyo_transition_type +(const struct tomoyo_policy_namespace *ns, + const struct tomoyo_path_info *domainname, + const struct tomoyo_path_info *program) { - const struct tomoyo_transition_control *ptr; const char *last_name = tomoyo_last_word(domainname->name); - u8 type; - for (type = 0; type < TOMOYO_MAX_TRANSITION_TYPE; type++) { - next: - list_for_each_entry_rcu(ptr, &tomoyo_policy_list - [TOMOYO_ID_TRANSITION_CONTROL], - head.list) { - if (ptr->head.is_deleted || ptr->type != type) - continue; - if (ptr->domainname) { - if (!ptr->is_last_name) { - if (ptr->domainname != domainname) - continue; - } else { - /* - * Use direct strcmp() since this is - * unlikely used. - */ - if (strcmp(ptr->domainname->name, - last_name)) - continue; - } - } - if (ptr->program && - tomoyo_pathcmp(ptr->program, program)) - continue; - if (type == TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) { - /* - * Do not check for initialize_domain if - * no_initialize_domain matched. 
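
The rewritten walk in the + lines just below leans on the enum layout: each "no_xxx" suppressor immediately precedes its "xxx" counterpart, so a matched suppressor skips both with two increments, while any other match stops the walk. A userspace sketch, assuming the enum order introduced by this series (no_reset, reset, no_initialize, initialize, no_keep, keep); here only no_initialize_domain matches, so the walk falls off the end and no transition rule applies:

  #include <stdbool.h>
  #include <stdio.h>

  enum transition_type {
          NO_RESET, RESET, NO_INITIALIZE, INITIALIZE,
          NO_KEEP, KEEP, MAX_TYPE
  };

  /* Pretend only a "no_initialize_domain" entry matches. */
  static bool scan(enum transition_type type)
  {
          return type == NO_INITIALIZE;
  }

  int main(void)
  {
          enum transition_type type = NO_RESET;

          while (type < MAX_TYPE) {
                  if (!scan(type)) {
                          type++;
                          continue;
                  }
                  if (type != NO_RESET && type != NO_INITIALIZE)
                          break;        /* a positive rule matched */
                  type += 2;            /* suppressor: skip its partner */
          }
          printf("final=%d (MAX_TYPE=%d, i.e. no transition rule)\n",
                 type, MAX_TYPE);
          return 0;
  }
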
- */ - type = TOMOYO_TRANSITION_CONTROL_NO_KEEP; - goto next; - } - goto done; + enum tomoyo_transition_type type = TOMOYO_TRANSITION_CONTROL_NO_RESET; + while (type < TOMOYO_MAX_TRANSITION_TYPE) { + const struct list_head * const list = + &ns->policy_list[TOMOYO_ID_TRANSITION_CONTROL]; + if (!tomoyo_scan_transition(list, domainname, program, + last_name, type)) { + type++; + continue; } + if (type != TOMOYO_TRANSITION_CONTROL_NO_RESET && + type != TOMOYO_TRANSITION_CONTROL_NO_INITIALIZE) + break; + /* + * Do not check for reset_domain if no_reset_domain matched. + * Do not check for initialize_domain if no_initialize_domain + * matched. + */ + type++; + type++; } - done: return type; } +/** + * tomoyo_same_aggregator - Check for duplicated "struct tomoyo_aggregator" entry. + * + * @a: Pointer to "struct tomoyo_acl_head". + * @b: Pointer to "struct tomoyo_acl_head". + * + * Returns true if @a == @b, false otherwise. + */ static bool tomoyo_same_aggregator(const struct tomoyo_acl_head *a, const struct tomoyo_acl_head *b) { - const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), head); - const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), head); + const struct tomoyo_aggregator *p1 = container_of(a, typeof(*p1), + head); + const struct tomoyo_aggregator *p2 = container_of(b, typeof(*p2), + head); return p1->original_name == p2->original_name && p1->aggregated_name == p2->aggregated_name; } /** - * tomoyo_update_aggregator_entry - Update "struct tomoyo_aggregator" list. + * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list. * - * @original_name: The original program's name. - * @aggregated_name: The program name to use. - * @is_delete: True if it is a delete request. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -static int tomoyo_update_aggregator_entry(const char *original_name, - const char *aggregated_name, - const bool is_delete) +int tomoyo_write_aggregator(struct tomoyo_acl_param *param) { struct tomoyo_aggregator e = { }; - int error = is_delete ? -ENOENT : -ENOMEM; - - if (!tomoyo_correct_path(original_name) || + int error = param->is_delete ? -ENOENT : -ENOMEM; + const char *original_name = tomoyo_read_token(param); + const char *aggregated_name = tomoyo_read_token(param); + if (!tomoyo_correct_word(original_name) || !tomoyo_correct_path(aggregated_name)) return -EINVAL; e.original_name = tomoyo_get_name(original_name); @@ -323,83 +385,181 @@ static int tomoyo_update_aggregator_entry(const char *original_name, if (!e.original_name || !e.aggregated_name || e.aggregated_name->is_patterned) /* No patterns allowed. */ goto out; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list[TOMOYO_ID_AGGREGATOR], + param->list = ¶m->ns->policy_list[TOMOYO_ID_AGGREGATOR]; + error = tomoyo_update_policy(&e.head, sizeof(e), param, tomoyo_same_aggregator); - out: +out: tomoyo_put_name(e.original_name); tomoyo_put_name(e.aggregated_name); return error; } /** - * tomoyo_write_aggregator - Write "struct tomoyo_aggregator" list. + * tomoyo_find_namespace - Find specified namespace. * - * @data: String to parse. - * @is_delete: True if it is a delete request. + * @name: Name of namespace to find. + * @len: Length of @name. * - * Returns 0 on success, negative value otherwise. + * Returns pointer to "struct tomoyo_policy_namespace" if found, + * NULL otherwise. * * Caller holds tomoyo_read_lock(). 
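The rewritten loop above leans on the enum layout: each no_xxx control is assumed (as visible in the patch) to sit directly before its xxx counterpart, so a no_xxx match can veto the positive check simply by advancing the cursor two positions. A toy model of that scan order, with invented names and a single hard-coded match:

#include <stdbool.h>
#include <stdio.h>

enum ttype {
	NO_RESET, RESET, NO_INITIALIZE, INITIALIZE, NO_KEEP, KEEP, MAX_TYPE
};

/* Stand-in for tomoyo_scan_transition(): only no_initialize matches. */
static bool scan(enum ttype type)
{
	return type == NO_INITIALIZE;
}

int main(void)
{
	enum ttype type = NO_RESET;

	while (type < MAX_TYPE) {
		if (!scan(type)) {
			type++;
			continue;
		}
		if (type != NO_RESET && type != NO_INITIALIZE)
			break;
		type += 2;	/* Skip the vetoed positive entry. */
	}
	/* MAX_TYPE here means "no transition control applies". */
	printf("result=%d, INITIALIZE was never consulted\n", (int)type);
	return 0;
}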
*/ -int tomoyo_write_aggregator(char *data, const bool is_delete) +static struct tomoyo_policy_namespace *tomoyo_find_namespace +(const char *name, const unsigned int len) { - char *cp = strchr(data, ' '); + struct tomoyo_policy_namespace *ns; + list_for_each_entry(ns, &tomoyo_namespace_list, namespace_list) { + if (strncmp(name, ns->name, len) || + (name[len] && name[len] != ' ')) + continue; + return ns; + } + return NULL; +} - if (!cp) - return -EINVAL; - *cp++ = '\0'; - return tomoyo_update_aggregator_entry(data, cp, is_delete); +/** + * tomoyo_assign_namespace - Create a new namespace. + * + * @domainname: Name of namespace to create. + * + * Returns pointer to "struct tomoyo_policy_namespace" on success, + * NULL otherwise. + * + * Caller holds tomoyo_read_lock(). + */ +struct tomoyo_policy_namespace *tomoyo_assign_namespace(const char *domainname) +{ + struct tomoyo_policy_namespace *ptr; + struct tomoyo_policy_namespace *entry; + const char *cp = domainname; + unsigned int len = 0; + while (*cp && *cp++ != ' ') + len++; + ptr = tomoyo_find_namespace(domainname, len); + if (ptr) + return ptr; + if (len >= TOMOYO_EXEC_TMPSIZE - 10 || !tomoyo_domain_def(domainname)) + return NULL; + entry = kzalloc(sizeof(*entry) + len + 1, GFP_NOFS); + if (!entry) + return NULL; + if (mutex_lock_interruptible(&tomoyo_policy_lock)) + goto out; + ptr = tomoyo_find_namespace(domainname, len); + if (!ptr && tomoyo_memory_ok(entry)) { + char *name = (char *) (entry + 1); + ptr = entry; + memmove(name, domainname, len); + name[len] = '\0'; + entry->name = name; + tomoyo_init_policy_namespace(entry); + entry = NULL; + } + mutex_unlock(&tomoyo_policy_lock); +out: + kfree(entry); + return ptr; } /** - * tomoyo_assign_domain - Create a domain. + * tomoyo_namespace_jump - Check for namespace jump. + * + * @domainname: Name of domain. + * + * Returns true if namespace differs, false otherwise. + */ +static bool tomoyo_namespace_jump(const char *domainname) +{ + const char *namespace = tomoyo_current_namespace()->name; + const int len = strlen(namespace); + return strncmp(domainname, namespace, len) || + (domainname[len] && domainname[len] != ' '); +} + +/** + * tomoyo_assign_domain - Create a domain or a namespace. * * @domainname: The name of domain. - * @profile: Profile number to assign if the domain was newly created. + * @transit: True if transit to domain found or created. * * Returns pointer to "struct tomoyo_domain_info" on success, NULL otherwise. * * Caller holds tomoyo_read_lock(). */ struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, - const u8 profile) + const bool transit) { - struct tomoyo_domain_info *entry; - struct tomoyo_domain_info *domain = NULL; - const struct tomoyo_path_info *saved_domainname; - bool found = false; - - if (!tomoyo_correct_domain(domainname)) + struct tomoyo_domain_info e = { }; + struct tomoyo_domain_info *entry = tomoyo_find_domain(domainname); + bool created = false; + if (entry) { + if (transit) { + /* + * Since namespace is created at runtime, profiles may + * not be created by the moment the process transits to + * that domain. Do not perform domain transition if + * profile for that domain is not yet created. + */ + if (!entry->ns->profile_ptr[entry->profile]) + return NULL; + } + return entry; + } + /* Requested domain does not exist. */ + /* Don't create requested domain if domainname is invalid. 
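A namespace is therefore just the first space-delimited token of a domainname, matched with an explicit boundary check so that "<kernel>" does not also match "<kernel2> ...". A minimal userspace restatement of the prefix test used by tomoyo_find_namespace():

#include <stdio.h>
#include <string.h>

/*
 * Sketch of the namespace prefix test: the byte after the candidate
 * prefix must be a space or NUL for the match to count.
 */
static int namespace_matches(const char *domainname, const char *ns)
{
	const size_t len = strlen(ns);

	return !strncmp(domainname, ns, len) &&
	       (!domainname[len] || domainname[len] == ' ');
}

int main(void)
{
	printf("%d\n", namespace_matches("<kernel> /usr/sbin/sshd", "<kernel>"));	/* 1 */
	printf("%d\n", namespace_matches("<kernel2> /bin/sh", "<kernel>"));		/* 0 */
	return 0;
}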
*/ + if (strlen(domainname) >= TOMOYO_EXEC_TMPSIZE - 10 || + !tomoyo_correct_domain(domainname)) + return NULL; + /* + * Since definition of profiles and acl_groups may differ across + * namespaces, do not inherit "use_profile" and "use_group" settings + * by automatically creating requested domain upon domain transition. + */ + if (transit && tomoyo_namespace_jump(domainname)) + return NULL; + e.ns = tomoyo_assign_namespace(domainname); + if (!e.ns) return NULL; - saved_domainname = tomoyo_get_name(domainname); - if (!saved_domainname) + /* + * "use_profile" and "use_group" settings for automatically created + * domains are inherited from current domain. These are 0 for manually + * created domains. + */ + if (transit) { + const struct tomoyo_domain_info *domain = tomoyo_domain(); + e.profile = domain->profile; + e.group = domain->group; + } + e.domainname = tomoyo_get_name(domainname); + if (!e.domainname) return NULL; - entry = kzalloc(sizeof(*entry), GFP_NOFS); if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; - list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) { - if (domain->is_deleted || - tomoyo_pathcmp(saved_domainname, domain->domainname)) - continue; - found = true; - break; - } - if (!found && tomoyo_memory_ok(entry)) { - INIT_LIST_HEAD(&entry->acl_info_list); - entry->domainname = saved_domainname; - saved_domainname = NULL; - entry->profile = profile; - list_add_tail_rcu(&entry->list, &tomoyo_domain_list); - domain = entry; - entry = NULL; - found = true; + entry = tomoyo_find_domain(domainname); + if (!entry) { + entry = tomoyo_commit_ok(&e, sizeof(e)); + if (entry) { + INIT_LIST_HEAD(&entry->acl_info_list); + list_add_tail_rcu(&entry->list, &tomoyo_domain_list); + created = true; + } } mutex_unlock(&tomoyo_policy_lock); - out: - tomoyo_put_name(saved_domainname); - kfree(entry); - return found ? domain : NULL; +out: + tomoyo_put_name(e.domainname); + if (entry && transit) { + if (created) { + struct tomoyo_request_info r; + tomoyo_init_request_info(&r, entry, + TOMOYO_MAC_FILE_EXECUTE); + r.granted = false; + tomoyo_write_log(&r, "use_profile %u\n", + entry->profile); + tomoyo_write_log(&r, "use_group %u\n", entry->group); + } + } + return entry; } /** @@ -413,22 +573,27 @@ struct tomoyo_domain_info *tomoyo_assign_domain(const char *domainname, */ int tomoyo_find_next_domain(struct linux_binprm *bprm) { - struct tomoyo_request_info r; - char *tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS); struct tomoyo_domain_info *old_domain = tomoyo_domain(); struct tomoyo_domain_info *domain = NULL; const char *original_name = bprm->filename; - u8 mode; - bool is_enforce; int retval = -ENOMEM; bool need_kfree = false; + bool reject_on_transition_failure = false; struct tomoyo_path_info rn = { }; /* real name */ - - mode = tomoyo_init_request_info(&r, NULL, TOMOYO_MAC_FILE_EXECUTE); - is_enforce = (mode == TOMOYO_CONFIG_ENFORCING); - if (!tmp) - goto out; - + struct tomoyo_execve *ee = kzalloc(sizeof(*ee), GFP_NOFS); + if (!ee) + return -ENOMEM; + ee->tmp = kzalloc(TOMOYO_EXEC_TMPSIZE, GFP_NOFS); + if (!ee->tmp) { + kfree(ee); + return -ENOMEM; + } + /* ee->dump->data is allocated by tomoyo_dump_page(). */ + tomoyo_init_request_info(&ee->r, NULL, TOMOYO_MAC_FILE_EXECUTE); + ee->r.ee = ee; + ee->bprm = bprm; + ee->r.obj = &ee->obj; + ee->obj.path1 = bprm->file->f_path; retry: if (need_kfree) { kfree(rn.name); @@ -445,8 +610,10 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) /* Check 'aggregator' directive. 
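The transit flag thus selects between two initialization policies for a freshly created domain: automatic transitions inherit use_profile/use_group from the current domain, manual creation starts from zero. As a compact illustration only (toy_domain/make_domain are invented stand-ins, not kernel structures):

#include <stdio.h>

struct toy_domain {
	unsigned char profile;	/* use_profile */
	unsigned char group;	/* use_group */
};

static struct toy_domain make_domain(const struct toy_domain *current,
				     int transit)
{
	struct toy_domain d = { 0, 0 };	/* Manual creation: defaults. */

	if (transit) {			/* Automatic transition: inherit. */
		d.profile = current->profile;
		d.group = current->group;
	}
	return d;
}

int main(void)
{
	const struct toy_domain cur = { 3, 1 };
	const struct toy_domain a = make_domain(&cur, 1);
	const struct toy_domain m = make_domain(&cur, 0);

	printf("auto=%u/%u manual=%u/%u\n", a.profile, a.group,
	       m.profile, m.group);
	return 0;
}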
*/ { struct tomoyo_aggregator *ptr; - list_for_each_entry_rcu(ptr, &tomoyo_policy_list - [TOMOYO_ID_AGGREGATOR], head.list) { + struct list_head *list = + &old_domain->ns->policy_list[TOMOYO_ID_AGGREGATOR]; + /* Check 'aggregator' directive. */ + list_for_each_entry_rcu(ptr, list, head.list) { if (ptr->head.is_deleted || !tomoyo_path_matches_pattern(&rn, ptr->original_name)) @@ -460,7 +627,7 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) } /* Check execute permission. */ - retval = tomoyo_path_permission(&r, TOMOYO_TYPE_EXECUTE, &rn); + retval = tomoyo_path_permission(&ee->r, TOMOYO_TYPE_EXECUTE, &rn); if (retval == TOMOYO_RETRY_REQUEST) goto retry; if (retval < 0) @@ -471,20 +638,30 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) * wildcard) rather than the pathname passed to execve() * (which never contains wildcard). */ - if (r.param.path.matched_path) { + if (ee->r.param.path.matched_path) { if (need_kfree) kfree(rn.name); need_kfree = false; /* This is OK because it is read only. */ - rn = *r.param.path.matched_path; + rn = *ee->r.param.path.matched_path; } /* Calculate domain to transit to. */ - switch (tomoyo_transition_type(old_domain->domainname, &rn)) { + switch (tomoyo_transition_type(old_domain->ns, old_domain->domainname, + &rn)) { + case TOMOYO_TRANSITION_CONTROL_RESET: + /* Transit to the root of specified namespace. */ + snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "<%s>", rn.name); + /* + * Make do_execve() fail if domain transition across namespaces + * has failed. + */ + reject_on_transition_failure = true; + break; case TOMOYO_TRANSITION_CONTROL_INITIALIZE: - /* Transit to the child of tomoyo_kernel_domain domain. */ - snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, TOMOYO_ROOT_NAME " " - "%s", rn.name); + /* Transit to the child of current namespace's root. */ + snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", + old_domain->ns->name, rn.name); break; case TOMOYO_TRANSITION_CONTROL_KEEP: /* Keep current domain. */ @@ -502,33 +679,32 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) domain = old_domain; } else { /* Normal domain transition. 
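An aggregator entry rewrites the requested pathname before the execute check, so many matching programs share one domain. The sketch below shows the substitution idea only: pattern matching is reduced to a trailing "\*" prefix test, whereas the kernel's tomoyo_path_matches_pattern() is far more general.

#include <stdio.h>
#include <string.h>

static const char *aggregate(const char *requested, const char *pattern,
			     const char *aggregated)
{
	size_t len = strlen(pattern);

	/* Treat "prefix\*" as "anything starting with prefix". */
	if (len >= 2 && !strcmp(pattern + len - 2, "\\*") &&
	    !strncmp(requested, pattern, len - 2))
		return aggregated;
	return requested;
}

int main(void)
{
	/* e.g. aggregate everything under /usr/bin/ into one program. */
	printf("%s\n", aggregate("/usr/bin/ls", "/usr/bin/\\*",
				 "/usr/bin/busybox"));
	return 0;
}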
*/ - snprintf(tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", + snprintf(ee->tmp, TOMOYO_EXEC_TMPSIZE - 1, "%s %s", old_domain->domainname->name, rn.name); } break; } - if (domain || strlen(tmp) >= TOMOYO_EXEC_TMPSIZE - 10) - goto done; - domain = tomoyo_find_domain(tmp); + if (!domain) + domain = tomoyo_assign_domain(ee->tmp, true); if (domain) - goto done; - if (is_enforce) { - int error = tomoyo_supervisor(&r, "# wants to create domain\n" - "%s\n", tmp); - if (error == TOMOYO_RETRY_REQUEST) - goto retry; - if (error < 0) - goto done; + retval = 0; + else if (reject_on_transition_failure) { + printk(KERN_WARNING "ERROR: Domain '%s' not ready.\n", + ee->tmp); + retval = -ENOMEM; + } else if (ee->r.mode == TOMOYO_CONFIG_ENFORCING) + retval = -ENOMEM; + else { + retval = 0; + if (!old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED]) { + old_domain->flags[TOMOYO_DIF_TRANSITION_FAILED] = true; + ee->r.granted = false; + tomoyo_write_log(&ee->r, "%s", tomoyo_dif + [TOMOYO_DIF_TRANSITION_FAILED]); + printk(KERN_WARNING + "ERROR: Domain '%s' not defined.\n", ee->tmp); + } } - domain = tomoyo_assign_domain(tmp, old_domain->profile); - done: - if (domain) - goto out; - printk(KERN_WARNING "TOMOYO-ERROR: Domain '%s' not defined.\n", tmp); - if (is_enforce) - retval = -EPERM; - else - old_domain->transition_failed = true; out: if (!domain) domain = old_domain; @@ -537,6 +713,54 @@ int tomoyo_find_next_domain(struct linux_binprm *bprm) bprm->cred->security = domain; if (need_kfree) kfree(rn.name); - kfree(tmp); + kfree(ee->tmp); + kfree(ee->dump.data); + kfree(ee); return retval; } + +/** + * tomoyo_dump_page - Dump a page to buffer. + * + * @bprm: Pointer to "struct linux_binprm". + * @pos: Location to dump. + * @dump: Pointer to "struct tomoyo_page_dump". + * + * Returns true on success, false otherwise. + */ +bool tomoyo_dump_page(struct linux_binprm *bprm, unsigned long pos, + struct tomoyo_page_dump *dump) +{ + struct page *page; + /* dump->data is released by tomoyo_finish_execve(). */ + if (!dump->data) { + dump->data = kzalloc(PAGE_SIZE, GFP_NOFS); + if (!dump->data) + return false; + } + /* Same as get_arg_page(bprm, pos, 0) in fs/exec.c */ +#ifdef CONFIG_MMU + if (get_user_pages(current, bprm->mm, pos, 1, 0, 1, &page, NULL) <= 0) + return false; +#else + page = bprm->page[pos / PAGE_SIZE]; +#endif + if (page != dump->page) { + const unsigned int offset = pos % PAGE_SIZE; + /* + * Maybe kmap()/kunmap() should be used here. + * But remove_arg_zero() uses kmap_atomic()/kunmap_atomic(). + * So does this function. + */ + char *kaddr = kmap_atomic(page, KM_USER0); + dump->page = page; + memcpy(dump->data + offset, kaddr + offset, + PAGE_SIZE - offset); + kunmap_atomic(kaddr, KM_USER0); + } + /* Same as put_arg_page(page) in fs/exec.c */ +#ifdef CONFIG_MMU + put_page(page); +#endif + return true; +} diff --git a/security/tomoyo/file.c b/security/tomoyo/file.c index d64e8ecb6fb3..743c35f5084a 100644 --- a/security/tomoyo/file.c +++ b/security/tomoyo/file.c @@ -1,80 +1,51 @@ /* * security/tomoyo/file.c * - * Pathname restriction functions. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/slab.h> -/* Keyword array for operations with one pathname.
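tomoyo_dump_page() copies only from the requested position to the end of the mapped page, letting the caller accumulate argv/envp one page tail at a time. The offset arithmetic, restated as a runnable userspace sketch with an invented dump_tail() helper:

#include <stdio.h>
#include <string.h>

#define TOY_PAGE_SIZE 4096UL

/*
 * When a new page is mapped, only the bytes from 'pos' to the end of
 * that page are copied into the dump buffer.
 */
static void dump_tail(char *data, const char *page, unsigned long pos)
{
	const unsigned long offset = pos % TOY_PAGE_SIZE;

	memcpy(data + offset, page + offset, TOY_PAGE_SIZE - offset);
}

int main(void)
{
	static char page[TOY_PAGE_SIZE] = "....argv0";
	static char data[TOY_PAGE_SIZE];

	dump_tail(data, page, 4);	/* Copy bytes 4..4095 of this page. */
	printf("%s\n", data + 4);	/* -> argv0 */
	return 0;
}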
*/ -const char *tomoyo_path_keyword[TOMOYO_MAX_PATH_OPERATION] = { - [TOMOYO_TYPE_READ_WRITE] = "read/write", - [TOMOYO_TYPE_EXECUTE] = "execute", - [TOMOYO_TYPE_READ] = "read", - [TOMOYO_TYPE_WRITE] = "write", - [TOMOYO_TYPE_UNLINK] = "unlink", - [TOMOYO_TYPE_RMDIR] = "rmdir", - [TOMOYO_TYPE_TRUNCATE] = "truncate", - [TOMOYO_TYPE_SYMLINK] = "symlink", - [TOMOYO_TYPE_REWRITE] = "rewrite", - [TOMOYO_TYPE_CHROOT] = "chroot", - [TOMOYO_TYPE_UMOUNT] = "unmount", -}; - -/* Keyword array for operations with one pathname and three numbers. */ -const char *tomoyo_mkdev_keyword[TOMOYO_MAX_MKDEV_OPERATION] = { - [TOMOYO_TYPE_MKBLOCK] = "mkblock", - [TOMOYO_TYPE_MKCHAR] = "mkchar", -}; - -/* Keyword array for operations with two pathnames. */ -const char *tomoyo_path2_keyword[TOMOYO_MAX_PATH2_OPERATION] = { - [TOMOYO_TYPE_LINK] = "link", - [TOMOYO_TYPE_RENAME] = "rename", - [TOMOYO_TYPE_PIVOT_ROOT] = "pivot_root", -}; - -/* Keyword array for operations with one pathname and one number. */ -const char *tomoyo_path_number_keyword[TOMOYO_MAX_PATH_NUMBER_OPERATION] = { - [TOMOYO_TYPE_CREATE] = "create", - [TOMOYO_TYPE_MKDIR] = "mkdir", - [TOMOYO_TYPE_MKFIFO] = "mkfifo", - [TOMOYO_TYPE_MKSOCK] = "mksock", - [TOMOYO_TYPE_IOCTL] = "ioctl", - [TOMOYO_TYPE_CHMOD] = "chmod", - [TOMOYO_TYPE_CHOWN] = "chown", - [TOMOYO_TYPE_CHGRP] = "chgrp", -}; - +/* + * Mapping table from "enum tomoyo_path_acl_index" to "enum tomoyo_mac_index". + */ static const u8 tomoyo_p2mac[TOMOYO_MAX_PATH_OPERATION] = { - [TOMOYO_TYPE_READ_WRITE] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_EXECUTE] = TOMOYO_MAC_FILE_EXECUTE, [TOMOYO_TYPE_READ] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_WRITE] = TOMOYO_MAC_FILE_OPEN, + [TOMOYO_TYPE_APPEND] = TOMOYO_MAC_FILE_OPEN, [TOMOYO_TYPE_UNLINK] = TOMOYO_MAC_FILE_UNLINK, + [TOMOYO_TYPE_GETATTR] = TOMOYO_MAC_FILE_GETATTR, [TOMOYO_TYPE_RMDIR] = TOMOYO_MAC_FILE_RMDIR, [TOMOYO_TYPE_TRUNCATE] = TOMOYO_MAC_FILE_TRUNCATE, [TOMOYO_TYPE_SYMLINK] = TOMOYO_MAC_FILE_SYMLINK, - [TOMOYO_TYPE_REWRITE] = TOMOYO_MAC_FILE_REWRITE, [TOMOYO_TYPE_CHROOT] = TOMOYO_MAC_FILE_CHROOT, [TOMOYO_TYPE_UMOUNT] = TOMOYO_MAC_FILE_UMOUNT, }; -static const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = { +/* + * Mapping table from "enum tomoyo_mkdev_acl_index" to "enum tomoyo_mac_index". + */ +const u8 tomoyo_pnnn2mac[TOMOYO_MAX_MKDEV_OPERATION] = { [TOMOYO_TYPE_MKBLOCK] = TOMOYO_MAC_FILE_MKBLOCK, [TOMOYO_TYPE_MKCHAR] = TOMOYO_MAC_FILE_MKCHAR, }; -static const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = { +/* + * Mapping table from "enum tomoyo_path2_acl_index" to "enum tomoyo_mac_index". + */ +const u8 tomoyo_pp2mac[TOMOYO_MAX_PATH2_OPERATION] = { [TOMOYO_TYPE_LINK] = TOMOYO_MAC_FILE_LINK, [TOMOYO_TYPE_RENAME] = TOMOYO_MAC_FILE_RENAME, [TOMOYO_TYPE_PIVOT_ROOT] = TOMOYO_MAC_FILE_PIVOT_ROOT, }; -static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = { +/* + * Mapping table from "enum tomoyo_path_number_acl_index" to + * "enum tomoyo_mac_index". + */ +const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = { [TOMOYO_TYPE_CREATE] = TOMOYO_MAC_FILE_CREATE, [TOMOYO_TYPE_MKDIR] = TOMOYO_MAC_FILE_MKDIR, [TOMOYO_TYPE_MKFIFO] = TOMOYO_MAC_FILE_MKFIFO, @@ -85,41 +56,76 @@ static const u8 tomoyo_pn2mac[TOMOYO_MAX_PATH_NUMBER_OPERATION] = { [TOMOYO_TYPE_CHGRP] = TOMOYO_MAC_FILE_CHGRP, }; +/** + * tomoyo_put_name_union - Drop reference on "struct tomoyo_name_union". + * + * @ptr: Pointer to "struct tomoyo_name_union". + * + * Returns nothing. 
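These tables collapse several per-category keyword arrays into direct index mappings: multiple operations can share one MAC functionality, so enforcement modes are configured per functionality rather than per opcode. A toy version of the same table-driven dispatch (all enum values here are invented):

#include <stdio.h>

enum toy_op { OP_EXECUTE, OP_READ, OP_WRITE, OP_APPEND, MAX_OP };
enum toy_mac { MAC_FILE_EXECUTE, MAC_FILE_OPEN, MAX_MAC };

/*
 * As in tomoyo_p2mac[], read/write/append all funnel into the single
 * "file::open" functionality.
 */
static const unsigned char toy_p2mac[MAX_OP] = {
	[OP_EXECUTE] = MAC_FILE_EXECUTE,
	[OP_READ]    = MAC_FILE_OPEN,
	[OP_WRITE]   = MAC_FILE_OPEN,
	[OP_APPEND]  = MAC_FILE_OPEN,
};

int main(void)
{
	printf("append -> mac %u\n", toy_p2mac[OP_APPEND]);	/* 1 */
	return 0;
}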
+ */ void tomoyo_put_name_union(struct tomoyo_name_union *ptr) { - if (!ptr) - return; - if (ptr->is_group) - tomoyo_put_group(ptr->group); - else - tomoyo_put_name(ptr->filename); + tomoyo_put_group(ptr->group); + tomoyo_put_name(ptr->filename); } +/** + * tomoyo_compare_name_union - Check whether a name matches "struct tomoyo_name_union" or not. + * + * @name: Pointer to "struct tomoyo_path_info". + * @ptr: Pointer to "struct tomoyo_name_union". + * + * Returns "struct tomoyo_path_info" if @name matches @ptr, NULL otherwise. + */ const struct tomoyo_path_info * tomoyo_compare_name_union(const struct tomoyo_path_info *name, const struct tomoyo_name_union *ptr) { - if (ptr->is_group) + if (ptr->group) return tomoyo_path_matches_group(name, ptr->group); if (tomoyo_path_matches_pattern(name, ptr->filename)) return ptr->filename; return NULL; } +/** + * tomoyo_put_number_union - Drop reference on "struct tomoyo_number_union". + * + * @ptr: Pointer to "struct tomoyo_number_union". + * + * Returns nothing. + */ void tomoyo_put_number_union(struct tomoyo_number_union *ptr) { - if (ptr && ptr->is_group) - tomoyo_put_group(ptr->group); + tomoyo_put_group(ptr->group); } +/** + * tomoyo_compare_number_union - Check whether a value matches "struct tomoyo_number_union" or not. + * + * @value: Number to check. + * @ptr: Pointer to "struct tomoyo_number_union". + * + * Returns true if @value matches @ptr, false otherwise. + */ bool tomoyo_compare_number_union(const unsigned long value, const struct tomoyo_number_union *ptr) { - if (ptr->is_group) + if (ptr->group) return tomoyo_number_matches_group(value, value, ptr->group); return value >= ptr->values[0] && value <= ptr->values[1]; } +/** + * tomoyo_add_slash - Add trailing '/' if needed. + * + * @buf: Pointer to "struct tomoyo_path_info". + * + * Returns nothing. + * + * @buf must be generated by tomoyo_encode() because this function does not + * allocate memory for adding '/'. + */ static void tomoyo_add_slash(struct tomoyo_path_info *buf) { if (buf->is_dir) @@ -132,24 +138,6 @@ static void tomoyo_add_slash(struct tomoyo_path_info *buf) } /** - * tomoyo_strendswith - Check whether the token ends with the given token. - * - * @name: The token to check. - * @tail: The token to find. - * - * Returns true if @name ends with @tail, false otherwise. - */ -static bool tomoyo_strendswith(const char *name, const char *tail) -{ - int len; - - if (!name || !tail) - return false; - len = strlen(name) - strlen(tail); - return len >= 0 && !strcmp(name + len, tail); -} - -/** * tomoyo_get_realpath - Get realpath. * * @buf: Pointer to "struct tomoyo_path_info". 
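With the is_group flag gone, the unions above are discriminated by whether ->group is set; a plain value otherwise matches an inclusive [min, max] range. A reduced, runnable model of the number case (group lookup stubbed out):

#include <stdbool.h>
#include <stdio.h>

struct toy_number_union {
	unsigned long values[2];	/* Inclusive [min, max] range. */
	const void *group;		/* Non-NULL selects group matching. */
};

static bool toy_compare_number_union(unsigned long value,
				     const struct toy_number_union *ptr)
{
	if (ptr->group)
		return false;	/* tomoyo_number_matches_group() stand-in. */
	return value >= ptr->values[0] && value <= ptr->values[1];
}

int main(void)
{
	struct toy_number_union mode = { { 0600, 0644 }, NULL };

	printf("%d %d\n", toy_compare_number_union(0640, &mode),
	       toy_compare_number_union(0755, &mode));	/* 1 0 */
	return 0;
}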
@@ -164,7 +152,7 @@ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path) tomoyo_fill_path_info(buf); return true; } - return false; + return false; } /** @@ -176,13 +164,9 @@ static bool tomoyo_get_realpath(struct tomoyo_path_info *buf, struct path *path) */ static int tomoyo_audit_path_log(struct tomoyo_request_info *r) { - const char *operation = tomoyo_path_keyword[r->param.path.operation]; - const struct tomoyo_path_info *filename = r->param.path.filename; - if (r->granted) - return 0; - tomoyo_warn_log(r, "%s %s", operation, filename->name); - return tomoyo_supervisor(r, "allow_%s %s\n", operation, - tomoyo_pattern(filename)); + return tomoyo_supervisor(r, "file %s %s\n", tomoyo_path_keyword + [r->param.path.operation], + r->param.path.filename->name); } /** @@ -194,16 +178,10 @@ static int tomoyo_audit_path_log(struct tomoyo_request_info *r) */ static int tomoyo_audit_path2_log(struct tomoyo_request_info *r) { - const char *operation = tomoyo_path2_keyword[r->param.path2.operation]; - const struct tomoyo_path_info *filename1 = r->param.path2.filename1; - const struct tomoyo_path_info *filename2 = r->param.path2.filename2; - if (r->granted) - return 0; - tomoyo_warn_log(r, "%s %s %s", operation, filename1->name, - filename2->name); - return tomoyo_supervisor(r, "allow_%s %s %s\n", operation, - tomoyo_pattern(filename1), - tomoyo_pattern(filename2)); + return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords + [tomoyo_pp2mac[r->param.path2.operation]], + r->param.path2.filename1->name, + r->param.path2.filename2->name); } /** @@ -215,24 +193,18 @@ static int tomoyo_audit_path2_log(struct tomoyo_request_info *r) */ static int tomoyo_audit_mkdev_log(struct tomoyo_request_info *r) { - const char *operation = tomoyo_mkdev_keyword[r->param.mkdev.operation]; - const struct tomoyo_path_info *filename = r->param.mkdev.filename; - const unsigned int major = r->param.mkdev.major; - const unsigned int minor = r->param.mkdev.minor; - const unsigned int mode = r->param.mkdev.mode; - if (r->granted) - return 0; - tomoyo_warn_log(r, "%s %s 0%o %u %u", operation, filename->name, mode, - major, minor); - return tomoyo_supervisor(r, "allow_%s %s 0%o %u %u\n", operation, - tomoyo_pattern(filename), mode, major, minor); + return tomoyo_supervisor(r, "file %s %s 0%o %u %u\n", + tomoyo_mac_keywords + [tomoyo_pnnn2mac[r->param.mkdev.operation]], + r->param.mkdev.filename->name, + r->param.mkdev.mode, r->param.mkdev.major, + r->param.mkdev.minor); } /** * tomoyo_audit_path_number_log - Audit path/number request log. * - * @r: Pointer to "struct tomoyo_request_info". - * @error: Error code. + * @r: Pointer to "struct tomoyo_request_info". * * Returns 0 on success, negative value otherwise. 
*/ @@ -240,11 +212,7 @@ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r) { const u8 type = r->param.path_number.operation; u8 radix; - const struct tomoyo_path_info *filename = r->param.path_number.filename; - const char *operation = tomoyo_path_number_keyword[type]; char buffer[64]; - if (r->granted) - return 0; switch (type) { case TOMOYO_TYPE_CREATE: case TOMOYO_TYPE_MKDIR: @@ -262,251 +230,23 @@ static int tomoyo_audit_path_number_log(struct tomoyo_request_info *r) } tomoyo_print_ulong(buffer, sizeof(buffer), r->param.path_number.number, radix); - tomoyo_warn_log(r, "%s %s %s", operation, filename->name, buffer); - return tomoyo_supervisor(r, "allow_%s %s %s\n", operation, - tomoyo_pattern(filename), buffer); -} - -static bool tomoyo_same_globally_readable(const struct tomoyo_acl_head *a, - const struct tomoyo_acl_head *b) -{ - return container_of(a, struct tomoyo_readable_file, - head)->filename == - container_of(b, struct tomoyo_readable_file, - head)->filename; -} - -/** - * tomoyo_update_globally_readable_entry - Update "struct tomoyo_readable_file" list. - * - * @filename: Filename unconditionally permitted to open() for reading. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -static int tomoyo_update_globally_readable_entry(const char *filename, - const bool is_delete) -{ - struct tomoyo_readable_file e = { }; - int error; - - if (!tomoyo_correct_word(filename)) - return -EINVAL; - e.filename = tomoyo_get_name(filename); - if (!e.filename) - return -ENOMEM; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list - [TOMOYO_ID_GLOBALLY_READABLE], - tomoyo_same_globally_readable); - tomoyo_put_name(e.filename); - return error; -} - -/** - * tomoyo_globally_readable_file - Check if the file is unconditionnaly permitted to be open()ed for reading. - * - * @filename: The filename to check. - * - * Returns true if any domain can open @filename for reading, false otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -static bool tomoyo_globally_readable_file(const struct tomoyo_path_info * - filename) -{ - struct tomoyo_readable_file *ptr; - bool found = false; - - list_for_each_entry_rcu(ptr, &tomoyo_policy_list - [TOMOYO_ID_GLOBALLY_READABLE], head.list) { - if (!ptr->head.is_deleted && - tomoyo_path_matches_pattern(filename, ptr->filename)) { - found = true; - break; - } - } - return found; -} - -/** - * tomoyo_write_globally_readable - Write "struct tomoyo_readable_file" list. - * - * @data: String to parse. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -int tomoyo_write_globally_readable(char *data, const bool is_delete) -{ - return tomoyo_update_globally_readable_entry(data, is_delete); -} - -static bool tomoyo_same_pattern(const struct tomoyo_acl_head *a, - const struct tomoyo_acl_head *b) -{ - return container_of(a, struct tomoyo_no_pattern, head)->pattern == - container_of(b, struct tomoyo_no_pattern, head)->pattern; -} - -/** - * tomoyo_update_file_pattern_entry - Update "struct tomoyo_no_pattern" list. - * - * @pattern: Pathname pattern. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). 
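The radix choice in tomoyo_audit_path_number_log() above keeps audit lines readable: mode bits and IDs print as octal, ioctl numbers as hex, everything else as decimal. A standalone restatement (the VALUE_TYPE_* names here are illustrative, not necessarily the kernel's):

#include <stddef.h>
#include <stdio.h>

enum { VALUE_TYPE_OCTAL, VALUE_TYPE_DECIMAL, VALUE_TYPE_HEXADECIMAL };

static void print_ulong(char *buf, size_t len, unsigned long v, int radix)
{
	if (radix == VALUE_TYPE_OCTAL)
		snprintf(buf, len, "0%lo", v);
	else if (radix == VALUE_TYPE_HEXADECIMAL)
		snprintf(buf, len, "0x%lX", v);
	else
		snprintf(buf, len, "%lu", v);
}

int main(void)
{
	char buf[64];

	print_ulong(buf, sizeof(buf), 0644, VALUE_TYPE_OCTAL);
	printf("file chmod /etc/fstab %s\n", buf);	/* ... 0644 */
	return 0;
}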
- */ -static int tomoyo_update_file_pattern_entry(const char *pattern, - const bool is_delete) -{ - struct tomoyo_no_pattern e = { }; - int error; - - if (!tomoyo_correct_word(pattern)) - return -EINVAL; - e.pattern = tomoyo_get_name(pattern); - if (!e.pattern) - return -ENOMEM; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list[TOMOYO_ID_PATTERN], - tomoyo_same_pattern); - tomoyo_put_name(e.pattern); - return error; -} - -/** - * tomoyo_pattern - Get patterned pathname. - * - * @filename: The filename to find patterned pathname. - * - * Returns pointer to pathname pattern if matched, @filename otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -const char *tomoyo_pattern(const struct tomoyo_path_info *filename) -{ - struct tomoyo_no_pattern *ptr; - const struct tomoyo_path_info *pattern = NULL; - - list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_PATTERN], - head.list) { - if (ptr->head.is_deleted) - continue; - if (!tomoyo_path_matches_pattern(filename, ptr->pattern)) - continue; - pattern = ptr->pattern; - if (tomoyo_strendswith(pattern->name, "/\\*")) { - /* Do nothing. Try to find the better match. */ - } else { - /* This would be the better match. Use this. */ - break; - } - } - if (pattern) - filename = pattern; - return filename->name; -} - -/** - * tomoyo_write_pattern - Write "struct tomoyo_no_pattern" list. - * - * @data: String to parse. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -int tomoyo_write_pattern(char *data, const bool is_delete) -{ - return tomoyo_update_file_pattern_entry(data, is_delete); -} - -static bool tomoyo_same_no_rewrite(const struct tomoyo_acl_head *a, - const struct tomoyo_acl_head *b) -{ - return container_of(a, struct tomoyo_no_rewrite, head)->pattern - == container_of(b, struct tomoyo_no_rewrite, head) - ->pattern; -} - -/** - * tomoyo_update_no_rewrite_entry - Update "struct tomoyo_no_rewrite" list. - * - * @pattern: Pathname pattern that are not rewritable by default. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -static int tomoyo_update_no_rewrite_entry(const char *pattern, - const bool is_delete) -{ - struct tomoyo_no_rewrite e = { }; - int error; - - if (!tomoyo_correct_word(pattern)) - return -EINVAL; - e.pattern = tomoyo_get_name(pattern); - if (!e.pattern) - return -ENOMEM; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE], - tomoyo_same_no_rewrite); - tomoyo_put_name(e.pattern); - return error; + return tomoyo_supervisor(r, "file %s %s %s\n", tomoyo_mac_keywords + [tomoyo_pn2mac[type]], + r->param.path_number.filename->name, buffer); } /** - * tomoyo_no_rewrite_file - Check if the given pathname is not permitted to be rewrited. + * tomoyo_check_path_acl - Check permission for path operation. * - * @filename: Filename to check. + * @r: Pointer to "struct tomoyo_request_info". + * @ptr: Pointer to "struct tomoyo_acl_info". * - * Returns true if @filename is specified by "deny_rewrite" directive, - * false otherwise. + * Returns true if granted, false otherwise. * - * Caller holds tomoyo_read_lock(). + * To be able to use wildcard for domain transition, this function sets + * matching entry on success. Since the caller holds tomoyo_read_lock(), + * it is safe to set matching entry. 
*/ -static bool tomoyo_no_rewrite_file(const struct tomoyo_path_info *filename) -{ - struct tomoyo_no_rewrite *ptr; - bool found = false; - - list_for_each_entry_rcu(ptr, &tomoyo_policy_list[TOMOYO_ID_NO_REWRITE], - head.list) { - if (ptr->head.is_deleted) - continue; - if (!tomoyo_path_matches_pattern(filename, ptr->pattern)) - continue; - found = true; - break; - } - return found; -} - -/** - * tomoyo_write_no_rewrite - Write "struct tomoyo_no_rewrite" list. - * - * @data: String to parse. - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -int tomoyo_write_no_rewrite(char *data, const bool is_delete) -{ - return tomoyo_update_no_rewrite_entry(data, is_delete); -} - static bool tomoyo_check_path_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { @@ -521,6 +261,14 @@ static bool tomoyo_check_path_acl(struct tomoyo_request_info *r, return false; } +/** + * tomoyo_check_path_number_acl - Check permission for path number operation. + * + * @r: Pointer to "struct tomoyo_request_info". + * @ptr: Pointer to "struct tomoyo_acl_info". + * + * Returns true if granted, false otherwise. + */ static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { @@ -533,6 +281,14 @@ static bool tomoyo_check_path_number_acl(struct tomoyo_request_info *r, &acl->name); } +/** + * tomoyo_check_path2_acl - Check permission for path path operation. + * + * @r: Pointer to "struct tomoyo_request_info". + * @ptr: Pointer to "struct tomoyo_acl_info". + * + * Returns true if granted, false otherwise. + */ static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { @@ -544,8 +300,16 @@ static bool tomoyo_check_path2_acl(struct tomoyo_request_info *r, &acl->name2); } +/** + * tomoyo_check_mkdev_acl - Check permission for path number number number operation. + * + * @r: Pointer to "struct tomoyo_request_info". + * @ptr: Pointer to "struct tomoyo_acl_info". + * + * Returns true if granted, false otherwise. + */ static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r, - const struct tomoyo_acl_info *ptr) + const struct tomoyo_acl_info *ptr) { const struct tomoyo_mkdev_acl *acl = container_of(ptr, typeof(*acl), head); @@ -560,15 +324,31 @@ static bool tomoyo_check_mkdev_acl(struct tomoyo_request_info *r, &acl->name); } +/** + * tomoyo_same_path_acl - Check for duplicated "struct tomoyo_path_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b except permission bits, false otherwise. + */ static bool tomoyo_same_path_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path_acl *p2 = container_of(b, typeof(*p2), head); - return tomoyo_same_acl_head(&p1->head, &p2->head) && - tomoyo_same_name_union(&p1->name, &p2->name); + return tomoyo_same_name_union(&p1->name, &p2->name); } +/** + * tomoyo_merge_path_acl - Merge duplicated "struct tomoyo_path_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * @is_delete: True for @a &= ~@b, false for @a |= @b. + * + * Returns true if @a is empty, false otherwise. 
+ */ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) @@ -577,19 +357,10 @@ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, ->perm; u16 perm = *a_perm; const u16 b_perm = container_of(b, struct tomoyo_path_acl, head)->perm; - if (is_delete) { + if (is_delete) perm &= ~b_perm; - if ((perm & TOMOYO_RW_MASK) != TOMOYO_RW_MASK) - perm &= ~(1 << TOMOYO_TYPE_READ_WRITE); - else if (!(perm & (1 << TOMOYO_TYPE_READ_WRITE))) - perm &= ~TOMOYO_RW_MASK; - } else { + else perm |= b_perm; - if ((perm & TOMOYO_RW_MASK) == TOMOYO_RW_MASK) - perm |= (1 << TOMOYO_TYPE_READ_WRITE); - else if (perm & (1 << TOMOYO_TYPE_READ_WRITE)) - perm |= TOMOYO_RW_MASK; - } *a_perm = perm; return !perm; } @@ -597,52 +368,62 @@ static bool tomoyo_merge_path_acl(struct tomoyo_acl_info *a, /** * tomoyo_update_path_acl - Update "struct tomoyo_path_acl" list. * - * @type: Type of operation. - * @filename: Filename. - * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. + * @perm: Permission. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -static int tomoyo_update_path_acl(const u8 type, const char *filename, - struct tomoyo_domain_info * const domain, - const bool is_delete) +static int tomoyo_update_path_acl(const u16 perm, + struct tomoyo_acl_param *param) { struct tomoyo_path_acl e = { .head.type = TOMOYO_TYPE_PATH_ACL, - .perm = 1 << type + .perm = perm }; int error; - if (e.perm == (1 << TOMOYO_TYPE_READ_WRITE)) - e.perm |= TOMOYO_RW_MASK; - if (!tomoyo_parse_name_union(filename, &e.name)) - return -EINVAL; - error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain, - tomoyo_same_path_acl, - tomoyo_merge_path_acl); + if (!tomoyo_parse_name_union(param, &e.name)) + error = -EINVAL; + else + error = tomoyo_update_domain(&e.head, sizeof(e), param, + tomoyo_same_path_acl, + tomoyo_merge_path_acl); tomoyo_put_name_union(&e.name); return error; } +/** + * tomoyo_same_mkdev_acl - Check for duplicated "struct tomoyo_mkdev_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b except permission bits, false otherwise. + */ static bool tomoyo_same_mkdev_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { - const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), - head); - const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), - head); - return tomoyo_same_acl_head(&p1->head, &p2->head) - && tomoyo_same_name_union(&p1->name, &p2->name) - && tomoyo_same_number_union(&p1->mode, &p2->mode) - && tomoyo_same_number_union(&p1->major, &p2->major) - && tomoyo_same_number_union(&p1->minor, &p2->minor); + const struct tomoyo_mkdev_acl *p1 = container_of(a, typeof(*p1), head); + const struct tomoyo_mkdev_acl *p2 = container_of(b, typeof(*p2), head); + return tomoyo_same_name_union(&p1->name, &p2->name) && + tomoyo_same_number_union(&p1->mode, &p2->mode) && + tomoyo_same_number_union(&p1->major, &p2->major) && + tomoyo_same_number_union(&p1->minor, &p2->minor); } +/** + * tomoyo_merge_mkdev_acl - Merge duplicated "struct tomoyo_mkdev_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * @is_delete: True for @a &= ~@b, false for @a |= @b. + * + * Returns true if @a is empty, false otherwise. 
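Dropping the synthetic read/write bit reduces merging to plain bitwise operations, and a zero mask signals that the entry can be reclaimed. The core of the simplified merge, in isolation:

#include <stdbool.h>
#include <stdio.h>

static bool merge_perm(unsigned short *a_perm, unsigned short b_perm,
		       bool is_delete)
{
	if (is_delete)
		*a_perm &= ~b_perm;	/* @a &= ~@b */
	else
		*a_perm |= b_perm;	/* @a |= @b */
	return !*a_perm;	/* true: caller may drop the entry. */
}

int main(void)
{
	unsigned short perm = 0;

	merge_perm(&perm, 1 << 1 | 1 << 2, false);	/* grant two bits */
	printf("empty=%d\n",				/* 1: now removable */
	       merge_perm(&perm, 1 << 1 | 1 << 2, true));
	return 0;
}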
+ */ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, - struct tomoyo_acl_info *b, - const bool is_delete) + struct tomoyo_acl_info *b, + const bool is_delete) { u8 *const a_perm = &container_of(a, struct tomoyo_mkdev_acl, head)->perm; @@ -660,37 +441,30 @@ static bool tomoyo_merge_mkdev_acl(struct tomoyo_acl_info *a, /** * tomoyo_update_mkdev_acl - Update "struct tomoyo_mkdev_acl" list. * - * @type: Type of operation. - * @filename: Filename. - * @mode: Create mode. - * @major: Device major number. - * @minor: Device minor number. - * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. + * @perm: Permission. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -static int tomoyo_update_mkdev_acl(const u8 type, const char *filename, - char *mode, char *major, char *minor, - struct tomoyo_domain_info * const - domain, const bool is_delete) +static int tomoyo_update_mkdev_acl(const u8 perm, + struct tomoyo_acl_param *param) { struct tomoyo_mkdev_acl e = { .head.type = TOMOYO_TYPE_MKDEV_ACL, - .perm = 1 << type + .perm = perm }; - int error = is_delete ? -ENOENT : -ENOMEM; - if (!tomoyo_parse_name_union(filename, &e.name) || - !tomoyo_parse_number_union(mode, &e.mode) || - !tomoyo_parse_number_union(major, &e.major) || - !tomoyo_parse_number_union(minor, &e.minor)) - goto out; - error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain, - tomoyo_same_mkdev_acl, - tomoyo_merge_mkdev_acl); - out: + int error; + if (!tomoyo_parse_name_union(param, &e.name) || + !tomoyo_parse_number_union(param, &e.mode) || + !tomoyo_parse_number_union(param, &e.major) || + !tomoyo_parse_number_union(param, &e.minor)) + error = -EINVAL; + else + error = tomoyo_update_domain(&e.head, sizeof(e), param, + tomoyo_same_mkdev_acl, + tomoyo_merge_mkdev_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.mode); tomoyo_put_number_union(&e.major); @@ -698,16 +472,32 @@ static int tomoyo_update_mkdev_acl(const u8 type, const char *filename, return error; } +/** + * tomoyo_same_path2_acl - Check for duplicated "struct tomoyo_path2_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b except permission bits, false otherwise. + */ static bool tomoyo_same_path2_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { const struct tomoyo_path2_acl *p1 = container_of(a, typeof(*p1), head); const struct tomoyo_path2_acl *p2 = container_of(b, typeof(*p2), head); - return tomoyo_same_acl_head(&p1->head, &p2->head) - && tomoyo_same_name_union(&p1->name1, &p2->name1) - && tomoyo_same_name_union(&p1->name2, &p2->name2); + return tomoyo_same_name_union(&p1->name1, &p2->name1) && + tomoyo_same_name_union(&p1->name2, &p2->name2); } +/** + * tomoyo_merge_path2_acl - Merge duplicated "struct tomoyo_path2_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * @is_delete: True for @a &= ~@b, false for @a |= @b. + * + * Returns true if @a is empty, false otherwise. + */ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) @@ -727,33 +517,28 @@ static bool tomoyo_merge_path2_acl(struct tomoyo_acl_info *a, /** * tomoyo_update_path2_acl - Update "struct tomoyo_path2_acl" list. * - * @type: Type of operation. - * @filename1: First filename. - * @filename2: Second filename. 
- * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. + * @perm: Permission. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -static int tomoyo_update_path2_acl(const u8 type, const char *filename1, - const char *filename2, - struct tomoyo_domain_info * const domain, - const bool is_delete) +static int tomoyo_update_path2_acl(const u8 perm, + struct tomoyo_acl_param *param) { struct tomoyo_path2_acl e = { .head.type = TOMOYO_TYPE_PATH2_ACL, - .perm = 1 << type + .perm = perm }; - int error = is_delete ? -ENOENT : -ENOMEM; - if (!tomoyo_parse_name_union(filename1, &e.name1) || - !tomoyo_parse_name_union(filename2, &e.name2)) - goto out; - error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain, - tomoyo_same_path2_acl, - tomoyo_merge_path2_acl); - out: + int error; + if (!tomoyo_parse_name_union(param, &e.name1) || + !tomoyo_parse_name_union(param, &e.name2)) + error = -EINVAL; + else + error = tomoyo_update_domain(&e.head, sizeof(e), param, + tomoyo_same_path2_acl, + tomoyo_merge_path2_acl); tomoyo_put_name_union(&e.name1); tomoyo_put_name_union(&e.name2); return error; @@ -775,9 +560,8 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, { int error; - next: r->type = tomoyo_p2mac[operation]; - r->mode = tomoyo_get_mode(r->profile, r->type); + r->mode = tomoyo_get_mode(r->domain->ns, r->profile, r->type); if (r->mode == TOMOYO_CONFIG_DISABLED) return 0; r->param_type = TOMOYO_TYPE_PATH_ACL; @@ -785,10 +569,6 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, r->param.path.operation = operation; do { tomoyo_check_acl(r, tomoyo_check_path_acl); - if (!r->granted && operation == TOMOYO_TYPE_READ && - !r->domain->ignore_global_allow_read && - tomoyo_globally_readable_file(filename)) - r->granted = true; error = tomoyo_audit_path_log(r); /* * Do not retry for execute request, for alias may have @@ -796,19 +576,17 @@ int tomoyo_path_permission(struct tomoyo_request_info *r, u8 operation, */ } while (error == TOMOYO_RETRY_REQUEST && operation != TOMOYO_TYPE_EXECUTE); - /* - * Since "allow_truncate" doesn't imply "allow_rewrite" permission, - * we need to check "allow_rewrite" permission if the filename is - * specified by "deny_rewrite" keyword. - */ - if (!error && operation == TOMOYO_TYPE_TRUNCATE && - tomoyo_no_rewrite_file(filename)) { - operation = TOMOYO_TYPE_REWRITE; - goto next; - } return error; } +/** + * tomoyo_same_path_number_acl - Check for duplicated "struct tomoyo_path_number_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b except permission bits, false otherwise. + */ static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a, const struct tomoyo_acl_info *b) { @@ -816,11 +594,19 @@ static bool tomoyo_same_path_number_acl(const struct tomoyo_acl_info *a, head); const struct tomoyo_path_number_acl *p2 = container_of(b, typeof(*p2), head); - return tomoyo_same_acl_head(&p1->head, &p2->head) - && tomoyo_same_name_union(&p1->name, &p2->name) - && tomoyo_same_number_union(&p1->number, &p2->number); + return tomoyo_same_name_union(&p1->name, &p2->name) && + tomoyo_same_number_union(&p1->number, &p2->number); } +/** + * tomoyo_merge_path_number_acl - Merge duplicated "struct tomoyo_path_number_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". 
+ * @b: Pointer to "struct tomoyo_acl_info". + * @is_delete: True for @a &= ~@b, false for @a |= @b. + * + * Returns true if @a is empty, false otherwise. + */ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, struct tomoyo_acl_info *b, const bool is_delete) @@ -841,33 +627,26 @@ static bool tomoyo_merge_path_number_acl(struct tomoyo_acl_info *a, /** * tomoyo_update_path_number_acl - Update ioctl/chmod/chown/chgrp ACL. * - * @type: Type of operation. - * @filename: Filename. - * @number: Number. - * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. + * @perm: Permission. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. */ -static int tomoyo_update_path_number_acl(const u8 type, const char *filename, - char *number, - struct tomoyo_domain_info * const - domain, - const bool is_delete) +static int tomoyo_update_path_number_acl(const u8 perm, + struct tomoyo_acl_param *param) { struct tomoyo_path_number_acl e = { .head.type = TOMOYO_TYPE_PATH_NUMBER_ACL, - .perm = 1 << type + .perm = perm }; - int error = is_delete ? -ENOENT : -ENOMEM; - if (!tomoyo_parse_name_union(filename, &e.name)) - return -EINVAL; - if (!tomoyo_parse_number_union(number, &e.number)) - goto out; - error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain, - tomoyo_same_path_number_acl, - tomoyo_merge_path_number_acl); - out: + int error; + if (!tomoyo_parse_name_union(param, &e.name) || + !tomoyo_parse_number_union(param, &e.number)) + error = -EINVAL; + else + error = tomoyo_update_domain(&e.head, sizeof(e), param, + tomoyo_same_path_number_acl, + tomoyo_merge_path_number_acl); tomoyo_put_name_union(&e.name); tomoyo_put_number_union(&e.number); return error; @@ -886,16 +665,20 @@ int tomoyo_path_number_perm(const u8 type, struct path *path, unsigned long number) { struct tomoyo_request_info r; + struct tomoyo_obj_info obj = { + .path1 = *path, + }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; if (tomoyo_init_request_info(&r, NULL, tomoyo_pn2mac[type]) - == TOMOYO_CONFIG_DISABLED || !path->mnt || !path->dentry) + == TOMOYO_CONFIG_DISABLED || !path->dentry) return 0; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; + r.obj = &obj; if (type == TOMOYO_TYPE_MKDIR) tomoyo_add_slash(&buf); r.param_type = TOMOYO_TYPE_PATH_NUMBER_ACL; @@ -930,45 +713,30 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, int error = 0; struct tomoyo_path_info buf; struct tomoyo_request_info r; + struct tomoyo_obj_info obj = { + .path1 = *path, + }; int idx; - if (!path->mnt || - (path->dentry->d_inode && S_ISDIR(path->dentry->d_inode->i_mode))) - return 0; buf.name = NULL; r.mode = TOMOYO_CONFIG_DISABLED; idx = tomoyo_read_lock(); - /* - * If the filename is specified by "deny_rewrite" keyword, - * we need to check "allow_rewrite" permission when the filename is not - * opened for append mode or the filename is truncated at open time. 
- */ - if ((acc_mode & MAY_WRITE) && !(flag & O_APPEND) - && tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_REWRITE) + if (acc_mode && + tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN) != TOMOYO_CONFIG_DISABLED) { if (!tomoyo_get_realpath(&buf, path)) { error = -ENOMEM; goto out; } - if (tomoyo_no_rewrite_file(&buf)) - error = tomoyo_path_permission(&r, TOMOYO_TYPE_REWRITE, + r.obj = &obj; + if (acc_mode & MAY_READ) + error = tomoyo_path_permission(&r, TOMOYO_TYPE_READ, + &buf); + if (!error && (acc_mode & MAY_WRITE)) + error = tomoyo_path_permission(&r, (flag & O_APPEND) ? + TOMOYO_TYPE_APPEND : + TOMOYO_TYPE_WRITE, &buf); - } - if (!error && acc_mode && - tomoyo_init_request_info(&r, domain, TOMOYO_MAC_FILE_OPEN) - != TOMOYO_CONFIG_DISABLED) { - u8 operation; - if (!buf.name && !tomoyo_get_realpath(&buf, path)) { - error = -ENOMEM; - goto out; - } - if (acc_mode == (MAY_READ | MAY_WRITE)) - operation = TOMOYO_TYPE_READ_WRITE; - else if (acc_mode == MAY_READ) - operation = TOMOYO_TYPE_READ; - else - operation = TOMOYO_TYPE_WRITE; - error = tomoyo_path_permission(&r, operation, &buf); } out: kfree(buf.name); @@ -979,46 +747,57 @@ int tomoyo_check_open_permission(struct tomoyo_domain_info *domain, } /** - * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "rewrite", "chroot" and "unmount". + * tomoyo_path_perm - Check permission for "unlink", "rmdir", "truncate", "symlink", "append", "chroot" and "unmount". * * @operation: Type of operation. * @path: Pointer to "struct path". + * @target: Symlink's target if @operation is TOMOYO_TYPE_SYMLINK, + * NULL otherwise. * * Returns 0 on success, negative value otherwise. */ -int tomoyo_path_perm(const u8 operation, struct path *path) +int tomoyo_path_perm(const u8 operation, struct path *path, const char *target) { - int error = -ENOMEM; - struct tomoyo_path_info buf; struct tomoyo_request_info r; + struct tomoyo_obj_info obj = { + .path1 = *path, + }; + int error; + struct tomoyo_path_info buf; + bool is_enforce; + struct tomoyo_path_info symlink_target; int idx; - if (!path->mnt) - return 0; if (tomoyo_init_request_info(&r, NULL, tomoyo_p2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; + is_enforce = (r.mode == TOMOYO_CONFIG_ENFORCING); + error = -ENOMEM; buf.name = NULL; idx = tomoyo_read_lock(); if (!tomoyo_get_realpath(&buf, path)) goto out; + r.obj = &obj; switch (operation) { - case TOMOYO_TYPE_REWRITE: - if (!tomoyo_no_rewrite_file(&buf)) { - error = 0; - goto out; - } - break; case TOMOYO_TYPE_RMDIR: case TOMOYO_TYPE_CHROOT: tomoyo_add_slash(&buf); break; + case TOMOYO_TYPE_SYMLINK: + symlink_target.name = tomoyo_encode(target); + if (!symlink_target.name) + goto out; + tomoyo_fill_path_info(&symlink_target); + obj.symlink_target = &symlink_target; + break; } error = tomoyo_path_permission(&r, operation, &buf); + if (operation == TOMOYO_TYPE_SYMLINK) + kfree(symlink_target.name); out: kfree(buf.name); tomoyo_read_unlock(idx); - if (r.mode != TOMOYO_CONFIG_ENFORCING) + if (!is_enforce) error = 0; return error; } @@ -1034,20 +813,23 @@ int tomoyo_path_perm(const u8 operation, struct path *path) * Returns 0 on success, negative value otherwise. 
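The reworked tomoyo_check_open_permission() decomposes a read-write open into two independent requests and downgrades O_APPEND writes to the weaker "append" permission. That decision, extracted into a runnable sketch with illustrative flag values:

#include <stdio.h>

#define TOY_MAY_READ  0x1
#define TOY_MAY_WRITE 0x2
#define TOY_O_APPEND  0x4

static void classify_open(int acc_mode, int flag)
{
	if (acc_mode & TOY_MAY_READ)
		printf("check read\n");
	if (acc_mode & TOY_MAY_WRITE)
		printf("check %s\n",
		       (flag & TOY_O_APPEND) ? "append" : "write");
}

int main(void)
{
	/* O_RDWR|O_APPEND: checked as read plus append, never as write. */
	classify_open(TOY_MAY_READ | TOY_MAY_WRITE, TOY_O_APPEND);
	return 0;
}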
*/ int tomoyo_mkdev_perm(const u8 operation, struct path *path, - const unsigned int mode, unsigned int dev) + const unsigned int mode, unsigned int dev) { struct tomoyo_request_info r; + struct tomoyo_obj_info obj = { + .path1 = *path, + }; int error = -ENOMEM; struct tomoyo_path_info buf; int idx; - if (!path->mnt || - tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation]) + if (tomoyo_init_request_info(&r, NULL, tomoyo_pnnn2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; idx = tomoyo_read_lock(); error = -ENOMEM; if (tomoyo_get_realpath(&buf, path)) { + r.obj = &obj; dev = new_decode_dev(dev); r.param_type = TOMOYO_TYPE_MKDEV_ACL; r.param.mkdev.filename = &buf; @@ -1081,10 +863,13 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1, struct tomoyo_path_info buf1; struct tomoyo_path_info buf2; struct tomoyo_request_info r; + struct tomoyo_obj_info obj = { + .path1 = *path1, + .path2 = *path2, + }; int idx; - if (!path1->mnt || !path2->mnt || - tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation]) + if (tomoyo_init_request_info(&r, NULL, tomoyo_pp2mac[operation]) == TOMOYO_CONFIG_DISABLED) return 0; buf1.name = NULL; @@ -1096,16 +881,17 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1, switch (operation) { struct dentry *dentry; case TOMOYO_TYPE_RENAME: - case TOMOYO_TYPE_LINK: + case TOMOYO_TYPE_LINK: dentry = path1->dentry; - if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) - break; - /* fall through */ - case TOMOYO_TYPE_PIVOT_ROOT: - tomoyo_add_slash(&buf1); - tomoyo_add_slash(&buf2); + if (!dentry->d_inode || !S_ISDIR(dentry->d_inode->i_mode)) + break; + /* fall through */ + case TOMOYO_TYPE_PIVOT_ROOT: + tomoyo_add_slash(&buf1); + tomoyo_add_slash(&buf2); break; - } + } + r.obj = &obj; r.param_type = TOMOYO_TYPE_PATH2_ACL; r.param.path2.operation = operation; r.param.path2.filename1 = &buf1; @@ -1124,53 +910,91 @@ int tomoyo_path2_perm(const u8 operation, struct path *path1, } /** + * tomoyo_same_mount_acl - Check for duplicated "struct tomoyo_mount_acl" entry. + * + * @a: Pointer to "struct tomoyo_acl_info". + * @b: Pointer to "struct tomoyo_acl_info". + * + * Returns true if @a == @b, false otherwise. + */ +static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a, + const struct tomoyo_acl_info *b) +{ + const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head); + const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head); + return tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) && + tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) && + tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) && + tomoyo_same_number_union(&p1->flags, &p2->flags); +} + +/** + * tomoyo_update_mount_acl - Write "struct tomoyo_mount_acl" list. + * + * @param: Pointer to "struct tomoyo_acl_param". + * + * Returns 0 on success, negative value otherwise. + * + * Caller holds tomoyo_read_lock(). 
+ */ +static int tomoyo_update_mount_acl(struct tomoyo_acl_param *param) +{ + struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL }; + int error; + if (!tomoyo_parse_name_union(param, &e.dev_name) || + !tomoyo_parse_name_union(param, &e.dir_name) || + !tomoyo_parse_name_union(param, &e.fs_type) || + !tomoyo_parse_number_union(param, &e.flags)) + error = -EINVAL; + else + error = tomoyo_update_domain(&e.head, sizeof(e), param, + tomoyo_same_mount_acl, NULL); + tomoyo_put_name_union(&e.dev_name); + tomoyo_put_name_union(&e.dir_name); + tomoyo_put_name_union(&e.fs_type); + tomoyo_put_number_union(&e.flags); + return error; +} + +/** * tomoyo_write_file - Update file related list. * - * @data: String to parse. - * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. + * @param: Pointer to "struct tomoyo_acl_param". * * Returns 0 on success, negative value otherwise. * * Caller holds tomoyo_read_lock(). */ -int tomoyo_write_file(char *data, struct tomoyo_domain_info *domain, - const bool is_delete) +int tomoyo_write_file(struct tomoyo_acl_param *param) { - char *w[5]; + u16 perm = 0; u8 type; - if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0]) - return -EINVAL; - if (strncmp(w[0], "allow_", 6)) - goto out; - w[0] += 6; - for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) { - if (strcmp(w[0], tomoyo_path_keyword[type])) - continue; - return tomoyo_update_path_acl(type, w[1], domain, is_delete); - } - if (!w[2][0]) - goto out; - for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) { - if (strcmp(w[0], tomoyo_path2_keyword[type])) - continue; - return tomoyo_update_path2_acl(type, w[1], w[2], domain, - is_delete); - } - for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) { - if (strcmp(w[0], tomoyo_path_number_keyword[type])) - continue; - return tomoyo_update_path_number_acl(type, w[1], w[2], domain, - is_delete); - } - if (!w[3][0] || !w[4][0]) - goto out; - for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) { - if (strcmp(w[0], tomoyo_mkdev_keyword[type])) - continue; - return tomoyo_update_mkdev_acl(type, w[1], w[2], w[3], - w[4], domain, is_delete); - } - out: + const char *operation = tomoyo_read_token(param); + for (type = 0; type < TOMOYO_MAX_PATH_OPERATION; type++) + if (tomoyo_permstr(operation, tomoyo_path_keyword[type])) + perm |= 1 << type; + if (perm) + return tomoyo_update_path_acl(perm, param); + for (type = 0; type < TOMOYO_MAX_PATH2_OPERATION; type++) + if (tomoyo_permstr(operation, + tomoyo_mac_keywords[tomoyo_pp2mac[type]])) + perm |= 1 << type; + if (perm) + return tomoyo_update_path2_acl(perm, param); + for (type = 0; type < TOMOYO_MAX_PATH_NUMBER_OPERATION; type++) + if (tomoyo_permstr(operation, + tomoyo_mac_keywords[tomoyo_pn2mac[type]])) + perm |= 1 << type; + if (perm) + return tomoyo_update_path_number_acl(perm, param); + for (type = 0; type < TOMOYO_MAX_MKDEV_OPERATION; type++) + if (tomoyo_permstr(operation, + tomoyo_mac_keywords[tomoyo_pnnn2mac[type]])) + perm |= 1 << type; + if (perm) + return tomoyo_update_mkdev_acl(perm, param); + if (tomoyo_permstr(operation, + tomoyo_mac_keywords[TOMOYO_MAC_FILE_MOUNT])) + return tomoyo_update_mount_acl(param); return -EINVAL; } diff --git a/security/tomoyo/gc.c b/security/tomoyo/gc.c index a877e4c3b101..ae135fbbbe95 100644 --- a/security/tomoyo/gc.c +++ b/security/tomoyo/gc.c @@ -1,58 +1,205 @@ /* * security/tomoyo/gc.c * - * Implementation of the Domain-Based Mandatory Access Control. 
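tomoyo_permstr() itself is defined elsewhere in the patch; conceptually it tests whether a keyword occurs as one '/'-separated component of the operation token, which is what lets a single line such as "file read/write/append /tmp/log" set several permission bits in one pass, as tomoyo_write_file() now does. A plausible stand-in, for illustration only:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

/* Does 'find' occur as a whole '/'-separated component of 'string'? */
static bool permstr(const char *string, const char *find)
{
	const char *pos = strstr(string, find);
	const size_t len = strlen(find);

	while (pos) {
		if ((pos == string || pos[-1] == '/') &&
		    (!pos[len] || pos[len] == '/'))
			return true;
		pos = strstr(pos + 1, find);
	}
	return false;
}

int main(void)
{
	const char *ops = "read/write/append";

	printf("%d %d\n", permstr(ops, "write"), permstr(ops, "unlink"));
	return 0;
}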
- * - * Copyright (C) 2005-2010 NTT DATA CORPORATION - * + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" #include <linux/kthread.h> #include <linux/slab.h> +/* The list for "struct tomoyo_io_buffer". */ +static LIST_HEAD(tomoyo_io_buffer_list); +/* Lock for protecting tomoyo_io_buffer_list. */ +static DEFINE_SPINLOCK(tomoyo_io_buffer_list_lock); + +/* Size of an element. */ +static const u8 tomoyo_element_size[TOMOYO_MAX_POLICY] = { + [TOMOYO_ID_GROUP] = sizeof(struct tomoyo_group), + [TOMOYO_ID_PATH_GROUP] = sizeof(struct tomoyo_path_group), + [TOMOYO_ID_NUMBER_GROUP] = sizeof(struct tomoyo_number_group), + [TOMOYO_ID_AGGREGATOR] = sizeof(struct tomoyo_aggregator), + [TOMOYO_ID_TRANSITION_CONTROL] = + sizeof(struct tomoyo_transition_control), + [TOMOYO_ID_MANAGER] = sizeof(struct tomoyo_manager), + /* [TOMOYO_ID_CONDITION] = "struct tomoyo_condition"->size, */ + /* [TOMOYO_ID_NAME] = "struct tomoyo_name"->size, */ + /* [TOMOYO_ID_ACL] = + tomoyo_acl_size["struct tomoyo_acl_info"->type], */ + [TOMOYO_ID_DOMAIN] = sizeof(struct tomoyo_domain_info), +}; + +/* Size of a domain ACL element. */ +static const u8 tomoyo_acl_size[] = { + [TOMOYO_TYPE_PATH_ACL] = sizeof(struct tomoyo_path_acl), + [TOMOYO_TYPE_PATH2_ACL] = sizeof(struct tomoyo_path2_acl), + [TOMOYO_TYPE_PATH_NUMBER_ACL] = sizeof(struct tomoyo_path_number_acl), + [TOMOYO_TYPE_MKDEV_ACL] = sizeof(struct tomoyo_mkdev_acl), + [TOMOYO_TYPE_MOUNT_ACL] = sizeof(struct tomoyo_mount_acl), +}; + +/** + * tomoyo_struct_used_by_io_buffer - Check whether the list element is used by /sys/kernel/security/tomoyo/ users or not. + * + * @element: Pointer to "struct list_head". + * + * Returns true if @element is used by /sys/kernel/security/tomoyo/ users, + * false otherwise. + */ +static bool tomoyo_struct_used_by_io_buffer(const struct list_head *element) +{ + struct tomoyo_io_buffer *head; + bool in_use = false; + + spin_lock(&tomoyo_io_buffer_list_lock); + list_for_each_entry(head, &tomoyo_io_buffer_list, list) { + head->users++; + spin_unlock(&tomoyo_io_buffer_list_lock); + if (mutex_lock_interruptible(&head->io_sem)) { + in_use = true; + goto out; + } + if (head->r.domain == element || head->r.group == element || + head->r.acl == element || &head->w.domain->list == element) + in_use = true; + mutex_unlock(&head->io_sem); +out: + spin_lock(&tomoyo_io_buffer_list_lock); + head->users--; + if (in_use) + break; + } + spin_unlock(&tomoyo_io_buffer_list_lock); + return in_use; +} + +/** + * tomoyo_name_used_by_io_buffer - Check whether the string is used by /sys/kernel/security/tomoyo/ users or not. + * + * @string: String to check. + * @size: Memory allocated for @string . + * + * Returns true if @string is used by /sys/kernel/security/tomoyo/ users, + * false otherwise. 
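tomoyo_struct_used_by_io_buffer() above pins each reader by incrementing head->users under the list spinlock, drops that lock so it may sleep on head->io_sem, inspects the reader's cursors, then re-takes the lock to unpin. The following is a loose userspace analogue of that pin/inspect/unpin sequence, with pthread mutexes standing in for the kernel spinlock and mutex and all names invented (build with cc -pthread):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct io_buffer {
        int users;              /* pinned while a scanner inspects it */
        pthread_mutex_t io_sem; /* serializes users of this buffer */
        const void *cursor;     /* stands in for head->r.domain etc. */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static bool element_in_use(struct io_buffer *head, const void *element)
{
        bool in_use;

        pthread_mutex_lock(&list_lock);
        head->users++;                  /* keep @head alive without list_lock */
        pthread_mutex_unlock(&list_lock);
        pthread_mutex_lock(&head->io_sem);
        in_use = head->cursor == element;
        pthread_mutex_unlock(&head->io_sem);
        pthread_mutex_lock(&list_lock);
        head->users--;
        pthread_mutex_unlock(&list_lock);
        return in_use;
}

int main(void)
{
        struct io_buffer buf = { 0, PTHREAD_MUTEX_INITIALIZER, NULL };
        int x;

        buf.cursor = &x;
        printf("%d %d\n", element_in_use(&buf, &x), element_in_use(&buf, &buf));
        return 0;
}

In the kernel version the nonzero users count additionally tells tomoyo_gc_thread() not to free the io_buffer itself while a scan is in flight.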
+ */ +static bool tomoyo_name_used_by_io_buffer(const char *string, + const size_t size) +{ + struct tomoyo_io_buffer *head; + bool in_use = false; + + spin_lock(&tomoyo_io_buffer_list_lock); + list_for_each_entry(head, &tomoyo_io_buffer_list, list) { + int i; + head->users++; + spin_unlock(&tomoyo_io_buffer_list_lock); + if (mutex_lock_interruptible(&head->io_sem)) { + in_use = true; + goto out; + } + for (i = 0; i < TOMOYO_MAX_IO_READ_QUEUE; i++) { + const char *w = head->r.w[i]; + if (w < string || w > string + size) + continue; + in_use = true; + break; + } + mutex_unlock(&head->io_sem); +out: + spin_lock(&tomoyo_io_buffer_list_lock); + head->users--; + if (in_use) + break; + } + spin_unlock(&tomoyo_io_buffer_list_lock); + return in_use; +} + +/* Structure for garbage collection. */ struct tomoyo_gc { struct list_head list; - int type; + enum tomoyo_policy_id type; + size_t size; struct list_head *element; }; -static LIST_HEAD(tomoyo_gc_queue); -static DEFINE_MUTEX(tomoyo_gc_mutex); +/* List of entries to be deleted. */ +static LIST_HEAD(tomoyo_gc_list); +/* Length of tomoyo_gc_list. */ +static int tomoyo_gc_list_len; -/* Caller holds tomoyo_policy_lock mutex. */ +/** + * tomoyo_add_to_gc - Add an entry to the to-be-deleted list. + * + * @type: One of the values in "enum tomoyo_policy_id". + * @element: Pointer to "struct list_head". + * + * Returns true on success, false otherwise. + * + * Caller holds tomoyo_policy_lock mutex. + * + * Adding an entry needs kmalloc(). Thus, if we tried to add thousands of + * entries at once, it would take too long. Thus, do not add more than 128 + * entries per scan. But to be able to handle the worst case where all entries + * are in use, we accept one more entry per scan. + * + * If we used a singly linked list chained through "struct list_head"->prev + * (which is LIST_POISON2), we could avoid kmalloc(). + */ static bool tomoyo_add_to_gc(const int type, struct list_head *element) { struct tomoyo_gc *entry = kzalloc(sizeof(*entry), GFP_ATOMIC); if (!entry) return false; entry->type = type; + if (type == TOMOYO_ID_ACL) + entry->size = tomoyo_acl_size[ + container_of(element, + typeof(struct tomoyo_acl_info), + list)->type]; + else if (type == TOMOYO_ID_NAME) + entry->size = strlen(container_of(element, + typeof(struct tomoyo_name), + head.list)->entry.name) + 1; + else if (type == TOMOYO_ID_CONDITION) + entry->size = + container_of(element, typeof(struct tomoyo_condition), + head.list)->size; + else + entry->size = tomoyo_element_size[type]; entry->element = element; - list_add(&entry->list, &tomoyo_gc_queue); + list_add(&entry->list, &tomoyo_gc_list); list_del_rcu(element); - return true; + return tomoyo_gc_list_len++ < 128; } -static void tomoyo_del_allow_read(struct list_head *element) -{ - struct tomoyo_readable_file *ptr = - container_of(element, typeof(*ptr), head.list); - tomoyo_put_name(ptr->filename); -} - -static void tomoyo_del_file_pattern(struct list_head *element) -{ - struct tomoyo_no_pattern *ptr = - container_of(element, typeof(*ptr), head.list); - tomoyo_put_name(ptr->pattern); -} - -static void tomoyo_del_no_rewrite(struct list_head *element) +/** + * tomoyo_element_linked_by_gc - Check whether an element is still referenced from the GC queue. + * + * @element: Pointer to an element. + * @size: Size of @element in bytes. + * + * Returns true if @element is linked by other elements in the garbage + * collector's queue, false otherwise.
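tomoyo_add_to_gc() above resolves the byte size of each queued element up front: ACLs and fixed-size policy records come from the tomoyo_acl_size[]/tomoyo_element_size[] tables added earlier in this hunk, while names and conditions carry their own size. A small sketch of the designated-initializer lookup pattern, with invented enum values and struct sizes:

#include <stdio.h>

enum policy_id { ID_GROUP, ID_AGGREGATOR, ID_DOMAIN, MAX_POLICY };

struct group { int refcnt; };
struct aggregator { char pad[24]; };
struct domain { char pad[64]; };

/* Indexed by enum policy_id; unnamed slots default to 0. */
static const unsigned char element_size[MAX_POLICY] = {
        [ID_GROUP]      = sizeof(struct group),
        [ID_AGGREGATOR] = sizeof(struct aggregator),
        [ID_DOMAIN]     = sizeof(struct domain),
};

int main(void)
{
        enum policy_id id;

        for (id = 0; id < MAX_POLICY; id++)
                printf("id %d -> %u bytes\n", id, (unsigned)element_size[id]);
        return 0;
}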
+ */ +static bool tomoyo_element_linked_by_gc(const u8 *element, const size_t size) { - struct tomoyo_no_rewrite *ptr = - container_of(element, typeof(*ptr), head.list); - tomoyo_put_name(ptr->pattern); + struct tomoyo_gc *p; + list_for_each_entry(p, &tomoyo_gc_list, list) { + const u8 *ptr = (const u8 *) p->element->next; + if (ptr < element || element + size < ptr) + continue; + return true; + } + return false; } +/** + * tomoyo_del_transition_control - Delete members in "struct tomoyo_transition_control". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_transition_control(struct list_head *element) { struct tomoyo_transition_control *ptr = @@ -61,6 +208,13 @@ static void tomoyo_del_transition_control(struct list_head *element) tomoyo_put_name(ptr->program); } +/** + * tomoyo_del_aggregator - Delete members in "struct tomoyo_aggregator". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_aggregator(struct list_head *element) { struct tomoyo_aggregator *ptr = @@ -69,6 +223,13 @@ static void tomoyo_del_aggregator(struct list_head *element) tomoyo_put_name(ptr->aggregated_name); } +/** + * tomoyo_del_manager - Delete members in "struct tomoyo_manager". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_manager(struct list_head *element) { struct tomoyo_manager *ptr = @@ -76,10 +237,18 @@ static void tomoyo_del_manager(struct list_head *element) tomoyo_put_name(ptr->manager); } +/** + * tomoyo_del_acl - Delete members in "struct tomoyo_acl_info". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_acl(struct list_head *element) { struct tomoyo_acl_info *acl = container_of(element, typeof(*acl), list); + tomoyo_put_condition(acl->cond); switch (acl->type) { case TOMOYO_TYPE_PATH_ACL: { @@ -127,6 +296,13 @@ static void tomoyo_del_acl(struct list_head *element) } } +/** + * tomoyo_del_domain - Delete members in "struct tomoyo_domain_info". + * + * @element: Pointer to "struct list_head". + * + * Returns true if deleted, false otherwise. + */ static bool tomoyo_del_domain(struct list_head *element) { struct tomoyo_domain_info *domain = @@ -165,13 +341,65 @@ static bool tomoyo_del_domain(struct list_head *element) return true; } +/** + * tomoyo_del_condition - Delete members in "struct tomoyo_condition". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. 
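The hunk above defers freeing when any other queued entry's ->next pointer still aims inside the block that is about to be released. A self-contained sketch of that address-range test (comparing pointers into distinct objects is formally unspecified in ISO C, but is the standard trick here):

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

static bool points_into(const void *ptr, const void *block, size_t size)
{
        const char *p = ptr;
        const char *base = block;

        return p >= base && p <= base + size;
}

int main(void)
{
        char block[32];
        int other;

        printf("%d\n", points_into(block + 8, block, sizeof(block))); /* 1 */
        printf("%d\n", points_into(&other, block, sizeof(block)));    /* 0 */
        return 0;
}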
+ */ +void tomoyo_del_condition(struct list_head *element) +{ + struct tomoyo_condition *cond = container_of(element, typeof(*cond), + head.list); + const u16 condc = cond->condc; + const u16 numbers_count = cond->numbers_count; + const u16 names_count = cond->names_count; + const u16 argc = cond->argc; + const u16 envc = cond->envc; + unsigned int i; + const struct tomoyo_condition_element *condp + = (const struct tomoyo_condition_element *) (cond + 1); + struct tomoyo_number_union *numbers_p + = (struct tomoyo_number_union *) (condp + condc); + struct tomoyo_name_union *names_p + = (struct tomoyo_name_union *) (numbers_p + numbers_count); + const struct tomoyo_argv *argv + = (const struct tomoyo_argv *) (names_p + names_count); + const struct tomoyo_envp *envp + = (const struct tomoyo_envp *) (argv + argc); + for (i = 0; i < numbers_count; i++) + tomoyo_put_number_union(numbers_p++); + for (i = 0; i < names_count; i++) + tomoyo_put_name_union(names_p++); + for (i = 0; i < argc; argv++, i++) + tomoyo_put_name(argv->value); + for (i = 0; i < envc; envp++, i++) { + tomoyo_put_name(envp->name); + tomoyo_put_name(envp->value); + } +} +/** + * tomoyo_del_name - Delete members in "struct tomoyo_name". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_name(struct list_head *element) { const struct tomoyo_name *ptr = - container_of(element, typeof(*ptr), list); + container_of(element, typeof(*ptr), head.list); } +/** + * tomoyo_del_path_group - Delete members in "struct tomoyo_path_group". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_path_group(struct list_head *element) { struct tomoyo_path_group *member = @@ -179,20 +407,43 @@ static void tomoyo_del_path_group(struct list_head *element) tomoyo_put_name(member->member_name); } +/** + * tomoyo_del_group - Delete "struct tomoyo_group". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_group(struct list_head *element) { struct tomoyo_group *group = - container_of(element, typeof(*group), list); + container_of(element, typeof(*group), head.list); tomoyo_put_name(group->group_name); } +/** + * tomoyo_del_number_group - Delete members in "struct tomoyo_number_group". + * + * @element: Pointer to "struct list_head". + * + * Returns nothing. + */ static void tomoyo_del_number_group(struct list_head *element) { struct tomoyo_number_group *member = container_of(element, typeof(*member), head.list); } -static bool tomoyo_collect_member(struct list_head *member_list, int id) +/** + * tomoyo_collect_member - Delete elements with "struct tomoyo_acl_head". + * + * @id: One of values in "enum tomoyo_policy_id". + * @member_list: Pointer to "struct list_head". + * + * Returns true if some elements are deleted, false otherwise. + */ +static bool tomoyo_collect_member(const enum tomoyo_policy_id id, + struct list_head *member_list) { struct tomoyo_acl_head *member; list_for_each_entry(member, member_list, list) { @@ -201,13 +452,20 @@ static bool tomoyo_collect_member(struct list_head *member_list, int id) if (!tomoyo_add_to_gc(id, &member->list)) return false; } - return true; + return true; } -static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain) +/** + * tomoyo_collect_acl - Delete elements in "struct tomoyo_domain_info". + * + * @list: Pointer to "struct list_head". + * + * Returns true if some elements are deleted, false otherwise. 
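tomoyo_del_condition() above walks several variable-length arrays that live in the same allocation as the "struct tomoyo_condition" header, locating each region purely by pointer arithmetic from (cond + 1). A minimal sketch of that single-allocation layout, with simplified field names and only two trailing arrays:

#include <stdio.h>
#include <stdlib.h>

struct cond_header {
        unsigned long size;                  /* total allocation size */
        unsigned short condc, numbers_count; /* trailing array lengths */
};
struct cond_element { long lhs, rhs; };

int main(void)
{
        const unsigned short condc = 2, numbers_count = 3;
        struct cond_header *cond;
        struct cond_element *condp;
        unsigned long *numbers;

        cond = calloc(1, sizeof(*cond) + condc * sizeof(*condp) +
                      numbers_count * sizeof(*numbers));
        if (!cond)
                return 1;
        cond->condc = condc;
        cond->numbers_count = numbers_count;
        condp = (struct cond_element *)(cond + 1);  /* first trailing array */
        numbers = (unsigned long *)(condp + condc); /* second trailing array */
        numbers[2] = 42;
        printf("%lu\n", numbers[2]);
        free(cond); /* one free() releases header and both arrays */
        return 0;
}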
+ */ +static bool tomoyo_collect_acl(struct list_head *list) { struct tomoyo_acl_info *acl; - list_for_each_entry(acl, &domain->acl_info_list, list) { + list_for_each_entry(acl, list, list) { if (!acl->is_deleted) continue; if (!tomoyo_add_to_gc(TOMOYO_ID_ACL, &acl->list)) @@ -216,19 +474,24 @@ static bool tomoyo_collect_acl(struct tomoyo_domain_info *domain) return true; } +/** + * tomoyo_collect_entry - Scan lists for deleted elements. + * + * Returns nothing. + */ static void tomoyo_collect_entry(void) { int i; + enum tomoyo_policy_id id; + struct tomoyo_policy_namespace *ns; + int idx; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return; - for (i = 0; i < TOMOYO_MAX_POLICY; i++) { - if (!tomoyo_collect_member(&tomoyo_policy_list[i], i)) - goto unlock; - } + idx = tomoyo_read_lock(); { struct tomoyo_domain_info *domain; list_for_each_entry_rcu(domain, &tomoyo_domain_list, list) { - if (!tomoyo_collect_acl(domain)) + if (!tomoyo_collect_acl(&domain->acl_info_list)) goto unlock; if (!domain->is_deleted || atomic_read(&domain->users)) continue; @@ -241,48 +504,93 @@ static void tomoyo_collect_entry(void) goto unlock; } } - for (i = 0; i < TOMOYO_MAX_HASH; i++) { - struct tomoyo_name *ptr; - list_for_each_entry_rcu(ptr, &tomoyo_name_list[i], list) { - if (atomic_read(&ptr->users)) - continue; - if (!tomoyo_add_to_gc(TOMOYO_ID_NAME, &ptr->list)) + list_for_each_entry_rcu(ns, &tomoyo_namespace_list, namespace_list) { + for (id = 0; id < TOMOYO_MAX_POLICY; id++) + if (!tomoyo_collect_member(id, &ns->policy_list[id])) goto unlock; + for (i = 0; i < TOMOYO_MAX_ACL_GROUPS; i++) + if (!tomoyo_collect_acl(&ns->acl_group[i])) + goto unlock; + for (i = 0; i < TOMOYO_MAX_GROUP; i++) { + struct list_head *list = &ns->group_list[i]; + struct tomoyo_group *group; + switch (i) { + case 0: + id = TOMOYO_ID_PATH_GROUP; + break; + default: + id = TOMOYO_ID_NUMBER_GROUP; + break; + } + list_for_each_entry(group, list, head.list) { + if (!tomoyo_collect_member + (id, &group->member_list)) + goto unlock; + if (!list_empty(&group->member_list) || + atomic_read(&group->head.users)) + continue; + if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP, + &group->head.list)) + goto unlock; + } } } - for (i = 0; i < TOMOYO_MAX_GROUP; i++) { - struct list_head *list = &tomoyo_group_list[i]; - int id; - struct tomoyo_group *group; - switch (i) { - case 0: - id = TOMOYO_ID_PATH_GROUP; - break; - default: - id = TOMOYO_ID_NUMBER_GROUP; - break; - } - list_for_each_entry(group, list, list) { - if (!tomoyo_collect_member(&group->member_list, id)) - goto unlock; - if (!list_empty(&group->member_list) || - atomic_read(&group->users)) + id = TOMOYO_ID_CONDITION; + for (i = 0; i < TOMOYO_MAX_HASH + 1; i++) { + struct list_head *list = !i ? + &tomoyo_condition_list : &tomoyo_name_list[i - 1]; + struct tomoyo_shared_acl_head *ptr; + list_for_each_entry(ptr, list, list) { + if (atomic_read(&ptr->users)) continue; - if (!tomoyo_add_to_gc(TOMOYO_ID_GROUP, &group->list)) + if (!tomoyo_add_to_gc(id, &ptr->list)) goto unlock; } + id = TOMOYO_ID_NAME; } - unlock: +unlock: + tomoyo_read_unlock(idx); mutex_unlock(&tomoyo_policy_lock); } -static void tomoyo_kfree_entry(void) +/** + * tomoyo_kfree_entry - Delete entries in tomoyo_gc_list. + * + * Returns true if some entries were kfree()d, false otherwise. 
+ */ +static bool tomoyo_kfree_entry(void) { struct tomoyo_gc *p; struct tomoyo_gc *tmp; + bool result = false; - list_for_each_entry_safe(p, tmp, &tomoyo_gc_queue, list) { + list_for_each_entry_safe(p, tmp, &tomoyo_gc_list, list) { struct list_head *element = p->element; + + /* + * list_del_rcu() in tomoyo_add_to_gc() guarantees that the + * list element is no longer reachable from the list which + * the element was originally on (e.g. tomoyo_domain_list). + * Also, synchronize_srcu() in tomoyo_gc_thread() guarantees + * that the list element is no longer referenced by syscall + * users. + * + * However, there are three users which may still be using the + * list element. We need to defer until all of these users + * forget the list element. + * + * Firstly, defer until "struct tomoyo_io_buffer"->r.{domain, + * group,acl} and "struct tomoyo_io_buffer"->w.domain forget + * the list element. + */ + if (tomoyo_struct_used_by_io_buffer(element)) + continue; + /* + * Secondly, defer until all other elements in the + * tomoyo_gc_list forget the list element. + */ + if (tomoyo_element_linked_by_gc((const u8 *) element, p->size)) + continue; switch (p->type) { case TOMOYO_ID_TRANSITION_CONTROL: tomoyo_del_transition_control(element); @@ -290,19 +598,21 @@ static void tomoyo_kfree_entry(void) case TOMOYO_ID_AGGREGATOR: tomoyo_del_aggregator(element); break; - case TOMOYO_ID_GLOBALLY_READABLE: - tomoyo_del_allow_read(element); - break; - case TOMOYO_ID_PATTERN: - tomoyo_del_file_pattern(element); - break; - case TOMOYO_ID_NO_REWRITE: - tomoyo_del_no_rewrite(element); - break; case TOMOYO_ID_MANAGER: tomoyo_del_manager(element); break; + case TOMOYO_ID_CONDITION: + tomoyo_del_condition(element); + break; case TOMOYO_ID_NAME: + /* + * Thirdly, defer until all "struct tomoyo_io_buffer" + * ->r.w[] forget the list element. + */ + if (tomoyo_name_used_by_io_buffer( + container_of(element, typeof(struct tomoyo_name), + head.list)->entry.name, p->size)) + continue; tomoyo_del_name(element); break; case TOMOYO_ID_ACL: @@ -321,34 +631,95 @@ static void tomoyo_kfree_entry(void) case TOMOYO_ID_NUMBER_GROUP: tomoyo_del_number_group(element); break; + case TOMOYO_MAX_POLICY: + break; } tomoyo_memory_free(element); list_del(&p->list); kfree(p); + tomoyo_gc_list_len--; + result = true; } + return result; } +/** + * tomoyo_gc_thread - Garbage collector thread function. + * + * @unused: Unused. + * + * In case the OOM killer chooses this thread for termination, we create this + * thread as a short-lived thread whenever the /sys/kernel/security/tomoyo/ + * interface is close()d. + * + * Returns 0. + */ static int tomoyo_gc_thread(void *unused) { + /* Garbage collector thread is exclusive.
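tomoyo_gc_thread(), reworked below, guards the whole collection pass with a function-local mutex and mutex_trylock(), so however many close() events spawn a collector, only one instance runs and late arrivals exit at once. A loose pthread analogue of that guard, with invented names (build with cc -pthread):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t gc_mutex = PTHREAD_MUTEX_INITIALIZER;

static void *gc_thread(void *unused)
{
        (void)unused;
        /* Nonzero means somebody else is already collecting; just leave. */
        if (pthread_mutex_trylock(&gc_mutex))
                return NULL;
        puts("collecting garbage");
        pthread_mutex_unlock(&gc_mutex);
        return NULL;
}

int main(void)
{
        pthread_t t1, t2;

        pthread_create(&t1, NULL, gc_thread, NULL);
        pthread_create(&t2, NULL, gc_thread, NULL);
        pthread_join(t1, NULL);
        pthread_join(t2, NULL);
        return 0;
}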
*/ + static DEFINE_MUTEX(tomoyo_gc_mutex); + if (!mutex_trylock(&tomoyo_gc_mutex)) + goto out; daemonize("GC for TOMOYO"); - if (mutex_trylock(&tomoyo_gc_mutex)) { - int i; - for (i = 0; i < 10; i++) { - tomoyo_collect_entry(); - if (list_empty(&tomoyo_gc_queue)) - break; - synchronize_srcu(&tomoyo_ss); - tomoyo_kfree_entry(); + do { + tomoyo_collect_entry(); + if (list_empty(&tomoyo_gc_list)) + break; + synchronize_srcu(&tomoyo_ss); + } while (tomoyo_kfree_entry()); + { + struct tomoyo_io_buffer *head; + struct tomoyo_io_buffer *tmp; + + spin_lock(&tomoyo_io_buffer_list_lock); + list_for_each_entry_safe(head, tmp, &tomoyo_io_buffer_list, + list) { + if (head->users) + continue; + list_del(&head->list); + kfree(head->read_buf); + kfree(head->write_buf); + kfree(head); } - mutex_unlock(&tomoyo_gc_mutex); + spin_unlock(&tomoyo_io_buffer_list_lock); } - do_exit(0); + mutex_unlock(&tomoyo_gc_mutex); +out: + /* This acts as do_exit(0). */ + return 0; } -void tomoyo_run_gc(void) +/** + * tomoyo_notify_gc - Register/unregister /sys/kernel/security/tomoyo/ users. + * + * @head: Pointer to "struct tomoyo_io_buffer". + * @is_register: True if register, false if unregister. + * + * Returns nothing. + */ +void tomoyo_notify_gc(struct tomoyo_io_buffer *head, const bool is_register) { - struct task_struct *task = kthread_create(tomoyo_gc_thread, NULL, - "GC for TOMOYO"); - if (!IS_ERR(task)) - wake_up_process(task); + bool is_write = false; + + spin_lock(&tomoyo_io_buffer_list_lock); + if (is_register) { + head->users = 1; + list_add(&head->list, &tomoyo_io_buffer_list); + } else { + is_write = head->write_buf != NULL; + if (!--head->users) { + list_del(&head->list); + kfree(head->read_buf); + kfree(head->write_buf); + kfree(head); + } + } + spin_unlock(&tomoyo_io_buffer_list_lock); + if (is_write) { + struct task_struct *task = kthread_create(tomoyo_gc_thread, + NULL, + "GC for TOMOYO"); + if (!IS_ERR(task)) + wake_up_process(task); + } } diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c index e94352ce723f..5fb0e1298400 100644 --- a/security/tomoyo/group.c +++ b/security/tomoyo/group.c @@ -1,21 +1,37 @@ /* * security/tomoyo/group.c * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/slab.h> #include "common.h" +/** + * tomoyo_same_path_group - Check for duplicated "struct tomoyo_path_group" entry. + * + * @a: Pointer to "struct tomoyo_acl_head". + * @b: Pointer to "struct tomoyo_acl_head". + * + * Returns true if @a == @b, false otherwise. + */ static bool tomoyo_same_path_group(const struct tomoyo_acl_head *a, - const struct tomoyo_acl_head *b) + const struct tomoyo_acl_head *b) { return container_of(a, struct tomoyo_path_group, head)->member_name == container_of(b, struct tomoyo_path_group, head)->member_name; } +/** + * tomoyo_same_number_group - Check for duplicated "struct tomoyo_number_group" entry. + * + * @a: Pointer to "struct tomoyo_acl_head". + * @b: Pointer to "struct tomoyo_acl_head". + * + * Returns true if @a == @b, false otherwise. + */ static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a, - const struct tomoyo_acl_head *b) + const struct tomoyo_acl_head *b) { return !memcmp(&container_of(a, struct tomoyo_number_group, head) ->number, @@ -28,48 +44,41 @@ static bool tomoyo_same_number_group(const struct tomoyo_acl_head *a, /** * tomoyo_write_group - Write "struct tomoyo_path_group"/"struct tomoyo_number_group" list. * - * @data: String to parse. - * @is_delete: True if it is a delete request. 
- * @type: Type of this group. + * @param: Pointer to "struct tomoyo_acl_param". + * @type: Type of this group. * * Returns 0 on success, negative value otherwise. */ -int tomoyo_write_group(char *data, const bool is_delete, const u8 type) +int tomoyo_write_group(struct tomoyo_acl_param *param, const u8 type) { - struct tomoyo_group *group; - struct list_head *member; - char *w[2]; + struct tomoyo_group *group = tomoyo_get_group(param, type); int error = -EINVAL; - if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[1][0]) - return -EINVAL; - group = tomoyo_get_group(w[0], type); if (!group) return -ENOMEM; - member = &group->member_list; + param->list = &group->member_list; if (type == TOMOYO_PATH_GROUP) { struct tomoyo_path_group e = { }; - e.member_name = tomoyo_get_name(w[1]); + e.member_name = tomoyo_get_name(tomoyo_read_token(param)); if (!e.member_name) { error = -ENOMEM; goto out; } - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - member, tomoyo_same_path_group); + error = tomoyo_update_policy(&e.head, sizeof(e), param, + tomoyo_same_path_group); tomoyo_put_name(e.member_name); } else if (type == TOMOYO_NUMBER_GROUP) { struct tomoyo_number_group e = { }; - if (w[1][0] == '@' - || !tomoyo_parse_number_union(w[1], &e.number) - || e.number.values[0] > e.number.values[1]) + if (param->data[0] == '@' || + !tomoyo_parse_number_union(param, &e.number)) goto out; - error = tomoyo_update_policy(&e.head, sizeof(e), is_delete, - member, tomoyo_same_number_group); + error = tomoyo_update_policy(&e.head, sizeof(e), param, + tomoyo_same_number_group); /* * tomoyo_put_number_union() is not needed because - * w[1][0] != '@'. + * param->data[0] != '@'. */ } - out: +out: tomoyo_put_group(group); return error; } @@ -77,8 +86,8 @@ int tomoyo_write_group(char *data, const bool is_delete, const u8 type) /** * tomoyo_path_matches_group - Check whether the given pathname matches members of the given pathname group. * - * @pathname: The name of pathname. - * @group: Pointer to "struct tomoyo_path_group". + * @pathname: The name of pathname. + * @group: Pointer to "struct tomoyo_path_group". * * Returns matched member's pathname if @pathname matches pathnames in @group, * NULL otherwise. diff --git a/security/tomoyo/load_policy.c b/security/tomoyo/load_policy.c index 3312e5624f24..67975405140f 100644 --- a/security/tomoyo/load_policy.c +++ b/security/tomoyo/load_policy.c @@ -1,15 +1,32 @@ /* * security/tomoyo/load_policy.c * - * Policy loader launcher for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include "common.h" -/* path to policy loader */ -static const char *tomoyo_loader = "/sbin/tomoyo-init"; +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER + +/* + * Path to the policy loader. (default = CONFIG_SECURITY_TOMOYO_POLICY_LOADER) + */ +static const char *tomoyo_loader; + +/** + * tomoyo_loader_setup - Set policy loader. + * + * @str: Program to use as a policy loader (e.g. /sbin/tomoyo-init ). + * + * Returns 0. + */ +static int __init tomoyo_loader_setup(char *str) +{ + tomoyo_loader = str; + return 0; +} + +__setup("TOMOYO_loader=", tomoyo_loader_setup); /** * tomoyo_policy_loader_exists - Check whether /sbin/tomoyo-init exists. @@ -18,24 +35,38 @@ static const char *tomoyo_loader = "/sbin/tomoyo-init"; */ static bool tomoyo_policy_loader_exists(void) { - /* - * Don't activate MAC if the policy loader doesn't exist. 
- * If the initrd includes /sbin/init but real-root-dev has not - * mounted on / yet, activating MAC will block the system since - * policies are not loaded yet. - * Thus, let do_execve() call this function every time. - */ struct path path; - + if (!tomoyo_loader) + tomoyo_loader = CONFIG_SECURITY_TOMOYO_POLICY_LOADER; if (kern_path(tomoyo_loader, LOOKUP_FOLLOW, &path)) { - printk(KERN_INFO "Not activating Mandatory Access Control now " - "since %s doesn't exist.\n", tomoyo_loader); + printk(KERN_INFO "Not activating Mandatory Access Control " + "as %s does not exist.\n", tomoyo_loader); return false; } path_put(&path); return true; } +/* + * Path to the trigger. (default = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER) + */ +static const char *tomoyo_trigger; + +/** + * tomoyo_trigger_setup - Set trigger for activation. + * + * @str: Program to use as an activation trigger (e.g. /sbin/init ). + * + * Returns 0. + */ +static int __init tomoyo_trigger_setup(char *str) +{ + tomoyo_trigger = str; + return 0; +} + +__setup("TOMOYO_trigger=", tomoyo_trigger_setup); + /** * tomoyo_load_policy - Run external policy loader to load policy. * @@ -51,24 +82,19 @@ static bool tomoyo_policy_loader_exists(void) */ void tomoyo_load_policy(const char *filename) { + static bool done; char *argv[2]; char *envp[3]; - if (tomoyo_policy_loaded) + if (tomoyo_policy_loaded || done) return; - /* - * Check filename is /sbin/init or /sbin/tomoyo-start. - * /sbin/tomoyo-start is a dummy filename in case where /sbin/init can't - * be passed. - * You can create /sbin/tomoyo-start by - * "ln -s /bin/true /sbin/tomoyo-start". - */ - if (strcmp(filename, "/sbin/init") && - strcmp(filename, "/sbin/tomoyo-start")) + if (!tomoyo_trigger) + tomoyo_trigger = CONFIG_SECURITY_TOMOYO_ACTIVATION_TRIGGER; + if (strcmp(filename, tomoyo_trigger)) return; if (!tomoyo_policy_loader_exists()) return; - + done = true; printk(KERN_INFO "Calling %s to load policy. Please wait.\n", tomoyo_loader); argv[0] = (char *) tomoyo_loader; @@ -79,3 +105,5 @@ void tomoyo_load_policy(const char *filename) call_usermodehelper(argv[0], argv, envp, 1); tomoyo_check_profile(); } + +#endif diff --git a/security/tomoyo/memory.c b/security/tomoyo/memory.c index 42a7b1ba8cbf..7a56051146c2 100644 --- a/security/tomoyo/memory.c +++ b/security/tomoyo/memory.c @@ -1,9 +1,7 @@ /* * security/tomoyo/memory.c * - * Memory management functions for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/hash.h> @@ -29,10 +27,12 @@ void tomoyo_warn_oom(const char *function) panic("MAC Initialization failed.\n"); } -/* Memory allocated for policy. */ -static atomic_t tomoyo_policy_memory_size; -/* Quota for holding policy. */ -static unsigned int tomoyo_quota_for_policy; +/* Lock for protecting tomoyo_memory_used. */ +static DEFINE_SPINLOCK(tomoyo_policy_memory_lock); +/* Memory currently used by policy/audit log/query. */ +unsigned int tomoyo_memory_used[TOMOYO_MAX_MEMORY_STAT]; +/* Memory quota for "policy"/"audit log"/"query". */ +unsigned int tomoyo_memory_quota[TOMOYO_MAX_MEMORY_STAT]; /** * tomoyo_memory_ok - Check memory quota. @@ -45,15 +45,20 @@ static unsigned int tomoyo_quota_for_policy */ bool tomoyo_memory_ok(void *ptr) { - size_t s = ptr ?
ksize(ptr) : 0; - atomic_add(s, &tomoyo_policy_memory_size); - if (ptr && (!tomoyo_quota_for_policy || - atomic_read(&tomoyo_policy_memory_size) - <= tomoyo_quota_for_policy)) { - memset(ptr, 0, s); - return true; + if (ptr) { + const size_t s = ksize(ptr); + bool result; + spin_lock(&tomoyo_policy_memory_lock); + tomoyo_memory_used[TOMOYO_MEMORY_POLICY] += s; + result = !tomoyo_memory_quota[TOMOYO_MEMORY_POLICY] || + tomoyo_memory_used[TOMOYO_MEMORY_POLICY] <= + tomoyo_memory_quota[TOMOYO_MEMORY_POLICY]; + if (!result) + tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s; + spin_unlock(&tomoyo_policy_memory_lock); + if (result) + return true; } - atomic_sub(s, &tomoyo_policy_memory_size); tomoyo_warn_oom(__func__); return false; } @@ -86,22 +91,28 @@ void *tomoyo_commit_ok(void *data, const unsigned int size) */ void tomoyo_memory_free(void *ptr) { - atomic_sub(ksize(ptr), &tomoyo_policy_memory_size); + size_t s = ksize(ptr); + spin_lock(&tomoyo_policy_memory_lock); + tomoyo_memory_used[TOMOYO_MEMORY_POLICY] -= s; + spin_unlock(&tomoyo_policy_memory_lock); kfree(ptr); } /** * tomoyo_get_group - Allocate memory for "struct tomoyo_path_group"/"struct tomoyo_number_group". * - * @group_name: The name of address group. - * @idx: Index number. + * @param: Pointer to "struct tomoyo_acl_param". + * @idx: Index number. * * Returns pointer to "struct tomoyo_group" on success, NULL otherwise. */ -struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx) +struct tomoyo_group *tomoyo_get_group(struct tomoyo_acl_param *param, + const u8 idx) { struct tomoyo_group e = { }; struct tomoyo_group *group = NULL; + struct list_head *list; + const char *group_name = tomoyo_read_token(param); bool found = false; if (!tomoyo_correct_word(group_name) || idx >= TOMOYO_MAX_GROUP) return NULL; @@ -110,10 +121,11 @@ struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx) return NULL; if (mutex_lock_interruptible(&tomoyo_policy_lock)) goto out; - list_for_each_entry(group, &tomoyo_group_list[idx], list) { + list = ¶m->ns->group_list[idx]; + list_for_each_entry(group, list, head.list) { if (e.group_name != group->group_name) continue; - atomic_inc(&group->users); + atomic_inc(&group->head.users); found = true; break; } @@ -121,15 +133,14 @@ struct tomoyo_group *tomoyo_get_group(const char *group_name, const u8 idx) struct tomoyo_group *entry = tomoyo_commit_ok(&e, sizeof(e)); if (entry) { INIT_LIST_HEAD(&entry->member_list); - atomic_set(&entry->users, 1); - list_add_tail_rcu(&entry->list, - &tomoyo_group_list[idx]); + atomic_set(&entry->head.users, 1); + list_add_tail_rcu(&entry->head.list, list); group = entry; found = true; } } mutex_unlock(&tomoyo_policy_lock); - out: +out: tomoyo_put_name(e.group_name); return found ? group : NULL; } @@ -154,7 +165,6 @@ const struct tomoyo_path_info *tomoyo_get_name(const char *name) struct tomoyo_name *ptr; unsigned int hash; int len; - int allocated_len; struct list_head *head; if (!name) @@ -164,120 +174,43 @@ const struct tomoyo_path_info *tomoyo_get_name(const char *name) head = &tomoyo_name_list[hash_long(hash, TOMOYO_HASH_BITS)]; if (mutex_lock_interruptible(&tomoyo_policy_lock)) return NULL; - list_for_each_entry(ptr, head, list) { + list_for_each_entry(ptr, head, head.list) { if (hash != ptr->entry.hash || strcmp(name, ptr->entry.name)) continue; - atomic_inc(&ptr->users); + atomic_inc(&ptr->head.users); goto out; } ptr = kzalloc(sizeof(*ptr) + len, GFP_NOFS); - allocated_len = ptr ? 
ksize(ptr) : 0; - if (!ptr || (tomoyo_quota_for_policy && - atomic_read(&tomoyo_policy_memory_size) + allocated_len - > tomoyo_quota_for_policy)) { + if (tomoyo_memory_ok(ptr)) { + ptr->entry.name = ((char *) ptr) + sizeof(*ptr); + memmove((char *) ptr->entry.name, name, len); + atomic_set(&ptr->head.users, 1); + tomoyo_fill_path_info(&ptr->entry); + list_add_tail(&ptr->head.list, head); + } else { kfree(ptr); ptr = NULL; - tomoyo_warn_oom(__func__); - goto out; } - atomic_add(allocated_len, &tomoyo_policy_memory_size); - ptr->entry.name = ((char *) ptr) + sizeof(*ptr); - memmove((char *) ptr->entry.name, name, len); - atomic_set(&ptr->users, 1); - tomoyo_fill_path_info(&ptr->entry); - list_add_tail(&ptr->list, head); - out: +out: mutex_unlock(&tomoyo_policy_lock); return ptr ? &ptr->entry : NULL; } +/* Initial namespace.*/ +struct tomoyo_policy_namespace tomoyo_kernel_namespace; + /** * tomoyo_mm_init - Initialize mm related code. */ void __init tomoyo_mm_init(void) { int idx; - - for (idx = 0; idx < TOMOYO_MAX_POLICY; idx++) - INIT_LIST_HEAD(&tomoyo_policy_list[idx]); - for (idx = 0; idx < TOMOYO_MAX_GROUP; idx++) - INIT_LIST_HEAD(&tomoyo_group_list[idx]); for (idx = 0; idx < TOMOYO_MAX_HASH; idx++) INIT_LIST_HEAD(&tomoyo_name_list[idx]); + tomoyo_kernel_namespace.name = "<kernel>"; + tomoyo_init_policy_namespace(&tomoyo_kernel_namespace); + tomoyo_kernel_domain.ns = &tomoyo_kernel_namespace; INIT_LIST_HEAD(&tomoyo_kernel_domain.acl_info_list); - tomoyo_kernel_domain.domainname = tomoyo_get_name(TOMOYO_ROOT_NAME); + tomoyo_kernel_domain.domainname = tomoyo_get_name("<kernel>"); list_add_tail_rcu(&tomoyo_kernel_domain.list, &tomoyo_domain_list); - idx = tomoyo_read_lock(); - if (tomoyo_find_domain(TOMOYO_ROOT_NAME) != &tomoyo_kernel_domain) - panic("Can't register tomoyo_kernel_domain"); - { - /* Load built-in policy. */ - tomoyo_write_transition_control("/sbin/hotplug", false, - TOMOYO_TRANSITION_CONTROL_INITIALIZE); - tomoyo_write_transition_control("/sbin/modprobe", false, - TOMOYO_TRANSITION_CONTROL_INITIALIZE); - } - tomoyo_read_unlock(idx); -} - - -/* Memory allocated for query lists. */ -unsigned int tomoyo_query_memory_size; -/* Quota for holding query lists. */ -unsigned int tomoyo_quota_for_query; - -/** - * tomoyo_read_memory_counter - Check for memory usage in bytes. - * - * @head: Pointer to "struct tomoyo_io_buffer". - * - * Returns memory usage. - */ -void tomoyo_read_memory_counter(struct tomoyo_io_buffer *head) -{ - if (!head->r.eof) { - const unsigned int policy - = atomic_read(&tomoyo_policy_memory_size); - const unsigned int query = tomoyo_query_memory_size; - char buffer[64]; - - memset(buffer, 0, sizeof(buffer)); - if (tomoyo_quota_for_policy) - snprintf(buffer, sizeof(buffer) - 1, - " (Quota: %10u)", - tomoyo_quota_for_policy); - else - buffer[0] = '\0'; - tomoyo_io_printf(head, "Policy: %10u%s\n", policy, - buffer); - if (tomoyo_quota_for_query) - snprintf(buffer, sizeof(buffer) - 1, - " (Quota: %10u)", - tomoyo_quota_for_query); - else - buffer[0] = '\0'; - tomoyo_io_printf(head, "Query lists: %10u%s\n", query, - buffer); - tomoyo_io_printf(head, "Total: %10u\n", policy + query); - head->r.eof = true; - } -} - -/** - * tomoyo_write_memory_quota - Set memory quota. - * - * @head: Pointer to "struct tomoyo_io_buffer". - * - * Returns 0. 
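tomoyo_get_name() above interns pathnames: it hashes into one of TOMOYO_MAX_HASH buckets, reuses a matching entry by bumping its use count, and otherwise appends a new entry whose text is stored inline right after the header; that inline layout is why the GC can compute a name's size as strlen() + 1. A simplified, quota-free userspace sketch with an invented hash function:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NBUCKETS 256

struct name {
        struct name *next;
        unsigned int users;
        char entry[];           /* inline copy of the string */
};

static struct name *buckets[NBUCKETS];

static unsigned int name_hash(const char *s)
{
        unsigned int h = 0;

        while (*s)
                h = h * 31 + (unsigned char)*s++;
        return h % NBUCKETS;
}

static const char *get_name(const char *s)
{
        struct name **head = &buckets[name_hash(s)];
        struct name *p;

        for (p = *head; p; p = p->next)
                if (!strcmp(p->entry, s)) {
                        p->users++;     /* existing entry, one more user */
                        return p->entry;
                }
        p = malloc(sizeof(*p) + strlen(s) + 1);
        if (!p)
                return NULL;
        strcpy(p->entry, s);
        p->users = 1;
        p->next = *head;
        *head = p;
        return p->entry;
}

int main(void)
{
        const char *a = get_name("/etc/fstab");
        const char *b = get_name("/etc/fstab");

        printf("%d\n", a == b); /* 1: equal strings share one entry */
        return 0;
}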
- */ -int tomoyo_write_memory_quota(struct tomoyo_io_buffer *head) -{ - char *data = head->write_buf; - unsigned int size; - - if (sscanf(data, "Policy: %u", &size) == 1) - tomoyo_quota_for_policy = size; - else if (sscanf(data, "Query lists: %u", &size) == 1) - tomoyo_quota_for_query = size; - return 0; } diff --git a/security/tomoyo/mount.c b/security/tomoyo/mount.c index 9fc2e15841c9..bee09d062057 100644 --- a/security/tomoyo/mount.c +++ b/security/tomoyo/mount.c @@ -1,28 +1,22 @@ /* * security/tomoyo/mount.c * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/slab.h> #include "common.h" -/* Keywords for mount restrictions. */ - -/* Allow to call 'mount --bind /source_dir /dest_dir' */ -#define TOMOYO_MOUNT_BIND_KEYWORD "--bind" -/* Allow to call 'mount --move /old_dir /new_dir ' */ -#define TOMOYO_MOUNT_MOVE_KEYWORD "--move" -/* Allow to call 'mount -o remount /dir ' */ -#define TOMOYO_MOUNT_REMOUNT_KEYWORD "--remount" -/* Allow to call 'mount --make-unbindable /dir' */ -#define TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD "--make-unbindable" -/* Allow to call 'mount --make-private /dir' */ -#define TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD "--make-private" -/* Allow to call 'mount --make-slave /dir' */ -#define TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD "--make-slave" -/* Allow to call 'mount --make-shared /dir' */ -#define TOMOYO_MOUNT_MAKE_SHARED_KEYWORD "--make-shared" +/* String table for special mount operations. */ +static const char * const tomoyo_mounts[TOMOYO_MAX_SPECIAL_MOUNT] = { + [TOMOYO_MOUNT_BIND] = "--bind", + [TOMOYO_MOUNT_MOVE] = "--move", + [TOMOYO_MOUNT_REMOUNT] = "--remount", + [TOMOYO_MOUNT_MAKE_UNBINDABLE] = "--make-unbindable", + [TOMOYO_MOUNT_MAKE_PRIVATE] = "--make-private", + [TOMOYO_MOUNT_MAKE_SLAVE] = "--make-slave", + [TOMOYO_MOUNT_MAKE_SHARED] = "--make-shared", +}; /** * tomoyo_audit_mount_log - Audit mount log. @@ -33,50 +27,42 @@ */ static int tomoyo_audit_mount_log(struct tomoyo_request_info *r) { - const char *dev = r->param.mount.dev->name; - const char *dir = r->param.mount.dir->name; - const char *type = r->param.mount.type->name; - const unsigned long flags = r->param.mount.flags; - if (r->granted) - return 0; - if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD)) - tomoyo_warn_log(r, "mount -o remount %s 0x%lX", dir, flags); - else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD) - || !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD)) - tomoyo_warn_log(r, "mount %s %s %s 0x%lX", type, dev, dir, - flags); - else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD)) - tomoyo_warn_log(r, "mount %s %s 0x%lX", type, dir, flags); - else - tomoyo_warn_log(r, "mount -t %s %s %s 0x%lX", type, dev, dir, - flags); - return tomoyo_supervisor(r, - TOMOYO_KEYWORD_ALLOW_MOUNT "%s %s %s 0x%lX\n", - tomoyo_pattern(r->param.mount.dev), - tomoyo_pattern(r->param.mount.dir), type, - flags); + return tomoyo_supervisor(r, "file mount %s %s %s 0x%lX\n", + r->param.mount.dev->name, + r->param.mount.dir->name, + r->param.mount.type->name, + r->param.mount.flags); } +/** + * tomoyo_check_mount_acl - Check permission for mount operation. + * + * @r: Pointer to "struct tomoyo_request_info". + * @ptr: Pointer to "struct tomoyo_acl_info". + * + * Returns true if granted, false otherwise.
+ */ static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r, const struct tomoyo_acl_info *ptr) { const struct tomoyo_mount_acl *acl = container_of(ptr, typeof(*acl), head); - return tomoyo_compare_number_union(r->param.mount.flags, &acl->flags) && - tomoyo_compare_name_union(r->param.mount.type, &acl->fs_type) && - tomoyo_compare_name_union(r->param.mount.dir, &acl->dir_name) && + return tomoyo_compare_number_union(r->param.mount.flags, + &acl->flags) && + tomoyo_compare_name_union(r->param.mount.type, + &acl->fs_type) && + tomoyo_compare_name_union(r->param.mount.dir, + &acl->dir_name) && (!r->param.mount.need_dev || - tomoyo_compare_name_union(r->param.mount.dev, &acl->dev_name)); + tomoyo_compare_name_union(r->param.mount.dev, + &acl->dev_name)); } /** * tomoyo_mount_acl - Check permission for mount() operation. * * @r: Pointer to "struct tomoyo_request_info". - * @dev_name: Name of device file. + * @dev_name: Name of device file. Maybe NULL. * @dir: Pointer to "struct path". * @type: Name of filesystem type. * @flags: Mount options. @@ -86,8 +72,10 @@ static bool tomoyo_check_mount_acl(struct tomoyo_request_info *r, * Caller holds tomoyo_read_lock(). */ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, - struct path *dir, char *type, unsigned long flags) + struct path *dir, const char *type, + unsigned long flags) { + struct tomoyo_obj_info obj = { }; struct path path; struct file_system_type *fstype = NULL; const char *requested_type = NULL; @@ -98,6 +86,7 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, struct tomoyo_path_info rdir; int need_dev = 0; int error = -ENOMEM; + r->obj = &obj; /* Get fstype. */ requested_type = tomoyo_encode(type); @@ -107,6 +96,7 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, tomoyo_fill_path_info(&rtype); /* Get mount point. */ + obj.path2 = *dir; requested_dir_name = tomoyo_realpath_from_path(dir); if (!requested_dir_name) { error = -ENOMEM; @@ -116,15 +106,15 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, tomoyo_fill_path_info(&rdir); /* Compare fs name. */ - if (!strcmp(type, TOMOYO_MOUNT_REMOUNT_KEYWORD)) { + if (type == tomoyo_mounts[TOMOYO_MOUNT_REMOUNT]) { /* dev_name is ignored. */ - } else if (!strcmp(type, TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MAKE_SHARED_KEYWORD)) { + } else if (type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE] || + type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE] || + type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE] || + type == tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED]) { /* dev_name is ignored. */ - } else if (!strcmp(type, TOMOYO_MOUNT_BIND_KEYWORD) || - !strcmp(type, TOMOYO_MOUNT_MOVE_KEYWORD)) { + } else if (type == tomoyo_mounts[TOMOYO_MOUNT_BIND] || + type == tomoyo_mounts[TOMOYO_MOUNT_MOVE]) { need_dev = -1; /* dev_name is a directory */ } else { fstype = get_fs_type(type); @@ -142,8 +132,8 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, error = -ENOENT; goto out; } + obj.path1 = path; requested_dev_name = tomoyo_realpath_from_path(&path); - path_put(&path); if (!requested_dev_name) { error = -ENOENT; goto out; @@ -176,22 +166,26 @@ static int tomoyo_mount_acl(struct tomoyo_request_info *r, char *dev_name, if (fstype) put_filesystem(fstype); kfree(requested_type); + /* Drop refcount obtained by kern_path(). 
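tomoyo_mount_permission() in the hunk below peels each special MS_* bit off the flag word and substitutes the matching tomoyo_mounts[] keyword, which is why tomoyo_mount_acl() above can compare type against the table by pointer identity instead of strcmp(). A compact sketch of that decode-and-clear pattern; the flag values mimic MS_REMOUNT/MS_BIND but are illustrative:

#include <stdio.h>

enum special_mount { MOUNT_BIND, MOUNT_MOVE, MOUNT_REMOUNT, MAX_SPECIAL_MOUNT };

static const char * const mounts[MAX_SPECIAL_MOUNT] = {
        [MOUNT_BIND]    = "--bind",
        [MOUNT_MOVE]    = "--move",
        [MOUNT_REMOUNT] = "--remount",
};

#define FLAG_REMOUNT 0x20UL
#define FLAG_BIND    0x1000UL

int main(void)
{
        unsigned long flags = FLAG_BIND | 0x1UL; /* plus an unrelated bit */
        const char *type = NULL;

        if (flags & FLAG_REMOUNT) {
                type = mounts[MOUNT_REMOUNT];
                flags &= ~FLAG_REMOUNT;
        }
        if (flags & FLAG_BIND) {
                type = mounts[MOUNT_BIND];
                flags &= ~FLAG_BIND;
        }
        printf("type=%s remaining=0x%lx\n", type, flags);
        return 0;
}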
*/ + if (obj.path1.dentry) + path_put(&obj.path1); return error; } /** * tomoyo_mount_permission - Check permission for mount() operation. * - * @dev_name: Name of device file. + * @dev_name: Name of device file. Maybe NULL. * @path: Pointer to "struct path". - * @type: Name of filesystem type. May be NULL. + * @type: Name of filesystem type. Maybe NULL. * @flags: Mount options. - * @data_page: Optional data. May be NULL. + * @data_page: Optional data. Maybe NULL. * * Returns 0 on success, negative value otherwise. */ -int tomoyo_mount_permission(char *dev_name, struct path *path, char *type, - unsigned long flags, void *data_page) +int tomoyo_mount_permission(char *dev_name, struct path *path, + const char *type, unsigned long flags, + void *data_page) { struct tomoyo_request_info r; int error; @@ -203,31 +197,31 @@ int tomoyo_mount_permission(char *dev_name, struct path *path, char *type, if ((flags & MS_MGC_MSK) == MS_MGC_VAL) flags &= ~MS_MGC_MSK; if (flags & MS_REMOUNT) { - type = TOMOYO_MOUNT_REMOUNT_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_REMOUNT]; flags &= ~MS_REMOUNT; } if (flags & MS_MOVE) { - type = TOMOYO_MOUNT_MOVE_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_MOVE]; flags &= ~MS_MOVE; } if (flags & MS_BIND) { - type = TOMOYO_MOUNT_BIND_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_BIND]; flags &= ~MS_BIND; } if (flags & MS_UNBINDABLE) { - type = TOMOYO_MOUNT_MAKE_UNBINDABLE_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_UNBINDABLE]; flags &= ~MS_UNBINDABLE; } if (flags & MS_PRIVATE) { - type = TOMOYO_MOUNT_MAKE_PRIVATE_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_PRIVATE]; flags &= ~MS_PRIVATE; } if (flags & MS_SLAVE) { - type = TOMOYO_MOUNT_MAKE_SLAVE_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SLAVE]; flags &= ~MS_SLAVE; } if (flags & MS_SHARED) { - type = TOMOYO_MOUNT_MAKE_SHARED_KEYWORD; + type = tomoyo_mounts[TOMOYO_MOUNT_MAKE_SHARED]; flags &= ~MS_SHARED; } if (!type) @@ -237,49 +231,3 @@ int tomoyo_mount_permission(char *dev_name, struct path *path, char *type, tomoyo_read_unlock(idx); return error; } - -static bool tomoyo_same_mount_acl(const struct tomoyo_acl_info *a, - const struct tomoyo_acl_info *b) -{ - const struct tomoyo_mount_acl *p1 = container_of(a, typeof(*p1), head); - const struct tomoyo_mount_acl *p2 = container_of(b, typeof(*p2), head); - return tomoyo_same_acl_head(&p1->head, &p2->head) && - tomoyo_same_name_union(&p1->dev_name, &p2->dev_name) && - tomoyo_same_name_union(&p1->dir_name, &p2->dir_name) && - tomoyo_same_name_union(&p1->fs_type, &p2->fs_type) && - tomoyo_same_number_union(&p1->flags, &p2->flags); -} - -/** - * tomoyo_write_mount - Write "struct tomoyo_mount_acl" list. - * - * @data: String to parse. - * @domain: Pointer to "struct tomoyo_domain_info". - * @is_delete: True if it is a delete request. - * - * Returns 0 on success, negative value otherwise. - * - * Caller holds tomoyo_read_lock(). - */ -int tomoyo_write_mount(char *data, struct tomoyo_domain_info *domain, - const bool is_delete) -{ - struct tomoyo_mount_acl e = { .head.type = TOMOYO_TYPE_MOUNT_ACL }; - int error = is_delete ? 
-ENOENT : -ENOMEM; - char *w[4]; - if (!tomoyo_tokenize(data, w, sizeof(w)) || !w[3][0]) - return -EINVAL; - if (!tomoyo_parse_name_union(w[0], &e.dev_name) || - !tomoyo_parse_name_union(w[1], &e.dir_name) || - !tomoyo_parse_name_union(w[2], &e.fs_type) || - !tomoyo_parse_number_union(w[3], &e.flags)) - goto out; - error = tomoyo_update_domain(&e.head, sizeof(e), is_delete, domain, - tomoyo_same_mount_acl, NULL); - out: - tomoyo_put_name_union(&e.dev_name); - tomoyo_put_name_union(&e.dir_name); - tomoyo_put_name_union(&e.fs_type); - tomoyo_put_number_union(&e.flags); - return error; -} diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c index 8d95e91c9fc4..6c601bd300f3 100644 --- a/security/tomoyo/realpath.c +++ b/security/tomoyo/realpath.c @@ -1,9 +1,7 @@ /* * security/tomoyo/realpath.c * - * Pathname calculation functions for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/types.h> @@ -70,6 +68,161 @@ char *tomoyo_encode(const char *str) } /** + * tomoyo_get_absolute_path - Get the path of a dentry but ignores chroot'ed root. + * + * @path: Pointer to "struct path". + * @buffer: Pointer to buffer to return value in. + * @buflen: Sizeof @buffer. + * + * Returns the buffer on success, an error code otherwise. + * + * If dentry is a directory, trailing '/' is appended. + */ +static char *tomoyo_get_absolute_path(struct path *path, char * const buffer, + const int buflen) +{ + char *pos = ERR_PTR(-ENOMEM); + if (buflen >= 256) { + struct path ns_root = { }; + /* go to whatever namespace root we are under */ + pos = __d_path(path, &ns_root, buffer, buflen - 1); + if (!IS_ERR(pos) && *pos == '/' && pos[1]) { + struct inode *inode = path->dentry->d_inode; + if (inode && S_ISDIR(inode->i_mode)) { + buffer[buflen - 2] = '/'; + buffer[buflen - 1] = '\0'; + } + } + } + return pos; +} + +/** + * tomoyo_get_dentry_path - Get the path of a dentry. + * + * @dentry: Pointer to "struct dentry". + * @buffer: Pointer to buffer to return value in. + * @buflen: Sizeof @buffer. + * + * Returns the buffer on success, an error code otherwise. + * + * If dentry is a directory, trailing '/' is appended. + */ +static char *tomoyo_get_dentry_path(struct dentry *dentry, char * const buffer, + const int buflen) +{ + char *pos = ERR_PTR(-ENOMEM); + if (buflen >= 256) { + pos = dentry_path_raw(dentry, buffer, buflen - 1); + if (!IS_ERR(pos) && *pos == '/' && pos[1]) { + struct inode *inode = dentry->d_inode; + if (inode && S_ISDIR(inode->i_mode)) { + buffer[buflen - 2] = '/'; + buffer[buflen - 1] = '\0'; + } + } + } + return pos; +} + +/** + * tomoyo_get_local_path - Get the path of a dentry. + * + * @dentry: Pointer to "struct dentry". + * @buffer: Pointer to buffer to return value in. + * @buflen: Sizeof @buffer. + * + * Returns the buffer on success, an error code otherwise. + */ +static char *tomoyo_get_local_path(struct dentry *dentry, char * const buffer, + const int buflen) +{ + struct super_block *sb = dentry->d_sb; + char *pos = tomoyo_get_dentry_path(dentry, buffer, buflen); + if (IS_ERR(pos)) + return pos; + /* Convert from $PID to self if $PID is current thread. 
*/ + if (sb->s_magic == PROC_SUPER_MAGIC && *pos == '/') { + char *ep; + const pid_t pid = (pid_t) simple_strtoul(pos + 1, &ep, 10); + if (*ep == '/' && pid && pid == + task_tgid_nr_ns(current, sb->s_fs_info)) { + pos = ep - 5; + if (pos < buffer) + goto out; + memmove(pos, "/self", 5); + } + goto prepend_filesystem_name; + } + /* Use filesystem name for unnamed devices. */ + if (!MAJOR(sb->s_dev)) + goto prepend_filesystem_name; + { + struct inode *inode = sb->s_root->d_inode; + /* + * Use filesystem name if filesystem does not support rename() + * operation. + */ + if (inode->i_op && !inode->i_op->rename) + goto prepend_filesystem_name; + } + /* Prepend device name. */ + { + char name[64]; + int name_len; + const dev_t dev = sb->s_dev; + name[sizeof(name) - 1] = '\0'; + snprintf(name, sizeof(name) - 1, "dev(%u,%u):", MAJOR(dev), + MINOR(dev)); + name_len = strlen(name); + pos -= name_len; + if (pos < buffer) + goto out; + memmove(pos, name, name_len); + return pos; + } + /* Prepend filesystem name. */ +prepend_filesystem_name: + { + const char *name = sb->s_type->name; + const int name_len = strlen(name); + pos -= name_len + 1; + if (pos < buffer) + goto out; + memmove(pos, name, name_len); + pos[name_len] = ':'; + } + return pos; +out: + return ERR_PTR(-ENOMEM); +} + +/** + * tomoyo_get_socket_name - Get the name of a socket. + * + * @path: Pointer to "struct path". + * @buffer: Pointer to buffer to return value in. + * @buflen: Sizeof @buffer. + * + * Returns the buffer. + */ +static char *tomoyo_get_socket_name(struct path *path, char * const buffer, + const int buflen) +{ + struct inode *inode = path->dentry->d_inode; + struct socket *sock = inode ? SOCKET_I(inode) : NULL; + struct sock *sk = sock ? sock->sk : NULL; + if (sk) { + snprintf(buffer, buflen, "socket:[family=%u:type=%u:" + "protocol=%u]", sk->sk_family, sk->sk_type, + sk->sk_protocol); + } else { + snprintf(buffer, buflen, "socket:[unknown]"); + } + return buffer; +} + +/** * tomoyo_realpath_from_path - Returns realpath(3) of the given pathname but ignores chroot'ed root. * * @path: Pointer to "struct path". @@ -90,55 +243,42 @@ char *tomoyo_realpath_from_path(struct path *path) char *name = NULL; unsigned int buf_len = PAGE_SIZE / 2; struct dentry *dentry = path->dentry; - bool is_dir; + struct super_block *sb; if (!dentry) return NULL; - is_dir = dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode); + sb = dentry->d_sb; while (1) { - struct path ns_root = { .mnt = NULL, .dentry = NULL }; char *pos; + struct inode *inode; buf_len <<= 1; kfree(buf); buf = kmalloc(buf_len, GFP_NOFS); if (!buf) break; + /* To make sure that pos is '\0' terminated. */ + buf[buf_len - 1] = '\0'; /* Get better name for socket. */ - if (dentry->d_sb->s_magic == SOCKFS_MAGIC) { - struct inode *inode = dentry->d_inode; - struct socket *sock = inode ? SOCKET_I(inode) : NULL; - struct sock *sk = sock ? sock->sk : NULL; - if (sk) { - snprintf(buf, buf_len - 1, "socket:[family=%u:" - "type=%u:protocol=%u]", sk->sk_family, - sk->sk_type, sk->sk_protocol); - } else { - snprintf(buf, buf_len - 1, "socket:[unknown]"); - } - name = tomoyo_encode(buf); - break; + if (sb->s_magic == SOCKFS_MAGIC) { + pos = tomoyo_get_socket_name(path, buf, buf_len - 1); + goto encode; } - /* For "socket:[\$]" and "pipe:[\$]". */ + /* For "pipe:[\$]". 
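tomoyo_get_local_path() above builds names right to left: the dentry path is rendered at the tail of the buffer, and prefixes such as "dev(8,1):" or the filesystem name are slid in ahead of the current position, failing cleanly once the front of the buffer is reached. A minimal sketch of that backwards-prepend technique; the helper and strings are invented:

#include <stddef.h>
#include <stdio.h>
#include <string.h>

static char *prepend(char *pos, char *buffer, const char *prefix)
{
        const size_t len = strlen(prefix);

        if (!pos || (size_t)(pos - buffer) < len)
                return NULL;    /* ran out of room in front of the name */
        pos -= len;
        memmove(pos, prefix, len);
        return pos;
}

int main(void)
{
        char buf[64];
        char *pos = buf + sizeof(buf) - 1;

        *pos = '\0';
        pos = prepend(pos, buf, "/self/maps");
        pos = prepend(pos, buf, "proc:");
        puts(pos ? pos : "(overflow)"); /* proc:/self/maps */
        return 0;
}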
*/ if (dentry->d_op && dentry->d_op->d_dname) { pos = dentry->d_op->d_dname(dentry, buf, buf_len - 1); - if (IS_ERR(pos)) - continue; - name = tomoyo_encode(pos); - break; - } - /* If we don't have a vfsmount, we can't calculate. */ - if (!path->mnt) - break; - /* go to whatever namespace root we are under */ - pos = __d_path(path, &ns_root, buf, buf_len); - /* Prepend "/proc" prefix if using internal proc vfs mount. */ - if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) && - (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) { - pos -= 5; - if (pos >= buf) - memcpy(pos, "/proc", 5); - else - pos = ERR_PTR(-ENOMEM); + goto encode; } + inode = sb->s_root->d_inode; + /* + * Get local name for filesystems without rename() operation + * or dentry without vfsmount. + */ + if (!path->mnt || (inode->i_op && !inode->i_op->rename)) + pos = tomoyo_get_local_path(path->dentry, buf, + buf_len - 1); + /* Get absolute name for the rest. */ + else + pos = tomoyo_get_absolute_path(path, buf, buf_len - 1); +encode: if (IS_ERR(pos)) continue; name = tomoyo_encode(pos); @@ -147,16 +287,6 @@ char *tomoyo_realpath_from_path(struct path *path) kfree(buf); if (!name) tomoyo_warn_oom(__func__); - else if (is_dir && *name) { - /* Append trailing '/' if dentry is a directory. */ - char *pos = name + strlen(name) - 1; - if (*pos != '/') - /* - * This is OK because tomoyo_encode() reserves space - * for appending "/". - */ - *++pos = '/'; - } return name; } diff --git a/security/tomoyo/securityfs_if.c b/security/tomoyo/securityfs_if.c index e43d5554b506..a49c3bfd4dd5 100644 --- a/security/tomoyo/securityfs_if.c +++ b/security/tomoyo/securityfs_if.c @@ -1,9 +1,7 @@ /* - * security/tomoyo/common.c + * security/tomoyo/securityfs_if.c * - * Securityfs interface for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/security.h> @@ -34,11 +32,11 @@ static int tomoyo_open(struct inode *inode, struct file *file) */ static int tomoyo_release(struct inode *inode, struct file *file) { - return tomoyo_close_control(file); + return tomoyo_close_control(file->private_data); } /** - * tomoyo_poll - poll() for /proc/ccs/ interface. + * tomoyo_poll - poll() for /sys/kernel/security/tomoyo/ interface. * * @file: Pointer to "struct file". * @wait: Pointer to "poll_table". 
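tomoyo_realpath_from_path() above keeps doubling its scratch buffer (buf_len <<= 1) and retrying until the formatter succeeds, NUL-terminating the tail each round. A sketch of that grow-and-retry loop; build_name() is an invented stand-in for __d_path() and friends:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static char *build_name(char *buf, size_t len)
{
        const char *want = "/very/long/example/path";

        if (strlen(want) + 1 > len)
                return NULL;    /* too small, caller must retry */
        strcpy(buf, want);
        return buf;
}

int main(void)
{
        size_t buf_len = 8;
        char *buf = NULL, *name = NULL;

        while (!name) {
                buf_len <<= 1;  /* 16, 32, 64, ... */
                free(buf);
                buf = malloc(buf_len);
                if (!buf)
                        break;
                buf[buf_len - 1] = '\0'; /* mirror the patch's terminator */
                name = build_name(buf, buf_len - 1);
        }
        printf("%s\n", name ? name : "(alloc failure)");
        free(buf);
        return 0;
}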
@@ -63,7 +61,7 @@ static unsigned int tomoyo_poll(struct file *file, poll_table *wait) static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) { - return tomoyo_read_control(file, buf, count); + return tomoyo_read_control(file->private_data, buf, count); } /** @@ -79,7 +77,7 @@ static ssize_t tomoyo_read(struct file *file, char __user *buf, size_t count, static ssize_t tomoyo_write(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { - return tomoyo_write_control(file, buf, count); + return tomoyo_write_control(file->private_data, buf, count); } /* @@ -135,14 +133,14 @@ static int __init tomoyo_initerface_init(void) TOMOYO_DOMAINPOLICY); tomoyo_create_entry("exception_policy", 0600, tomoyo_dir, TOMOYO_EXCEPTIONPOLICY); + tomoyo_create_entry("audit", 0400, tomoyo_dir, + TOMOYO_AUDIT); tomoyo_create_entry("self_domain", 0400, tomoyo_dir, TOMOYO_SELFDOMAIN); - tomoyo_create_entry(".domain_status", 0600, tomoyo_dir, - TOMOYO_DOMAIN_STATUS); tomoyo_create_entry(".process_status", 0600, tomoyo_dir, TOMOYO_PROCESS_STATUS); - tomoyo_create_entry("meminfo", 0600, tomoyo_dir, - TOMOYO_MEMINFO); + tomoyo_create_entry("stat", 0644, tomoyo_dir, + TOMOYO_STAT); tomoyo_create_entry("profile", 0600, tomoyo_dir, TOMOYO_PROFILE); tomoyo_create_entry("manager", 0600, tomoyo_dir, diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 95d3f9572237..f776400a8f31 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -1,20 +1,35 @@ /* * security/tomoyo/tomoyo.c * - * LSM hooks for TOMOYO Linux. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/security.h> #include "common.h" +/** + * tomoyo_cred_alloc_blank - Target for security_cred_alloc_blank(). + * + * @new: Pointer to "struct cred". + * @gfp: Memory allocation flags. + * + * Returns 0. + */ static int tomoyo_cred_alloc_blank(struct cred *new, gfp_t gfp) { new->security = NULL; return 0; } +/** + * tomoyo_cred_prepare - Target for security_prepare_creds(). + * + * @new: Pointer to "struct cred". + * @old: Pointer to "struct cred". + * @gfp: Memory allocation flags. + * + * Returns 0. + */ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, gfp_t gfp) { @@ -25,11 +40,22 @@ static int tomoyo_cred_prepare(struct cred *new, const struct cred *old, return 0; } +/** + * tomoyo_cred_transfer - Target for security_transfer_creds(). + * + * @new: Pointer to "struct cred". + * @old: Pointer to "struct cred". + */ static void tomoyo_cred_transfer(struct cred *new, const struct cred *old) { tomoyo_cred_prepare(new, old, 0); } +/** + * tomoyo_cred_free - Target for security_cred_free(). + * + * @cred: Pointer to "struct cred". + */ static void tomoyo_cred_free(struct cred *cred) { struct tomoyo_domain_info *domain = cred->security; @@ -37,6 +63,13 @@ static void tomoyo_cred_free(struct cred *cred) atomic_dec(&domain->users); } +/** + * tomoyo_bprm_set_creds - Target for security_bprm_set_creds(). + * + * @bprm: Pointer to "struct linux_binprm". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) { int rc; @@ -51,12 +84,14 @@ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) */ if (bprm->cred_prepared) return 0; +#ifndef CONFIG_SECURITY_TOMOYO_OMIT_USERSPACE_LOADER /* * Load policy if /sbin/tomoyo-init exists and /sbin/init is requested * for the first time. 
*/ if (!tomoyo_policy_loaded) tomoyo_load_policy(bprm->filename); +#endif /* * Release reference to "struct tomoyo_domain_info" stored inside * "bprm->cred->security". New reference to "struct tomoyo_domain_info" @@ -73,6 +108,13 @@ static int tomoyo_bprm_set_creds(struct linux_binprm *bprm) return 0; } +/** + * tomoyo_bprm_check_security - Target for security_bprm_check(). + * + * @bprm: Pointer to "struct linux_binprm". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) { struct tomoyo_domain_info *domain = bprm->cred->security; @@ -90,20 +132,59 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) /* * Read permission is checked against interpreters using next domain. */ - return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY); + return tomoyo_check_open_permission(domain, &bprm->file->f_path, + O_RDONLY); +} + +/** + * tomoyo_inode_getattr - Target for security_inode_getattr(). + * + * @mnt: Pointer to "struct vfsmount". + * @dentry: Pointer to "struct dentry". + * + * Returns 0 on success, negative value otherwise. + */ +static int tomoyo_inode_getattr(struct vfsmount *mnt, struct dentry *dentry) +{ + struct path path = { mnt, dentry }; + return tomoyo_path_perm(TOMOYO_TYPE_GETATTR, &path, NULL); } +/** + * tomoyo_path_truncate - Target for security_path_truncate(). + * + * @path: Pointer to "struct path". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_truncate(struct path *path) { - return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path); + return tomoyo_path_perm(TOMOYO_TYPE_TRUNCATE, path, NULL); } +/** + * tomoyo_path_unlink - Target for security_path_unlink(). + * + * @parent: Pointer to "struct path". + * @dentry: Pointer to "struct dentry". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_unlink(struct path *parent, struct dentry *dentry) { struct path path = { parent->mnt, dentry }; - return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path); + return tomoyo_path_perm(TOMOYO_TYPE_UNLINK, &path, NULL); } +/** + * tomoyo_path_mkdir - Target for security_path_mkdir(). + * + * @parent: Pointer to "struct path". + * @dentry: Pointer to "struct dentry". + * @mode: DAC permission mode. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry, int mode) { @@ -112,19 +193,46 @@ static int tomoyo_path_mkdir(struct path *parent, struct dentry *dentry, mode & S_IALLUGO); } +/** + * tomoyo_path_rmdir - Target for security_path_rmdir(). + * + * @parent: Pointer to "struct path". + * @dentry: Pointer to "struct dentry". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_rmdir(struct path *parent, struct dentry *dentry) { struct path path = { parent->mnt, dentry }; - return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path); + return tomoyo_path_perm(TOMOYO_TYPE_RMDIR, &path, NULL); } +/** + * tomoyo_path_symlink - Target for security_path_symlink(). + * + * @parent: Pointer to "struct path". + * @dentry: Pointer to "struct dentry". + * @old_name: Symlink's content. + * + * Returns 0 on success, negative value otherwise. 
+ */ static int tomoyo_path_symlink(struct path *parent, struct dentry *dentry, const char *old_name) { struct path path = { parent->mnt, dentry }; - return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path); + return tomoyo_path_perm(TOMOYO_TYPE_SYMLINK, &path, old_name); } +/** + * tomoyo_path_mknod - Target for security_path_mknod(). + * + * @parent: Pointer to "struct path". + * @dentry: Pointer to "struct dentry". + * @mode: DAC permission mode. + * @dev: Device attributes. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry, int mode, unsigned int dev) { @@ -155,6 +263,15 @@ static int tomoyo_path_mknod(struct path *parent, struct dentry *dentry, return tomoyo_path_number_perm(type, &path, perm); } +/** + * tomoyo_path_link - Target for security_path_link(). + * + * @old_dentry: Pointer to "struct dentry". + * @new_dir: Pointer to "struct path". + * @new_dentry: Pointer to "struct dentry". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir, struct dentry *new_dentry) { @@ -163,6 +280,16 @@ static int tomoyo_path_link(struct dentry *old_dentry, struct path *new_dir, return tomoyo_path2_perm(TOMOYO_TYPE_LINK, &path1, &path2); } +/** + * tomoyo_path_rename - Target for security_path_rename(). + * + * @old_parent: Pointer to "struct path". + * @old_dentry: Pointer to "struct dentry". + * @new_parent: Pointer to "struct path". + * @new_dentry: Pointer to "struct dentry". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_rename(struct path *old_parent, struct dentry *old_dentry, struct path *new_parent, @@ -173,14 +300,32 @@ static int tomoyo_path_rename(struct path *old_parent, return tomoyo_path2_perm(TOMOYO_TYPE_RENAME, &path1, &path2); } +/** + * tomoyo_file_fcntl - Target for security_file_fcntl(). + * + * @file: Pointer to "struct file". + * @cmd: Command for fcntl(). + * @arg: Argument for @cmd. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) { - if (cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND)) - return tomoyo_path_perm(TOMOYO_TYPE_REWRITE, &file->f_path); - return 0; + if (!(cmd == F_SETFL && ((arg ^ file->f_flags) & O_APPEND))) + return 0; + return tomoyo_check_open_permission(tomoyo_domain(), &file->f_path, + O_WRONLY | (arg & O_APPEND)); } +/** + * tomoyo_dentry_open - Target for security_dentry_open(). + * + * @f: Pointer to "struct file". + * @cred: Pointer to "struct cred". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_dentry_open(struct file *f, const struct cred *cred) { int flags = f->f_flags; @@ -190,12 +335,30 @@ static int tomoyo_dentry_open(struct file *f, const struct cred *cred) return tomoyo_check_open_permission(tomoyo_domain(), &f->f_path, flags); } +/** + * tomoyo_file_ioctl - Target for security_file_ioctl(). + * + * @file: Pointer to "struct file". + * @cmd: Command for ioctl(). + * @arg: Argument for @cmd. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_file_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { return tomoyo_path_number_perm(TOMOYO_TYPE_IOCTL, &file->f_path, cmd); } +/** + * tomoyo_path_chmod - Target for security_path_chmod(). + * + * @dentry: Pointer to "struct dentry". + * @mnt: Pointer to "struct vfsmount". + * @mode: DAC permission mode. 
+ * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode_t mode) { @@ -204,6 +367,15 @@ static int tomoyo_path_chmod(struct dentry *dentry, struct vfsmount *mnt, mode & S_IALLUGO); } +/** + * tomoyo_path_chown - Target for security_path_chown(). + * + * @path: Pointer to "struct path". + * @uid: Owner ID. + * @gid: Group ID. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid) { int error = 0; @@ -214,23 +386,57 @@ static int tomoyo_path_chown(struct path *path, uid_t uid, gid_t gid) return error; } +/** + * tomoyo_path_chroot - Target for security_path_chroot(). + * + * @path: Pointer to "struct path". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_path_chroot(struct path *path) { - return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path); + return tomoyo_path_perm(TOMOYO_TYPE_CHROOT, path, NULL); } +/** + * tomoyo_sb_mount - Target for security_sb_mount(). + * + * @dev_name: Name of device file. May be NULL. + * @path: Pointer to "struct path". + * @type: Name of filesystem type. May be NULL. + * @flags: Mount options. + * @data: Optional data. May be NULL. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_sb_mount(char *dev_name, struct path *path, char *type, unsigned long flags, void *data) { return tomoyo_mount_permission(dev_name, path, type, flags, data); } +/** + * tomoyo_sb_umount - Target for security_sb_umount(). + * + * @mnt: Pointer to "struct vfsmount". + * @flags: Unmount options. + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_sb_umount(struct vfsmount *mnt, int flags) { struct path path = { mnt, mnt->mnt_root }; - return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path); + return tomoyo_path_perm(TOMOYO_TYPE_UMOUNT, &path, NULL); } +/** + * tomoyo_sb_pivotroot - Target for security_sb_pivotroot(). + * + * @old_path: Pointer to "struct path". + * @new_path: Pointer to "struct path". + * + * Returns 0 on success, negative value otherwise. + */ static int tomoyo_sb_pivotroot(struct path *old_path, struct path *new_path) { return tomoyo_path2_perm(TOMOYO_TYPE_PIVOT_ROOT, new_path, old_path); @@ -258,6 +464,7 @@ static struct security_operations tomoyo_security_ops = { .path_mknod = tomoyo_path_mknod, .path_link = tomoyo_path_link, .path_rename = tomoyo_path_rename, + .inode_getattr = tomoyo_inode_getattr, .file_ioctl = tomoyo_file_ioctl, .path_chmod = tomoyo_path_chmod, .path_chown = tomoyo_path_chown, @@ -270,6 +477,11 @@ static struct security_operations tomoyo_security_ops = { /* Lock for GC. */ struct srcu_struct tomoyo_ss; +/** + * tomoyo_init - Register TOMOYO Linux as an LSM module. + * + * Returns 0. + */ static int __init tomoyo_init(void) { struct cred *cred = (struct cred *) current_cred(); diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 6d5393204d95..c36bd1107fc8 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -1,9 +1,7 @@ /* * security/tomoyo/util.c * - * Utility functions for TOMOYO. - * - * Copyright (C) 2005-2010 NTT DATA CORPORATION + * Copyright (C) 2005-2011 NTT DATA CORPORATION */ #include <linux/slab.h> @@ -15,18 +13,130 @@ DEFINE_MUTEX(tomoyo_policy_lock); /* Has /sbin/init started? */ bool tomoyo_policy_loaded; +/* + * Mapping table from "enum tomoyo_mac_index" to + * "enum tomoyo_mac_category_index".
+ */ +const u8 tomoyo_index2category[TOMOYO_MAX_MAC_INDEX] = { + /* CONFIG::file group */ + [TOMOYO_MAC_FILE_EXECUTE] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_OPEN] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_CREATE] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_UNLINK] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_GETATTR] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MKDIR] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_RMDIR] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MKFIFO] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MKSOCK] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_TRUNCATE] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_SYMLINK] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MKBLOCK] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MKCHAR] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_LINK] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_RENAME] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_CHMOD] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_CHOWN] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_CHGRP] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_IOCTL] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_CHROOT] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_MOUNT] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_UMOUNT] = TOMOYO_MAC_CATEGORY_FILE, + [TOMOYO_MAC_FILE_PIVOT_ROOT] = TOMOYO_MAC_CATEGORY_FILE, +}; + +/** + * tomoyo_convert_time - Convert time_t to YYYY/MM/DD hh/mm/ss. + * + * @time: Seconds since 1970/01/01 00:00:00. + * @stamp: Pointer to "struct tomoyo_time". + * + * Returns nothing. + * + * This function does not handle the Y2038 problem. + */ +void tomoyo_convert_time(time_t time, struct tomoyo_time *stamp) +{ + static const u16 tomoyo_eom[2][12] = { + { 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365 }, + { 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + u16 y; + u8 m; + bool r; + stamp->sec = time % 60; + time /= 60; + stamp->min = time % 60; + time /= 60; + stamp->hour = time % 24; + time /= 24; + for (y = 1970; ; y++) { + const unsigned short days = (y & 3) ? 365 : 366; + if (time < days) + break; + time -= days; + } + r = (y & 3) == 0; + for (m = 0; m < 11 && time >= tomoyo_eom[r][m]; m++) + ; + if (m) + time -= tomoyo_eom[r][m - 1]; + stamp->year = y; + stamp->month = ++m; + stamp->day = ++time; +} + +/** + * tomoyo_permstr - Find permission keywords. + * + * @string: String representation for permissions in foo/bar/buz format. + * @keyword: Keyword to find from @string. + * + * Returns true if @keyword was found in @string, false otherwise. + * + * This function assumes that strncmp(w1, w2, strlen(w1)) != 0 if w1 != w2. + */ +bool tomoyo_permstr(const char *string, const char *keyword) +{ + const char *cp = strstr(string, keyword); + if (cp) + return cp == string || *(cp - 1) == '/'; + return false; +} + +/** + * tomoyo_read_token - Read a word from a line. + * + * @param: Pointer to "struct tomoyo_acl_param". + * + * Returns a word on success, "" otherwise. + * + * To allow the caller to skip NULL check, this function returns "" rather than + * NULL if there are no more words to read. + */ +char *tomoyo_read_token(struct tomoyo_acl_param *param) +{ + char *pos = param->data; + char *del = strchr(pos, ' '); + if (del) + *del++ = '\0'; + else + del = pos + strlen(pos); + param->data = del; + return pos; +} + /** * tomoyo_parse_ulong - Parse an "unsigned long" value. * * @result: Pointer to "unsigned long". * @str: Pointer to string to parse. * - * Returns value type on success, 0 otherwise.
+ * Returns one of the values in "enum tomoyo_value_type". * * The @str is updated to point to the first character after the value * on success. */ -static u8 tomoyo_parse_ulong(unsigned long *result, char **str) +u8 tomoyo_parse_ulong(unsigned long *result, char **str) { const char *cp = *str; char *ep; @@ -43,7 +153,7 @@ } *result = simple_strtoul(cp, &ep, base); if (cp == ep) - return 0; + return TOMOYO_VALUE_TYPE_INVALID; *str = ep; switch (base) { case 16: @@ -81,63 +191,65 @@ void tomoyo_print_ulong(char *buffer, const int buffer_len, /** * tomoyo_parse_name_union - Parse a tomoyo_name_union. * - * @filename: Name or name group. - * @ptr: Pointer to "struct tomoyo_name_union". + * @param: Pointer to "struct tomoyo_acl_param". + * @ptr: Pointer to "struct tomoyo_name_union". * * Returns true on success, false otherwise. */ -bool tomoyo_parse_name_union(const char *filename, +bool tomoyo_parse_name_union(struct tomoyo_acl_param *param, struct tomoyo_name_union *ptr) { - if (!tomoyo_correct_word(filename)) - return false; - if (filename[0] == '@') { - ptr->group = tomoyo_get_group(filename + 1, TOMOYO_PATH_GROUP); - ptr->is_group = true; + char *filename; + if (param->data[0] == '@') { + param->data++; + ptr->group = tomoyo_get_group(param, TOMOYO_PATH_GROUP); return ptr->group != NULL; } + filename = tomoyo_read_token(param); + if (!tomoyo_correct_word(filename)) + return false; ptr->filename = tomoyo_get_name(filename); - ptr->is_group = false; return ptr->filename != NULL; } /** * tomoyo_parse_number_union - Parse a tomoyo_number_union. * - * @data: Number or number range or number group. - * @ptr: Pointer to "struct tomoyo_number_union". + * @param: Pointer to "struct tomoyo_acl_param". + * @ptr: Pointer to "struct tomoyo_number_union". * * Returns true on success, false otherwise. */ -bool tomoyo_parse_number_union(char *data, struct tomoyo_number_union *num) +bool tomoyo_parse_number_union(struct tomoyo_acl_param *param, + struct tomoyo_number_union *ptr) { + char *data; u8 type; unsigned long v; - memset(num, 0, sizeof(*num)); - if (data[0] == '@') { - if (!tomoyo_correct_word(data)) - return false; - num->group = tomoyo_get_group(data + 1, TOMOYO_NUMBER_GROUP); - num->is_group = true; - return num->group != NULL; + memset(ptr, 0, sizeof(*ptr)); + if (param->data[0] == '@') { + param->data++; + ptr->group = tomoyo_get_group(param, TOMOYO_NUMBER_GROUP); + return ptr->group != NULL; } + data = tomoyo_read_token(param); type = tomoyo_parse_ulong(&v, &data); - if (!type) + if (type == TOMOYO_VALUE_TYPE_INVALID) return false; - num->values[0] = v; - num->min_type = type; + ptr->values[0] = v; + ptr->value_type[0] = type; if (!*data) { - num->values[1] = v; - num->max_type = type; + ptr->values[1] = v; + ptr->value_type[1] = type; return true; } if (*data++ != '-') return false; type = tomoyo_parse_ulong(&v, &data); - if (!type || *data) + if (type == TOMOYO_VALUE_TYPE_INVALID || *data || ptr->values[0] > v) return false; - num->values[1] = v; - num->max_type = type; + ptr->values[1] = v; + ptr->value_type[1] = type; return true; } @@ -185,6 +297,30 @@ static inline u8 tomoyo_make_byte(const u8 c1, const u8 c2, const u8 c3) } /** + * tomoyo_valid - Check whether the character is a valid char. + * + * @c: The character to check. + * + * Returns true if @c is a valid character, false otherwise.
+ */ +static inline bool tomoyo_valid(const unsigned char c) +{ + return c > ' ' && c < 127; +} + +/** + * tomoyo_invalid - Check whether the character is an invalid char. + * + * @c: The character to check. + * + * Returns true if @c is an invalid character, false otherwise. + */ +static inline bool tomoyo_invalid(const unsigned char c) +{ + return c && (c <= ' ' || c >= 127); +} + +/** * tomoyo_str_starts - Check whether the given string starts with the given keyword. * * @src: Pointer to pointer to the string. @@ -238,36 +374,9 @@ void tomoyo_normalize_line(unsigned char *buffer) } /** - * tomoyo_tokenize - Tokenize string. - * - * @buffer: The line to tokenize. - * @w: Pointer to "char *". - * @size: Sizeof @w . - * - * Returns true on success, false otherwise. - */ -bool tomoyo_tokenize(char *buffer, char *w[], size_t size) -{ - int count = size / sizeof(char *); - int i; - for (i = 0; i < count; i++) - w[i] = ""; - for (i = 0; i < count; i++) { - char *cp = strchr(buffer, ' '); - if (cp) - *cp = '\0'; - w[i] = buffer; - if (!cp) - break; - buffer = cp + 1; - } - return i < count || !*buffer; -} - -/** * tomoyo_correct_word2 - Validate a string. * * @string: The string to check. May be non-'\0'-terminated. * @len: Length of @string. * * Check whether the given string follows the naming rules. @@ -377,26 +486,21 @@ bool tomoyo_correct_path(const char *filename) */ bool tomoyo_correct_domain(const unsigned char *domainname) { - if (!domainname || strncmp(domainname, TOMOYO_ROOT_NAME, - TOMOYO_ROOT_NAME_LEN)) - goto out; - domainname += TOMOYO_ROOT_NAME_LEN; - if (!*domainname) + if (!domainname || !tomoyo_domain_def(domainname)) + return false; + domainname = strchr(domainname, ' '); + if (!domainname++) return true; - if (*domainname++ != ' ') - goto out; while (1) { const unsigned char *cp = strchr(domainname, ' '); if (!cp) break; if (*domainname != '/' || !tomoyo_correct_word2(domainname, cp - domainname)) - goto out; + return false; domainname = cp + 1; } return tomoyo_correct_path(domainname); - out: - return false; } /** @@ -408,7 +512,19 @@ bool tomoyo_correct_domain(const unsigned char *domainname) */ bool tomoyo_domain_def(const unsigned char *buffer) { - return !strncmp(buffer, TOMOYO_ROOT_NAME, TOMOYO_ROOT_NAME_LEN); + const unsigned char *cp; + int len; + if (*buffer != '<') + return false; + cp = strchr(buffer, ' '); + if (!cp) + len = strlen(buffer); + else + len = cp - buffer; + if (buffer[len - 1] != '>' || + !tomoyo_correct_word2(buffer + 1, len - 2)) + return false; + return true; } /** @@ -794,22 +910,24 @@ const char *tomoyo_get_exe(void) /** * tomoyo_get_mode - Get MAC mode. * + * @ns: Pointer to "struct tomoyo_policy_namespace". * @profile: Profile number. * @index: Index number of functionality. * * Returns mode.
*/ -int tomoyo_get_mode(const u8 profile, const u8 index) +int tomoyo_get_mode(const struct tomoyo_policy_namespace *ns, const u8 profile, + const u8 index) { u8 mode; const u8 category = TOMOYO_MAC_CATEGORY_FILE; if (!tomoyo_policy_loaded) return TOMOYO_CONFIG_DISABLED; - mode = tomoyo_profile(profile)->config[index]; + mode = tomoyo_profile(ns, profile)->config[index]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) - mode = tomoyo_profile(profile)->config[category]; + mode = tomoyo_profile(ns, profile)->config[category]; if (mode == TOMOYO_CONFIG_USE_DEFAULT) - mode = tomoyo_profile(profile)->default_config; + mode = tomoyo_profile(ns, profile)->default_config; return mode & 3; } @@ -833,65 +951,11 @@ int tomoyo_init_request_info(struct tomoyo_request_info *r, profile = domain->profile; r->profile = profile; r->type = index; - r->mode = tomoyo_get_mode(profile, index); + r->mode = tomoyo_get_mode(domain->ns, profile, index); return r->mode; } /** - * tomoyo_last_word - Get last component of a line. - * - * @line: A line. - * - * Returns the last word of a line. - */ -const char *tomoyo_last_word(const char *name) -{ - const char *cp = strrchr(name, ' '); - if (cp) - return cp + 1; - return name; -} - -/** - * tomoyo_warn_log - Print warning or error message on console. - * - * @r: Pointer to "struct tomoyo_request_info". - * @fmt: The printf()'s format string, followed by parameters. - */ -void tomoyo_warn_log(struct tomoyo_request_info *r, const char *fmt, ...) -{ - va_list args; - char *buffer; - const struct tomoyo_domain_info * const domain = r->domain; - const struct tomoyo_profile *profile = tomoyo_profile(domain->profile); - switch (r->mode) { - case TOMOYO_CONFIG_ENFORCING: - if (!profile->enforcing->enforcing_verbose) - return; - break; - case TOMOYO_CONFIG_PERMISSIVE: - if (!profile->permissive->permissive_verbose) - return; - break; - case TOMOYO_CONFIG_LEARNING: - if (!profile->learning->learning_verbose) - return; - break; - } - buffer = kmalloc(4096, GFP_NOFS); - if (!buffer) - return; - va_start(args, fmt); - vsnprintf(buffer, 4095, fmt, args); - va_end(args); - buffer[4095] = '\0'; - printk(KERN_WARNING "%s: Access %s denied for %s\n", - r->mode == TOMOYO_CONFIG_ENFORCING ? "ERROR" : "WARNING", buffer, - tomoyo_last_word(domain->domainname->name)); - kfree(buffer); -} - -/** * tomoyo_domain_quota_is_ok - Check for domain's quota. * * @r: Pointer to "struct tomoyo_request_info". 
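With profiles now namespace-scoped, the mode lookup in tomoyo_get_mode() above falls back in three steps: the per-functionality entry, then the per-category entry, then the profile default, and only the low two bits (disabled/learning/permissive/enforcing) are kept. A condensed restatement of that fallback, not a drop-in function, using TOMOYO_MAC_FILE_OPEN as an example index:

	u8 mode = tomoyo_profile(ns, profile)->config[TOMOYO_MAC_FILE_OPEN];
	if (mode == TOMOYO_CONFIG_USE_DEFAULT)	/* functionality not configured */
		mode = tomoyo_profile(ns, profile)->config[TOMOYO_MAC_CATEGORY_FILE];
	if (mode == TOMOYO_CONFIG_USE_DEFAULT)	/* category not configured either */
		mode = tomoyo_profile(ns, profile)->default_config;
	mode &= 3;	/* disabled / learning / permissive / enforcing */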
@@ -911,52 +975,43 @@ bool tomoyo_domain_quota_is_ok(struct tomoyo_request_info *r) if (!domain) return true; list_for_each_entry_rcu(ptr, &domain->acl_info_list, list) { + u16 perm; + u8 i; if (ptr->is_deleted) continue; switch (ptr->type) { - u16 perm; - u8 i; case TOMOYO_TYPE_PATH_ACL: perm = container_of(ptr, struct tomoyo_path_acl, head) ->perm; - for (i = 0; i < TOMOYO_MAX_PATH_OPERATION; i++) - if (perm & (1 << i)) - count++; - if (perm & (1 << TOMOYO_TYPE_READ_WRITE)) - count -= 2; break; case TOMOYO_TYPE_PATH2_ACL: perm = container_of(ptr, struct tomoyo_path2_acl, head) ->perm; - for (i = 0; i < TOMOYO_MAX_PATH2_OPERATION; i++) - if (perm & (1 << i)) - count++; break; case TOMOYO_TYPE_PATH_NUMBER_ACL: perm = container_of(ptr, struct tomoyo_path_number_acl, head)->perm; - for (i = 0; i < TOMOYO_MAX_PATH_NUMBER_OPERATION; i++) - if (perm & (1 << i)) - count++; break; case TOMOYO_TYPE_MKDEV_ACL: perm = container_of(ptr, struct tomoyo_mkdev_acl, head)->perm; - for (i = 0; i < TOMOYO_MAX_MKDEV_OPERATION; i++) - if (perm & (1 << i)) - count++; break; default: - count++; + perm = 1; } + for (i = 0; i < 16; i++) + if (perm & (1 << i)) + count++; } - if (count < tomoyo_profile(domain->profile)->learning-> - learning_max_entry) + if (count < tomoyo_profile(domain->ns, domain->profile)-> + pref[TOMOYO_PREF_MAX_LEARNING_ENTRY]) return true; - if (!domain->quota_warned) { - domain->quota_warned = true; - printk(KERN_WARNING "TOMOYO-WARNING: " - "Domain '%s' has so many ACLs to hold. " + if (!domain->flags[TOMOYO_DIF_QUOTA_WARNED]) { + domain->flags[TOMOYO_DIF_QUOTA_WARNED] = true; + /* r->granted = false; */ + tomoyo_write_log(r, "%s", tomoyo_dif[TOMOYO_DIF_QUOTA_WARNED]); + printk(KERN_WARNING "WARNING: " + "Domain '%s' has too many ACLs to hold. " "Stopped learning mode.\n", domain->domainname->name); } return false; diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index f1341308beda..86d0caf91b35 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -128,7 +128,8 @@ void snd_pcm_playback_silence(struct snd_pcm_substream *substream, snd_pcm_ufram } } -static void pcm_debug_name(struct snd_pcm_substream *substream, +#ifdef CONFIG_SND_DEBUG +void snd_pcm_debug_name(struct snd_pcm_substream *substream, char *name, size_t len) { snprintf(name, len, "pcmC%dD%d%c:%d", @@ -137,6 +138,8 @@ static void pcm_debug_name(struct snd_pcm_substream *substream, substream->stream ? 
'c' : 'p', substream->number); } +EXPORT_SYMBOL(snd_pcm_debug_name); +#endif #define XRUN_DEBUG_BASIC (1<<0) #define XRUN_DEBUG_STACK (1<<1) /* dump also stack */ @@ -168,7 +171,7 @@ static void xrun(struct snd_pcm_substream *substream) snd_pcm_stop(substream, SNDRV_PCM_STATE_XRUN); if (xrun_debug(substream, XRUN_DEBUG_BASIC)) { char name[16]; - pcm_debug_name(substream, name, sizeof(name)); + snd_pcm_debug_name(substream, name, sizeof(name)); snd_printd(KERN_DEBUG "XRUN: %s\n", name); dump_stack_on_xrun(substream); } @@ -243,7 +246,7 @@ static void xrun_log_show(struct snd_pcm_substream *substream) return; if (xrun_debug(substream, XRUN_DEBUG_LOGONCE) && log->hit) return; - pcm_debug_name(substream, name, sizeof(name)); + snd_pcm_debug_name(substream, name, sizeof(name)); for (cnt = 0, idx = log->idx; cnt < XRUN_LOG_CNT; cnt++) { entry = &log->entries[idx]; if (entry->period_size == 0) @@ -319,7 +322,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, if (pos >= runtime->buffer_size) { if (printk_ratelimit()) { char name[16]; - pcm_debug_name(substream, name, sizeof(name)); + snd_pcm_debug_name(substream, name, sizeof(name)); xrun_log_show(substream); snd_printd(KERN_ERR "BUG: %s, pos = %ld, " "buffer size = %ld, period size = %ld\n", @@ -364,7 +367,7 @@ static int snd_pcm_update_hw_ptr0(struct snd_pcm_substream *substream, if (xrun_debug(substream, in_interrupt ? XRUN_DEBUG_PERIODUPDATE : XRUN_DEBUG_HWPTRUPDATE)) { char name[16]; - pcm_debug_name(substream, name, sizeof(name)); + snd_pcm_debug_name(substream, name, sizeof(name)); snd_printd("%s_update: %s: pos=%u/%u/%u, " "hwptr=%ld/%ld/%ld/%ld\n", in_interrupt ? "period" : "hwptr", diff --git a/sound/oss/ad1848.c b/sound/oss/ad1848.c index 4d2a6ae978f7..8a197fd3c57e 100644 --- a/sound/oss/ad1848.c +++ b/sound/oss/ad1848.c @@ -458,7 +458,7 @@ static int ad1848_set_recmask(ad1848_info * devc, int mask) return mask; } -static void change_bits(ad1848_info * devc, unsigned char *regval, +static void oss_change_bits(ad1848_info *devc, unsigned char *regval, unsigned char *muteval, int dev, int chn, int newval) { unsigned char mask; @@ -516,10 +516,10 @@ static void ad1848_mixer_set_channel(ad1848_info *devc, int dev, int value, int if (muteregoffs != regoffs) { muteval = ad_read(devc, muteregoffs); - change_bits(devc, &val, &muteval, dev, channel, value); + oss_change_bits(devc, &val, &muteval, dev, channel, value); } else - change_bits(devc, &val, &val, dev, channel, value); + oss_change_bits(devc, &val, &val, dev, channel, value); spin_lock_irqsave(&devc->lock,flags); ad_write(devc, regoffs, val); diff --git a/sound/oss/sb_mixer.c b/sound/oss/sb_mixer.c index 2039d31b7e22..f8f3b7a66b73 100644 --- a/sound/oss/sb_mixer.c +++ b/sound/oss/sb_mixer.c @@ -232,7 +232,7 @@ static int detect_mixer(sb_devc * devc) return 1; } -static void change_bits(sb_devc * devc, unsigned char *regval, int dev, int chn, int newval) +static void oss_change_bits(sb_devc *devc, unsigned char *regval, int dev, int chn, int newval) { unsigned char mask; int shift; @@ -284,7 +284,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right) return -EINVAL; val = sb_getmixer(devc, regoffs); - change_bits(devc, &val, dev, LEFT_CHN, left); + oss_change_bits(devc, &val, dev, LEFT_CHN, left); if ((*devc->iomap)[dev][RIGHT_CHN].regno != regoffs) /* * Change register @@ -304,7 +304,7 @@ int sb_common_mixer_set(sb_devc * devc, int dev, int left, int right) * Read the new one */ } - change_bits(devc, &val, dev, RIGHT_CHN, right); + 
oss_change_bits(devc, &val, dev, RIGHT_CHN, right); sb_setmixer(devc, regoffs, val); diff --git a/sound/pci/asihpi/asihpi.c b/sound/pci/asihpi/asihpi.c index b941d2541dda..eae62ebbd295 100644 --- a/sound/pci/asihpi/asihpi.c +++ b/sound/pci/asihpi/asihpi.c @@ -41,31 +41,10 @@ #include <sound/tlv.h> #include <sound/hwdep.h> - MODULE_LICENSE("GPL"); MODULE_AUTHOR("AudioScience inc. <support@audioscience.com>"); MODULE_DESCRIPTION("AudioScience ALSA ASI5000 ASI6000 ASI87xx ASI89xx"); -#if defined CONFIG_SND_DEBUG -/* copied from pcm_lib.c, hope later patch will make that version public -and this copy can be removed */ -static inline void -snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size) -{ - snprintf(buf, size, "pcmC%dD%d%c:%d", - substream->pcm->card->number, - substream->pcm->device, - substream->stream ? 'c' : 'p', - substream->number); -} -#else -static inline void -snd_pcm_debug_name(struct snd_pcm_substream *substream, char *buf, size_t size) -{ - *buf = 0; -} -#endif - #if defined CONFIG_SND_DEBUG_VERBOSE /** * snd_printddd - very verbose debug printk diff --git a/sound/pci/asihpi/hpioctl.c b/sound/pci/asihpi/hpioctl.c index 65fcf4770731..9683f84ecdc8 100644 --- a/sound/pci/asihpi/hpioctl.c +++ b/sound/pci/asihpi/hpioctl.c @@ -107,7 +107,6 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) union hpi_response_buffer_v1 *hr; u16 res_max_size; u32 uncopied_bytes; - struct hpi_adapter *pa = NULL; int err = 0; if (cmd != HPI_IOCTL_LINUX) @@ -182,8 +181,9 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) /* -1=no data 0=read from user mem, 1=write to user mem */ int wrflag = -1; u32 adapter = hm->h.adapter_index; + struct hpi_adapter *pa = &adapters[adapter]; - if ((adapter > HPI_MAX_ADAPTERS) || (!pa->type)) { + if ((adapter >= HPI_MAX_ADAPTERS) || (!pa->type)) { hpi_init_response(&hr->r0, HPI_OBJ_ADAPTER, HPI_ADAPTER_OPEN, HPI_ERROR_BAD_ADAPTER_NUMBER); @@ -197,9 +197,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) goto out; } - pa = &adapters[adapter]; - - if (mutex_lock_interruptible(&adapters[adapter].mutex)) { + if (mutex_lock_interruptible(&pa->mutex)) { err = -EINTR; goto out; } @@ -235,8 +233,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) "stream buffer size %d\n", size); - mutex_unlock(&adapters - [adapter].mutex); + mutex_unlock(&pa->mutex); err = -EINVAL; goto out; } @@ -277,7 +274,7 @@ long asihpi_hpi_ioctl(struct file *file, unsigned int cmd, unsigned long arg) uncopied_bytes, size); } - mutex_unlock(&adapters[adapter].mutex); + mutex_unlock(&pa->mutex); } /* on return response size must be set */ diff --git a/sound/pci/hda/Kconfig b/sound/pci/hda/Kconfig index 7489b4608551..bb7e102d6726 100644 --- a/sound/pci/hda/Kconfig +++ b/sound/pci/hda/Kconfig @@ -243,6 +243,7 @@ config SND_HDA_GENERIC config SND_HDA_POWER_SAVE bool "Aggressive power-saving on HD-audio" + depends on PM help Say Y here to enable more aggressive power-saving mode on HD-audio driver. 
The power-saving timeout can be configured diff --git a/sound/pci/hda/hda_codec.c b/sound/pci/hda/hda_codec.c index 9c27a3a4c4d5..3e7850c238c3 100644 --- a/sound/pci/hda/hda_codec.c +++ b/sound/pci/hda/hda_codec.c @@ -91,8 +91,10 @@ EXPORT_SYMBOL_HDA(snd_hda_delete_codec_preset); #ifdef CONFIG_SND_HDA_POWER_SAVE static void hda_power_work(struct work_struct *work); static void hda_keep_power_on(struct hda_codec *codec); +#define hda_codec_is_power_on(codec) ((codec)->power_on) #else static inline void hda_keep_power_on(struct hda_codec *codec) {} +#define hda_codec_is_power_on(codec) 1 #endif /** @@ -1101,7 +1103,7 @@ void snd_hda_shutup_pins(struct hda_codec *codec) } EXPORT_SYMBOL_HDA(snd_hda_shutup_pins); -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM /* Restore the pin controls cleared previously via snd_hda_shutup_pins() */ static void restore_shutup_pins(struct hda_codec *codec) { @@ -1499,7 +1501,7 @@ static void purify_inactive_streams(struct hda_codec *codec) } } -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM /* clean up all streams; called from suspend */ static void hda_cleanup_all_streams(struct hda_codec *codec) { @@ -1838,7 +1840,7 @@ int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid, } EXPORT_SYMBOL_HDA(snd_hda_codec_amp_stereo); -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM /** * snd_hda_codec_resume_amp - Resume all AMP commands from the cache * @codec: HD-audio codec @@ -1868,7 +1870,7 @@ void snd_hda_codec_resume_amp(struct hda_codec *codec) } } EXPORT_SYMBOL_HDA(snd_hda_codec_resume_amp); -#endif /* SND_HDA_NEEDS_RESUME */ +#endif /* CONFIG_PM */ static u32 get_amp_max_value(struct hda_codec *codec, hda_nid_t nid, int dir, unsigned int ofs) @@ -3082,7 +3084,7 @@ int snd_hda_create_spdif_in_ctls(struct hda_codec *codec, hda_nid_t nid) } EXPORT_SYMBOL_HDA(snd_hda_create_spdif_in_ctls); -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM /* * command cache */ @@ -3199,53 +3201,32 @@ void snd_hda_sequence_write_cache(struct hda_codec *codec, seq->param); } EXPORT_SYMBOL_HDA(snd_hda_sequence_write_cache); -#endif /* SND_HDA_NEEDS_RESUME */ +#endif /* CONFIG_PM */ -/* - * set power state of the codec - */ -static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, - unsigned int power_state) +void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state, + bool eapd_workaround) { - hda_nid_t nid; + hda_nid_t nid = codec->start_nid; int i; - /* this delay seems necessary to avoid click noise at power-down */ - if (power_state == AC_PWRST_D3) - msleep(100); - snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE, - power_state); - /* partial workaround for "azx_get_response timeout" */ - if (power_state == AC_PWRST_D0 && - (codec->vendor_id & 0xffff0000) == 0x14f10000) - msleep(10); - - nid = codec->start_nid; for (i = 0; i < codec->num_nodes; i++, nid++) { unsigned int wcaps = get_wcaps(codec, nid); - if (wcaps & AC_WCAP_POWER) { - unsigned int wid_type = get_wcaps_type(wcaps); - if (power_state == AC_PWRST_D3 && - wid_type == AC_WID_PIN) { - unsigned int pincap; - /* - * don't power down the widget if it controls - * eapd and EAPD_BTLENABLE is set. - */ - pincap = snd_hda_query_pin_caps(codec, nid); - if (pincap & AC_PINCAP_EAPD) { - int eapd = snd_hda_codec_read(codec, - nid, 0, + if (!(wcaps & AC_WCAP_POWER)) + continue; + /* don't power down the widget if it controls eapd and + * EAPD_BTLENABLE is set. 
+ */ + if (eapd_workaround && power_state == AC_PWRST_D3 && + get_wcaps_type(wcaps) == AC_WID_PIN && + (snd_hda_query_pin_caps(codec, nid) & AC_PINCAP_EAPD)) { + int eapd = snd_hda_codec_read(codec, nid, 0, AC_VERB_GET_EAPD_BTLENABLE, 0); - eapd &= 0x02; - if (eapd) - continue; - } - } - snd_hda_codec_write(codec, nid, 0, - AC_VERB_SET_POWER_STATE, - power_state); + if (eapd & 0x02) + continue; } + snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_POWER_STATE, + power_state); } if (power_state == AC_PWRST_D0) { @@ -3262,6 +3243,26 @@ static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, } while (time_after_eq(end_time, jiffies)); } } +EXPORT_SYMBOL_HDA(snd_hda_codec_set_power_to_all); + +/* + * set power state of the codec + */ +static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state) +{ + if (codec->patch_ops.set_power_state) { + codec->patch_ops.set_power_state(codec, fg, power_state); + return; + } + + /* this delay seems necessary to avoid click noise at power-down */ + if (power_state == AC_PWRST_D3) + msleep(100); + snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE, + power_state); + snd_hda_codec_set_power_to_all(codec, fg, power_state, true); +} #ifdef CONFIG_SND_HDA_HWDEP /* execute additional init verbs */ @@ -3274,7 +3275,7 @@ static void hda_exec_init_verbs(struct hda_codec *codec) static inline void hda_exec_init_verbs(struct hda_codec *codec) {} #endif -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM /* * call suspend and power-down; used both from PM and power-save */ @@ -3315,7 +3316,7 @@ static void hda_call_codec_resume(struct hda_codec *codec) snd_hda_codec_resume_cache(codec); } } -#endif /* SND_HDA_NEEDS_RESUME */ +#endif /* CONFIG_PM */ /** @@ -4071,9 +4072,6 @@ int snd_hda_add_new_ctls(struct hda_codec *codec, EXPORT_SYMBOL_HDA(snd_hda_add_new_ctls); #ifdef CONFIG_SND_HDA_POWER_SAVE -static void hda_set_power_state(struct hda_codec *codec, hda_nid_t fg, - unsigned int power_state); - static void hda_power_work(struct work_struct *work) { struct hda_codec *codec = @@ -4376,11 +4374,8 @@ void snd_hda_bus_reboot_notify(struct hda_bus *bus) if (!bus) return; list_for_each_entry(codec, &bus->codec_list, list) { -#ifdef CONFIG_SND_HDA_POWER_SAVE - if (!codec->power_on) - continue; -#endif - if (codec->patch_ops.reboot_notify) + if (hda_codec_is_power_on(codec) && + codec->patch_ops.reboot_notify) codec->patch_ops.reboot_notify(codec); } } @@ -5079,11 +5074,10 @@ int snd_hda_suspend(struct hda_bus *bus) struct hda_codec *codec; list_for_each_entry(codec, &bus->codec_list, list) { -#ifdef CONFIG_SND_HDA_POWER_SAVE - if (!codec->power_on) - continue; -#endif - hda_call_codec_suspend(codec); + if (hda_codec_is_power_on(codec)) + hda_call_codec_suspend(codec); + if (codec->patch_ops.post_suspend) + codec->patch_ops.post_suspend(codec); } return 0; } @@ -5103,6 +5097,8 @@ int snd_hda_resume(struct hda_bus *bus) struct hda_codec *codec; list_for_each_entry(codec, &bus->codec_list, list) { + if (codec->patch_ops.pre_resume) + codec->patch_ops.pre_resume(codec); if (snd_hda_codec_needs_resume(codec)) hda_call_codec_resume(codec); } diff --git a/sound/pci/hda/hda_codec.h b/sound/pci/hda/hda_codec.h index f465e07a4879..755f2b0f9d8e 100644 --- a/sound/pci/hda/hda_codec.h +++ b/sound/pci/hda/hda_codec.h @@ -26,10 +26,6 @@ #include <sound/pcm.h> #include <sound/hwdep.h> -#if defined(CONFIG_PM) || defined(CONFIG_SND_HDA_POWER_SAVE) -#define SND_HDA_NEEDS_RESUME /* resume control code is required */ -#endif - /* * nodes */ @@ 
-704,8 +700,12 @@ struct hda_codec_ops { int (*init)(struct hda_codec *codec); void (*free)(struct hda_codec *codec); void (*unsol_event)(struct hda_codec *codec, unsigned int res); -#ifdef SND_HDA_NEEDS_RESUME + void (*set_power_state)(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state); +#ifdef CONFIG_PM int (*suspend)(struct hda_codec *codec, pm_message_t state); + int (*post_suspend)(struct hda_codec *codec); + int (*pre_resume)(struct hda_codec *codec); int (*resume)(struct hda_codec *codec); #endif #ifdef CONFIG_SND_HDA_POWER_SAVE @@ -927,7 +927,7 @@ void snd_hda_sequence_write(struct hda_codec *codec, int snd_hda_queue_unsol_event(struct hda_bus *bus, u32 res, u32 res_ex); /* cached write */ -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM int snd_hda_codec_write_cache(struct hda_codec *codec, hda_nid_t nid, int direct, unsigned int verb, unsigned int parm); void snd_hda_sequence_write_cache(struct hda_codec *codec, @@ -1008,6 +1008,9 @@ int snd_hda_is_supported_format(struct hda_codec *codec, hda_nid_t nid, */ void snd_hda_get_codec_name(struct hda_codec *codec, char *name, int namelen); void snd_hda_bus_reboot_notify(struct hda_bus *bus); +void snd_hda_codec_set_power_to_all(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state, + bool eapd_workaround); /* * power management diff --git a/sound/pci/hda/hda_local.h b/sound/pci/hda/hda_local.h index 88b277e97409..2e7ac31afa8d 100644 --- a/sound/pci/hda/hda_local.h +++ b/sound/pci/hda/hda_local.h @@ -131,7 +131,7 @@ int snd_hda_codec_amp_update(struct hda_codec *codec, hda_nid_t nid, int ch, int direction, int idx, int mask, int val); int snd_hda_codec_amp_stereo(struct hda_codec *codec, hda_nid_t nid, int dir, int idx, int mask, int val); -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM void snd_hda_codec_resume_amp(struct hda_codec *codec); #endif diff --git a/sound/pci/hda/patch_analog.c b/sound/pci/hda/patch_analog.c index 1362c8ba4d1f..8648917acffb 100644 --- a/sound/pci/hda/patch_analog.c +++ b/sound/pci/hda/patch_analog.c @@ -563,7 +563,7 @@ static void ad198x_free(struct hda_codec *codec) snd_hda_detach_beep_device(codec); } -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM static int ad198x_suspend(struct hda_codec *codec, pm_message_t state) { ad198x_shutup(codec); @@ -579,7 +579,7 @@ static const struct hda_codec_ops ad198x_patch_ops = { #ifdef CONFIG_SND_HDA_POWER_SAVE .check_power_status = ad198x_check_power_status, #endif -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM .suspend = ad198x_suspend, #endif .reboot_notify = ad198x_shutup, diff --git a/sound/pci/hda/patch_cirrus.c b/sound/pci/hda/patch_cirrus.c index 7f93739b1e33..47d6ffc9b5b5 100644 --- a/sound/pci/hda/patch_cirrus.c +++ b/sound/pci/hda/patch_cirrus.c @@ -25,6 +25,7 @@ #include <sound/core.h> #include "hda_codec.h" #include "hda_local.h" +#include <sound/tlv.h> /* */ @@ -61,9 +62,15 @@ struct cs_spec { unsigned int hp_detect:1; unsigned int mic_detect:1; + /* CS421x */ + unsigned int spdif_detect:1; + unsigned int sense_b:1; + hda_nid_t vendor_nid; + struct hda_input_mux input_mux; + unsigned int last_input; }; -/* available models */ +/* available models with CS420x */ enum { CS420X_MBP53, CS420X_MBP55, @@ -72,6 +79,12 @@ enum { CS420X_MODELS }; +/* CS421x boards */ +enum { + CS421X_CDB4210, + CS421X_MODELS +}; + /* Vendor-specific processing widget */ #define CS420X_VENDOR_NID 0x11 #define CS_DIG_OUT1_PIN_NID 0x10 @@ -111,21 +124,42 @@ enum { /* 0x0009 - 0x0014 -> 12 test regs */ /* 0x0015 - visibility reg */ +/* + * Cirrus Logic CS4210 + 
* + * 1 DAC => HP(sense) / Speakers, + * 1 ADC <= LineIn(sense) / MicIn / DMicIn, + * 1 SPDIF OUT => SPDIF Transmitter(sense) +*/ +#define CS4210_DAC_NID 0x02 +#define CS4210_ADC_NID 0x03 +#define CS421X_VENDOR_NID 0x0B +#define CS421X_DMIC_PIN_NID 0x09 /* Port E */ +#define CS421X_SPDIF_PIN_NID 0x0A /* Port H */ + +#define CS421X_IDX_DEV_CFG 0x01 +#define CS421X_IDX_ADC_CFG 0x02 +#define CS421X_IDX_DAC_CFG 0x03 +#define CS421X_IDX_SPK_CTL 0x04 + +#define SPDIF_EVENT 0x04 static inline int cs_vendor_coef_get(struct hda_codec *codec, unsigned int idx) { - snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0, + struct cs_spec *spec = codec->spec; + snd_hda_codec_write(codec, spec->vendor_nid, 0, AC_VERB_SET_COEF_INDEX, idx); - return snd_hda_codec_read(codec, CS420X_VENDOR_NID, 0, + return snd_hda_codec_read(codec, spec->vendor_nid, 0, AC_VERB_GET_PROC_COEF, 0); } static inline void cs_vendor_coef_set(struct hda_codec *codec, unsigned int idx, unsigned int coef) { - snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0, + struct cs_spec *spec = codec->spec; + snd_hda_codec_write(codec, spec->vendor_nid, 0, AC_VERB_SET_COEF_INDEX, idx); - snd_hda_codec_write(codec, CS420X_VENDOR_NID, 0, + snd_hda_codec_write(codec, spec->vendor_nid, 0, AC_VERB_SET_PROC_COEF, coef); } @@ -347,15 +381,12 @@ static hda_nid_t get_adc(struct hda_codec *codec, hda_nid_t pin, nid = codec->start_nid; for (i = 0; i < codec->num_nodes; i++, nid++) { unsigned int type; - int idx; type = get_wcaps_type(get_wcaps(codec, nid)); if (type != AC_WID_AUD_IN) continue; - idx = snd_hda_get_conn_index(codec, nid, pin, 0); - if (idx >= 0) { - *idxp = idx; + *idxp = snd_hda_get_conn_index(codec, nid, pin, false); + if (*idxp >= 0) return nid; - } } return 0; } @@ -835,6 +866,8 @@ static int build_digital_input(struct hda_codec *codec) /* * auto-mute and auto-mic switching + * CS421x auto-output redirecting + * HP/SPK/SPDIF */ static void cs_automute(struct hda_codec *codec) @@ -842,9 +875,25 @@ static void cs_automute(struct hda_codec *codec) struct cs_spec *spec = codec->spec; struct auto_pin_cfg *cfg = &spec->autocfg; unsigned int hp_present; + unsigned int spdif_present; hda_nid_t nid; int i; + spdif_present = 0; + if (cfg->dig_outs) { + nid = cfg->dig_out_pins[0]; + if (is_jack_detectable(codec, nid)) { + /* + TODO: SPDIF output redirect when SENSE_B is enabled. + Shared (SENSE_A) jack (e.g. HP/mini-TOSLINK) + assumed. + */ + if (snd_hda_jack_detect(codec, nid) + /* && spec->sense_b */) + spdif_present = 1; + } + } + hp_present = 0; for (i = 0; i < cfg->hp_outs; i++) { nid = cfg->hp_pins[i]; @@ -854,11 +903,19 @@ static void cs_automute(struct hda_codec *codec) if (hp_present) break; } + + /* mute speakers if spdif or hp jack is plugged in */ for (i = 0; i < cfg->speaker_outs; i++) { nid = cfg->speaker_pins[i]; snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, hp_present ? 0 : PIN_OUT); + /* detect on spdif is specific to CS421x */ + if (spec->vendor_nid == CS421X_VENDOR_NID) { + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, + spdif_present ?
0 : PIN_OUT); + } } if (spec->board_config == CS420X_MBP53 || spec->board_config == CS420X_MBP55 || @@ -867,21 +924,62 @@ static void cs_automute(struct hda_codec *codec) snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, gpio); } + + /* specific to CS421x */ + if (spec->vendor_nid == CS421X_VENDOR_NID) { + /* mute HPs if spdif jack (SENSE_B) is present */ + for (i = 0; i < cfg->hp_outs; i++) { + nid = cfg->hp_pins[i]; + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, + (spdif_present && spec->sense_b) ? 0 : PIN_HP); + } + + /* SPDIF TX on/off */ + if (cfg->dig_outs) { + nid = cfg->dig_out_pins[0]; + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, + spdif_present ? PIN_OUT : 0); + + } + /* Update board GPIOs if neccessary ... */ + } } +/* + * Auto-input redirect for CS421x + * Switch max 3 inputs of a single ADC (nid 3) +*/ + static void cs_automic(struct hda_codec *codec) { struct cs_spec *spec = codec->spec; struct auto_pin_cfg *cfg = &spec->autocfg; hda_nid_t nid; unsigned int present; - + nid = cfg->inputs[spec->automic_idx].pin; present = snd_hda_jack_detect(codec, nid); - if (present) - change_cur_input(codec, spec->automic_idx, 0); - else - change_cur_input(codec, !spec->automic_idx, 0); + + /* specific to CS421x, single ADC */ + if (spec->vendor_nid == CS421X_VENDOR_NID) { + if (present) { + spec->last_input = spec->cur_input; + spec->cur_input = spec->automic_idx; + } else { + spec->cur_input = spec->last_input; + } + + snd_hda_codec_write_cache(codec, spec->cur_adc, 0, + AC_VERB_SET_CONNECT_SEL, + spec->adc_idx[spec->cur_input]); + } else { + if (present) + change_cur_input(codec, spec->automic_idx, 0); + else + change_cur_input(codec, !spec->automic_idx, 0); + } } /* @@ -911,23 +1009,28 @@ static void init_output(struct hda_codec *codec) for (i = 0; i < cfg->line_outs; i++) snd_hda_codec_write(codec, cfg->line_out_pins[i], 0, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); + /* HP */ for (i = 0; i < cfg->hp_outs; i++) { hda_nid_t nid = cfg->hp_pins[i]; snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_HP); if (!cfg->speaker_outs) continue; - if (is_jack_detectable(codec, nid)) { + if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) { snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | HP_EVENT); spec->hp_detect = 1; } } + + /* Speaker */ for (i = 0; i < cfg->speaker_outs; i++) snd_hda_codec_write(codec, cfg->speaker_pins[i], 0, AC_VERB_SET_PIN_WIDGET_CONTROL, PIN_OUT); - if (spec->hp_detect) + + /* SPDIF is enabled on presence detect for CS421x */ + if (spec->hp_detect || spec->spdif_detect) cs_automute(codec); } @@ -961,19 +1064,31 @@ static void init_input(struct hda_codec *codec) AC_VERB_SET_UNSOLICITED_ENABLE, AC_USRSP_EN | MIC_EVENT); } - change_cur_input(codec, spec->cur_input, 1); - if (spec->mic_detect) - cs_automic(codec); - - coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */ - if (is_active_pin(codec, CS_DMIC2_PIN_NID)) - coef |= 0x0500; /* DMIC2 enable 2 channels, disable GPIO1 */ - if (is_active_pin(codec, CS_DMIC1_PIN_NID)) - coef |= 0x1800; /* DMIC1 enable 2 channels, disable GPIO0 - * No effect if SPDIF_OUT2 is selected in - * IDX_SPDIF_CTL. 
- */ - cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); + /* specific to CS421x */ + if (spec->vendor_nid == CS421X_VENDOR_NID) { + if (spec->mic_detect) + cs_automic(codec); + else { + spec->cur_adc = spec->adc_nid[spec->cur_input]; + snd_hda_codec_write(codec, spec->cur_adc, 0, + AC_VERB_SET_CONNECT_SEL, + spec->adc_idx[spec->cur_input]); + } + } else { + change_cur_input(codec, spec->cur_input, 1); + if (spec->mic_detect) + cs_automic(codec); + + coef = 0x000a; /* ADC1/2 - Digital and Analog Soft Ramp */ + if (is_active_pin(codec, CS_DMIC2_PIN_NID)) + coef |= 0x0500; /* DMIC2 2 chan on, GPIO1 off */ + if (is_active_pin(codec, CS_DMIC1_PIN_NID)) + coef |= 0x1800; /* DMIC1 2 chan on, GPIO0 off + * No effect if SPDIF_OUT2 is + * selected in IDX_SPDIF_CTL. + */ + cs_vendor_coef_set(codec, IDX_ADC_CFG, coef); + } } static const struct hda_verb cs_coef_init_verbs[] = { @@ -1221,16 +1336,16 @@ static const struct cs_pincfg *cs_pincfgs[CS420X_MODELS] = { [CS420X_IMAC27] = imac27_pincfgs, }; -static void fix_pincfg(struct hda_codec *codec, int model) +static void fix_pincfg(struct hda_codec *codec, int model, + const struct cs_pincfg **pin_configs) { - const struct cs_pincfg *cfg = cs_pincfgs[model]; + const struct cs_pincfg *cfg = pin_configs[model]; if (!cfg) return; for (; cfg->nid; cfg++) snd_hda_codec_set_pincfg(codec, cfg->nid, cfg->val); } - static int patch_cs420x(struct hda_codec *codec) { struct cs_spec *spec; @@ -1241,11 +1356,13 @@ static int patch_cs420x(struct hda_codec *codec) return -ENOMEM; codec->spec = spec; + spec->vendor_nid = CS420X_VENDOR_NID; + spec->board_config = snd_hda_check_board_config(codec, CS420X_MODELS, cs420x_models, cs420x_cfg_tbl); if (spec->board_config >= 0) - fix_pincfg(codec, spec->board_config); + fix_pincfg(codec, spec->board_config, cs_pincfgs); switch (spec->board_config) { case CS420X_IMAC27: @@ -1272,6 +1389,562 @@ static int patch_cs420x(struct hda_codec *codec) return err; } +/* + * Cirrus Logic CS4210 + * + * 1 DAC => HP(sense) / Speakers, + * 1 ADC <= LineIn(sense) / MicIn / DMicIn, + * 1 SPDIF OUT => SPDIF Transmitter(sense) +*/ + +/* CS4210 board names */ +static const char *cs421x_models[CS421X_MODELS] = { + [CS421X_CDB4210] = "cdb4210", +}; + +static const struct snd_pci_quirk cs421x_cfg_tbl[] = { + /* Test Intel board + CDB2410 */ + SND_PCI_QUIRK(0x8086, 0x5001, "DP45SG/CDB4210", CS421X_CDB4210), + {} /* terminator */ +}; + +/* CS4210 board pinconfigs */ +/* Default CS4210 (CDB4210) */ +static const struct cs_pincfg cdb4210_pincfgs[] = { + { 0x05, 0x0321401f }, + { 0x06, 0x90170010 }, + { 0x07, 0x03813031 }, + { 0x08, 0xb7a70037 }, + { 0x09, 0xb7a6003e }, + { 0x0a, 0x034510f0 }, + {} /* terminator */ +}; + +static const struct cs_pincfg *cs421x_pincfgs[CS421X_MODELS] = { + [CS421X_CDB4210] = cdb4210_pincfgs, +}; + +static const struct hda_verb cs421x_coef_init_verbs[] = { + {0x0B, AC_VERB_SET_PROC_STATE, 1}, + {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DEV_CFG}, + /* + Disable Coefficient Index Auto-Increment(DAI)=1, + PDREF=0 + */ + {0x0B, AC_VERB_SET_PROC_COEF, 0x0001 }, + + {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_ADC_CFG}, + /* ADC SZCMode = Digital Soft Ramp */ + {0x0B, AC_VERB_SET_PROC_COEF, 0x0002 }, + + {0x0B, AC_VERB_SET_COEF_INDEX, CS421X_IDX_DAC_CFG}, + {0x0B, AC_VERB_SET_PROC_COEF, + (0x0002 /* DAC SZCMode = Digital Soft Ramp */ + | 0x0004 /* Mute DAC on FIFO error */ + | 0x0008 /* Enable DAC High Pass Filter */ + )}, + {} /* terminator */ +}; + +/* Errata: CS4210 rev A1 Silicon + * + * http://www.cirrus.com/en/pubs/errata/ + *
Description: + * 1. Performance degradation is present in the ADC. + * 2. Speaker output is not completely muted upon HP detect. + * 3. Noise is present when clipping occurs on the amplified + * speaker outputs. + * + * Workaround: + * The following verb sequence written to the registers during + * initialization will correct the issues listed above. + */ + +static const struct hda_verb cs421x_coef_init_verbs_A1_silicon_fixes[] = { + {0x0B, AC_VERB_SET_PROC_STATE, 0x01}, /* VPW: processing on */ + + {0x0B, AC_VERB_SET_COEF_INDEX, 0x0006}, + {0x0B, AC_VERB_SET_PROC_COEF, 0x9999}, /* Test mode: on */ + + {0x0B, AC_VERB_SET_COEF_INDEX, 0x000A}, + {0x0B, AC_VERB_SET_PROC_COEF, 0x14CB}, /* Chop double */ + + {0x0B, AC_VERB_SET_COEF_INDEX, 0x0011}, + {0x0B, AC_VERB_SET_PROC_COEF, 0xA2D0}, /* Increase ADC current */ + + {0x0B, AC_VERB_SET_COEF_INDEX, 0x001A}, + {0x0B, AC_VERB_SET_PROC_COEF, 0x02A9}, /* Mute speaker */ + + {0x0B, AC_VERB_SET_COEF_INDEX, 0x001B}, + {0x0B, AC_VERB_SET_PROC_COEF, 0x1006}, /* Remove noise */ + + {} /* terminator */ +}; + +/* Speaker Amp Gain is controlled by the vendor widget's coef 4 */ +static const DECLARE_TLV_DB_SCALE(cs421x_speaker_boost_db_scale, 900, 300, 0); + +static int cs421x_boost_vol_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + uinfo->type = SNDRV_CTL_ELEM_TYPE_INTEGER; + uinfo->count = 1; + uinfo->value.integer.min = 0; + uinfo->value.integer.max = 3; + return 0; +} + +static int cs421x_boost_vol_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + + ucontrol->value.integer.value[0] = + cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL) & 0x0003; + return 0; +} + +static int cs421x_boost_vol_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + + unsigned int vol = ucontrol->value.integer.value[0]; + unsigned int coef = + cs_vendor_coef_get(codec, CS421X_IDX_SPK_CTL); + unsigned int original_coef = coef; + + coef &= ~0x0003; + coef |= (vol & 0x0003); + if (original_coef == coef) + return 0; + else { + cs_vendor_coef_set(codec, CS421X_IDX_SPK_CTL, coef); + return 1; + } +} + +static const struct snd_kcontrol_new cs421x_speaker_bost_ctl = { + + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .access = (SNDRV_CTL_ELEM_ACCESS_READWRITE | + SNDRV_CTL_ELEM_ACCESS_TLV_READ), + .name = "Speaker Boost Playback Volume", + .info = cs421x_boost_vol_info, + .get = cs421x_boost_vol_get, + .put = cs421x_boost_vol_put, + .tlv = { .p = cs421x_speaker_boost_db_scale }, +}; + +static void cs421x_pinmux_init(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + unsigned int def_conf, coef; + + /* GPIO, DMIC_SCL, DMIC_SDA and SENSE_B are multiplexed */ + coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG); + + if (spec->gpio_mask) + coef |= 0x0008; /* B1,B2 are GPIOs */ + else + coef &= ~0x0008; + + if (spec->sense_b) + coef |= 0x0010; /* B2 is SENSE_B, not inverted */ + else + coef &= ~0x0010; + + cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef); + + if ((spec->gpio_mask || spec->sense_b) && + is_active_pin(codec, CS421X_DMIC_PIN_NID)) { + + /* + GPIO or SENSE_B forced - disconnect the DMIC pin.
+ */ + def_conf = snd_hda_codec_get_pincfg(codec, CS421X_DMIC_PIN_NID); + def_conf &= ~AC_DEFCFG_PORT_CONN; + def_conf |= (AC_JACK_PORT_NONE << AC_DEFCFG_PORT_CONN_SHIFT); + snd_hda_codec_set_pincfg(codec, CS421X_DMIC_PIN_NID, def_conf); + } +} + +static void init_cs421x_digital(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + int i; + + + for (i = 0; i < cfg->dig_outs; i++) { + hda_nid_t nid = cfg->dig_out_pins[i]; + if (!cfg->speaker_outs) + continue; + if (get_wcaps(codec, nid) & AC_WCAP_UNSOL_CAP) { + + snd_hda_codec_write(codec, nid, 0, + AC_VERB_SET_UNSOLICITED_ENABLE, + AC_USRSP_EN | SPDIF_EVENT); + spec->spdif_detect = 1; + } + } +} + +static int cs421x_init(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + + snd_hda_sequence_write(codec, cs421x_coef_init_verbs); + snd_hda_sequence_write(codec, cs421x_coef_init_verbs_A1_silicon_fixes); + + cs421x_pinmux_init(codec); + + if (spec->gpio_mask) { + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK, + spec->gpio_mask); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION, + spec->gpio_dir); + snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, + spec->gpio_data); + } + + init_output(codec); + init_input(codec); + init_cs421x_digital(codec); + + return 0; +} + +/* + * CS4210 Input MUX (1 ADC) + */ +static int cs421x_mux_enum_info(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_info *uinfo) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + struct cs_spec *spec = codec->spec; + + return snd_hda_input_mux_info(&spec->input_mux, uinfo); +} + +static int cs421x_mux_enum_get(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + struct cs_spec *spec = codec->spec; + + ucontrol->value.enumerated.item[0] = spec->cur_input; + return 0; +} + +static int cs421x_mux_enum_put(struct snd_kcontrol *kcontrol, + struct snd_ctl_elem_value *ucontrol) +{ + struct hda_codec *codec = snd_kcontrol_chip(kcontrol); + struct cs_spec *spec = codec->spec; + + return snd_hda_input_mux_put(codec, &spec->input_mux, ucontrol, + spec->adc_nid[0], &spec->cur_input); + +} + +static struct snd_kcontrol_new cs421x_capture_source = { + + .iface = SNDRV_CTL_ELEM_IFACE_MIXER, + .name = "Capture Source", + .access = SNDRV_CTL_ELEM_ACCESS_READWRITE, + .info = cs421x_mux_enum_info, + .get = cs421x_mux_enum_get, + .put = cs421x_mux_enum_put, +}; + +static int cs421x_add_input_volume_control(struct hda_codec *codec, int item) +{ + struct cs_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + const struct hda_input_mux *imux = &spec->input_mux; + hda_nid_t pin = cfg->inputs[item].pin; + struct snd_kcontrol *kctl; + u32 caps; + + if (!(get_wcaps(codec, pin) & AC_WCAP_IN_AMP)) + return 0; + + caps = query_amp_caps(codec, pin, HDA_INPUT); + caps = (caps & AC_AMPCAP_NUM_STEPS) >> AC_AMPCAP_NUM_STEPS_SHIFT; + if (caps <= 1) + return 0; + + return add_volume(codec, imux->items[item].label, 0, + HDA_COMPOSE_AMP_VAL(pin, 3, 0, HDA_INPUT), 1, &kctl); +} + +/* add a (input-boost) volume control to the given input pin */ +static int build_cs421x_input(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + struct hda_input_mux *imux = &spec->input_mux; + int i, err, type_idx; + const char *label; + + if (!spec->num_inputs) + return 0; + + /* make bind-capture */ + spec->capture_bind[0] = make_bind_capture(codec, 
&snd_hda_bind_sw); + spec->capture_bind[1] = make_bind_capture(codec, &snd_hda_bind_vol); + for (i = 0; i < 2; i++) { + struct snd_kcontrol *kctl; + int n; + if (!spec->capture_bind[i]) + return -ENOMEM; + kctl = snd_ctl_new1(&cs_capture_ctls[i], codec); + if (!kctl) + return -ENOMEM; + kctl->private_value = (long)spec->capture_bind[i]; + err = snd_hda_ctl_add(codec, 0, kctl); + if (err < 0) + return err; + for (n = 0; n < AUTO_PIN_LAST; n++) { + if (!spec->adc_nid[n]) + continue; + err = snd_hda_add_nid(codec, kctl, 0, spec->adc_nid[n]); + if (err < 0) + return err; + } + } + + /* Add Input MUX Items + Capture Volume/Switch */ + for (i = 0; i < spec->num_inputs; i++) { + label = hda_get_autocfg_input_label(codec, cfg, i); + snd_hda_add_imux_item(imux, label, spec->adc_idx[i], &type_idx); + + err = cs421x_add_input_volume_control(codec, i); + if (err < 0) + return err; + } + + /* + Add 'Capture Source' Switch if + * 2 inputs and no mic detect + * 3 inputs + */ + if ((spec->num_inputs == 2 && !spec->mic_detect) || + (spec->num_inputs == 3)) { + + err = snd_hda_ctl_add(codec, spec->adc_nid[0], + snd_ctl_new1(&cs421x_capture_source, codec)); + if (err < 0) + return err; + } + + return 0; +} + +/* Single DAC (Mute/Gain) */ +static int build_cs421x_output(struct hda_codec *codec) +{ + hda_nid_t dac = CS4210_DAC_NID; + struct cs_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + struct snd_kcontrol *kctl; + int err; + const char *name = "HP/Speakers"; + + fix_volume_caps(codec, dac); + if (!spec->vmaster_sw) { + err = add_vmaster(codec, dac); + if (err < 0) + return err; + } + + err = add_mute(codec, name, 0, + HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl); + if (err < 0) + return err; + err = snd_ctl_add_slave(spec->vmaster_sw, kctl); + if (err < 0) + return err; + + err = add_volume(codec, name, 0, + HDA_COMPOSE_AMP_VAL(dac, 3, 0, HDA_OUTPUT), 0, &kctl); + if (err < 0) + return err; + err = snd_ctl_add_slave(spec->vmaster_vol, kctl); + if (err < 0) + return err; + + if (cfg->speaker_outs) { + err = snd_hda_ctl_add(codec, 0, + snd_ctl_new1(&cs421x_speaker_boost_ctl, codec)); + if (err < 0) + return err; + } + return err; +} + +static int cs421x_build_controls(struct hda_codec *codec) +{ + int err; + + err = build_cs421x_output(codec); + if (err < 0) + return err; + err = build_cs421x_input(codec); + if (err < 0) + return err; + err = build_digital_output(codec); + if (err < 0) + return err; + return cs421x_init(codec); +} + +static void cs421x_unsol_event(struct hda_codec *codec, unsigned int res) +{ + switch ((res >> 26) & 0x3f) { + case HP_EVENT: + case SPDIF_EVENT: + cs_automute(codec); + break; + + case MIC_EVENT: + cs_automic(codec); + break; + } +}
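
The tag recovered by (res >> 26) & 0x3f above is the 6-bit value that was programmed into each pin with AC_VERB_SET_UNSOLICITED_ENABLE; the SPDIF case, for instance, is armed by the AC_USRSP_EN | SPDIF_EVENT write in init_cs421x_digital() earlier. A minimal sketch of arming a headphone jack the same way (illustrative helper name, assuming HP_EVENT is the driver's headphone tag):

static void arm_hp_unsol_sketch(struct hda_codec *codec, hda_nid_t pin)
{
	/* enable unsolicited responses on the pin, tagged HP_EVENT;
	 * the tag comes back in bits 31:26 of the response word */
	snd_hda_codec_write(codec, pin, 0,
			    AC_VERB_SET_UNSOLICITED_ENABLE,
			    AC_USRSP_EN | HP_EVENT);
}

+ +static int parse_cs421x_input(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + struct auto_pin_cfg *cfg = &spec->autocfg; + int i; + + for (i = 0; i < cfg->num_inputs; i++) { + hda_nid_t pin = cfg->inputs[i].pin; + spec->adc_nid[i] = get_adc(codec, pin, &spec->adc_idx[i]); + spec->cur_input = spec->last_input = i; + spec->num_inputs++; + + /* check whether the automatic mic switch is available */ + if (is_ext_mic(codec, i) && cfg->num_inputs >= 2) { + spec->mic_detect = 1; + spec->automic_idx = i; + } + } + return 0; +} + +static int cs421x_parse_auto_config(struct hda_codec *codec) +{ + struct cs_spec *spec = codec->spec; + int err; + + err = snd_hda_parse_pin_def_config(codec, &spec->autocfg, NULL); + if (err < 0) + return err; + err = parse_output(codec); + if (err < 0) + return err; + err =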
parse_cs421x_input(codec); + if (err < 0) + return err; + err = parse_digital_output(codec); + if (err < 0) + return err; + return 0; +} + +#ifdef CONFIG_PM +/* + Manage PDREF, when transitioning to D3hot + (DAC,ADC) -> D3, PDREF=1, AFG->D3 +*/ +static int cs421x_suspend(struct hda_codec *codec, pm_message_t state) +{ + unsigned int coef; + + snd_hda_shutup_pins(codec); + + snd_hda_codec_write(codec, CS4210_DAC_NID, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + snd_hda_codec_write(codec, CS4210_ADC_NID, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + + coef = cs_vendor_coef_get(codec, CS421X_IDX_DEV_CFG); + coef |= 0x0004; /* PDREF */ + cs_vendor_coef_set(codec, CS421X_IDX_DEV_CFG, coef); + + return 0; +} +#endif + +static struct hda_codec_ops cs4210_patch_ops = { + .build_controls = cs421x_build_controls, + .build_pcms = cs_build_pcms, + .init = cs421x_init, + .free = cs_free, + .unsol_event = cs421x_unsol_event, +#ifdef CONFIG_PM + .suspend = cs421x_suspend, +#endif +}; + +static int patch_cs421x(struct hda_codec *codec) +{ + struct cs_spec *spec; + int err; + + spec = kzalloc(sizeof(*spec), GFP_KERNEL); + if (!spec) + return -ENOMEM; + codec->spec = spec; + + spec->vendor_nid = CS421X_VENDOR_NID; + + spec->board_config = + snd_hda_check_board_config(codec, CS421X_MODELS, + cs421x_models, cs421x_cfg_tbl); + if (spec->board_config >= 0) + fix_pincfg(codec, spec->board_config, cs421x_pincfgs); + /* + Setup GPIO/SENSE for each board (if used) + */ + switch (spec->board_config) { + case CS421X_CDB4210: + snd_printd("CS4210 board: %s\n", + cs421x_models[spec->board_config]); +/* spec->gpio_mask = 3; + spec->gpio_dir = 3; + spec->gpio_data = 3; +*/ + spec->sense_b = 1; + + break; + } + + /* + Update the GPIO/DMIC/SENSE_B pinmux before the configuration + is auto-parsed. If GPIO or SENSE_B is forced, DMIC input + is disabled. 
+ */ + cs421x_pinmux_init(codec); + + err = cs421x_parse_auto_config(codec); + if (err < 0) + goto error; + + codec->patch_ops = cs4210_patch_ops; + + return 0; + + error: + kfree(codec->spec); + codec->spec = NULL; + return err; +} + /* * patch entries @@ -1279,11 +1952,13 @@ static int patch_cs420x(struct hda_codec *codec) static const struct hda_codec_preset snd_hda_preset_cirrus[] = { { .id = 0x10134206, .name = "CS4206", .patch = patch_cs420x }, { .id = 0x10134207, .name = "CS4207", .patch = patch_cs420x }, + { .id = 0x10134210, .name = "CS4210", .patch = patch_cs421x }, {} /* terminator */ }; MODULE_ALIAS("snd-hda-codec-id:10134206"); MODULE_ALIAS("snd-hda-codec-id:10134207"); +MODULE_ALIAS("snd-hda-codec-id:10134210"); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Cirrus Logic HD-audio codec"); diff --git a/sound/pci/hda/patch_conexant.c b/sound/pci/hda/patch_conexant.c index 884f67b8f4e0..502fc9499453 100644 --- a/sound/pci/hda/patch_conexant.c +++ b/sound/pci/hda/patch_conexant.c @@ -446,6 +446,19 @@ static int conexant_init_jacks(struct hda_codec *codec) return 0; } +static void conexant_set_power(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state) +{ + if (power_state == AC_PWRST_D3) + msleep(100); + snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE, + power_state); + /* partial workaround for "azx_get_response timeout" */ + if (power_state == AC_PWRST_D0) + msleep(10); + snd_hda_codec_set_power_to_all(codec, fg, power_state, true); +} + static int conexant_init(struct hda_codec *codec) { struct conexant_spec *spec = codec->spec; @@ -588,6 +601,7 @@ static const struct hda_codec_ops conexant_patch_ops = { .build_pcms = conexant_build_pcms, .init = conexant_init, .free = conexant_free, + .set_power_state = conexant_set_power, #ifdef CONFIG_SND_HDA_POWER_SAVE .suspend = conexant_suspend, #endif diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 52ce07534e5b..e125c60fe352 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -895,13 +895,15 @@ static void alc_init_auto_hp(struct hda_codec *codec) if (present == 3) spec->automute_hp_lo = 1; /* both HP and LO automute */ - if (!cfg->speaker_pins[0]) { + if (!cfg->speaker_pins[0] && + cfg->line_out_type == AUTO_PIN_SPEAKER_OUT) { memcpy(cfg->speaker_pins, cfg->line_out_pins, sizeof(cfg->speaker_pins)); cfg->speaker_outs = cfg->line_outs; } - if (!cfg->hp_pins[0]) { + if (!cfg->hp_pins[0] && + cfg->line_out_type == AUTO_PIN_HP_OUT) { memcpy(cfg->hp_pins, cfg->line_out_pins, sizeof(cfg->hp_pins)); cfg->hp_outs = cfg->line_outs; @@ -920,6 +922,7 @@ static void alc_init_auto_hp(struct hda_codec *codec) spec->automute_mode = ALC_AUTOMUTE_PIN; } if (spec->automute && cfg->line_out_pins[0] && + cfg->speaker_pins[0] && cfg->line_out_pins[0] != cfg->hp_pins[0] && cfg->line_out_pins[0] != cfg->speaker_pins[0]) { for (i = 0; i < cfg->line_outs; i++) { @@ -1911,7 +1914,7 @@ static int alc_build_controls(struct hda_codec *codec) return err; } } - if (spec->cap_mixer) { + if (spec->cap_mixer && spec->adc_nids) { const char *kname = kctl ? 
kctl->id.name : NULL; for (knew = spec->cap_mixer; knew->name; knew++) { if (kname && strcmp(knew->name, kname) == 0) @@ -2386,7 +2389,7 @@ static int alc_suspend(struct hda_codec *codec, pm_message_t state) } #endif -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM static int alc_resume(struct hda_codec *codec) { msleep(150); /* to avoid pop noise */ @@ -2406,7 +2409,7 @@ static const struct hda_codec_ops alc_patch_ops = { .init = alc_init, .free = alc_free, .unsol_event = alc_unsol_event, -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM .resume = alc_resume, #endif #ifdef CONFIG_SND_HDA_POWER_SAVE @@ -2801,7 +2804,8 @@ static int alc_auto_fill_dac_nids(struct hda_codec *codec) int i; again: - spec->multiout.num_dacs = 0; + /* set num_dacs once to full for alc_auto_look_for_dac() */ + spec->multiout.num_dacs = cfg->line_outs; spec->multiout.hp_nid = 0; spec->multiout.extra_out_nid[0] = 0; memset(spec->private_dac_nids, 0, sizeof(spec->private_dac_nids)); @@ -2834,6 +2838,8 @@ static int alc_auto_fill_dac_nids(struct hda_codec *codec) } } + /* re-count num_dacs and squash invalid entries */ + spec->multiout.num_dacs = 0; for (i = 0; i < cfg->line_outs; i++) { if (spec->private_dac_nids[i]) spec->multiout.num_dacs++; @@ -3674,7 +3680,7 @@ static int patch_alc880(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc880_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -3801,7 +3807,7 @@ static int patch_alc260(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc260_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -3980,7 +3986,7 @@ static int patch_alc882(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc882_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -4134,7 +4140,7 @@ static int patch_alc262(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc262_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -4290,7 +4296,7 @@ static int patch_alc268(struct hda_codec *codec) (0 << AC_AMPCAP_MUTE_SHIFT)); } - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -4410,7 +4416,7 @@ static void alc269_shutup(struct hda_codec *codec) } } -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM static int alc269_resume(struct hda_codec *codec) { if ((alc_read_coef_idx(codec, 0) & 0x00ff) == 0x018) { @@ -4433,7 +4439,7 @@ static int alc269_resume(struct hda_codec *codec) hda_call_check_power_status(codec, 0x01); return 0; } -#endif /* SND_HDA_NEEDS_RESUME */ +#endif /* CONFIG_PM */ static void alc269_fixup_hweq(struct hda_codec *codec, const struct alc_fixup *fix, int action) @@ 
-4702,7 +4708,7 @@ static int patch_alc269(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc269_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -4725,7 +4731,7 @@ static int patch_alc269(struct hda_codec *codec) spec->vmaster_nid = 0x02; codec->patch_ops = alc_patch_ops; -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM codec->patch_ops.resume = alc269_resume; #endif if (board_config == ALC_MODEL_AUTO) @@ -4840,7 +4846,7 @@ static int patch_alc861(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc861_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -4981,7 +4987,7 @@ static int patch_alc861vd(struct hda_codec *codec) add_verb(spec, alc660vd_eapd_verbs); } - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -5197,7 +5203,7 @@ static int patch_alc662(struct hda_codec *codec) if (board_config != ALC_MODEL_AUTO) setup_preset(codec, &alc662_presets[board_config]); - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); @@ -5333,7 +5339,7 @@ static int patch_alc680(struct hda_codec *codec) #endif } - if (!spec->no_analog && !spec->adc_nids && spec->input_mux) { + if (!spec->no_analog && !spec->adc_nids) { alc_auto_fill_adc_caps(codec); alc_rebuild_imux_for_auto_mic(codec); alc_remove_invalid_adc_nids(codec); diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c index 56425a53cf1b..aa376b59c006 100644 --- a/sound/pci/hda/patch_sigmatel.c +++ b/sound/pci/hda/patch_sigmatel.c @@ -95,6 +95,7 @@ enum { STAC_92HD83XXX_PWR_REF, STAC_DELL_S14, STAC_92HD83XXX_HP, + STAC_92HD83XXX_HP_cNB11_INTQUAD, STAC_HP_DV7_4000, STAC_92HD83XXX_MODELS }; @@ -212,6 +213,7 @@ struct sigmatel_spec { unsigned int gpio_mute; unsigned int gpio_led; unsigned int gpio_led_polarity; + unsigned int vref_led; /* stream */ unsigned int stream_delay; @@ -671,6 +673,30 @@ static int stac92xx_smux_enum_put(struct snd_kcontrol *kcontrol, return 0; } +static int stac_vrefout_set(struct hda_codec *codec, + hda_nid_t nid, unsigned int new_vref) +{ + int error, pinctl; + + snd_printdd("%s, nid %x ctl %x\n", __func__, nid, new_vref); + pinctl = snd_hda_codec_read(codec, nid, 0, + AC_VERB_GET_PIN_WIDGET_CONTROL, 0); + + if (pinctl < 0) + return pinctl; + + pinctl &= 0xff; + pinctl &= ~AC_PINCTL_VREFEN; + pinctl |= (new_vref & AC_PINCTL_VREFEN); + + error = snd_hda_codec_write_cache(codec, nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, pinctl); + if (error < 0) + return error; + + return 1; +} + static unsigned int stac92xx_vref_set(struct hda_codec *codec, hda_nid_t nid, unsigned int new_vref) { @@ -1636,10 +1662,17 @@ static const unsigned int hp_dv7_4000_pin_configs[10] = { 0x40f000f0, 0x40f000f0, }; +static const unsigned int hp_cNB11_intquad_pin_configs[10] = { + 0x40f000f0, 0x0221101f, 0x02a11020, 0x92170110, + 0x40f000f0, 0x92170110, 0x40f000f0, 
0xd5a30130, + 0x40f000f0, 0x40f000f0, +}; + static const unsigned int *stac92hd83xxx_brd_tbl[STAC_92HD83XXX_MODELS] = { [STAC_92HD83XXX_REF] = ref92hd83xxx_pin_configs, [STAC_92HD83XXX_PWR_REF] = ref92hd83xxx_pin_configs, [STAC_DELL_S14] = dell_s14_pin_configs, + [STAC_92HD83XXX_HP_cNB11_INTQUAD] = hp_cNB11_intquad_pin_configs, [STAC_HP_DV7_4000] = hp_dv7_4000_pin_configs, }; @@ -1649,6 +1682,7 @@ static const char * const stac92hd83xxx_models[STAC_92HD83XXX_MODELS] = { [STAC_92HD83XXX_PWR_REF] = "mic-ref", [STAC_DELL_S14] = "dell-s14", [STAC_92HD83XXX_HP] = "hp", + [STAC_92HD83XXX_HP_cNB11_INTQUAD] = "hp_cNB11_intquad", [STAC_HP_DV7_4000] = "hp-dv7-4000", }; @@ -1661,7 +1695,47 @@ static const struct snd_pci_quirk stac92hd83xxx_cfg_tbl[] = { SND_PCI_QUIRK(PCI_VENDOR_ID_DELL, 0x02ba, "unknown Dell", STAC_DELL_S14), SND_PCI_QUIRK_MASK(PCI_VENDOR_ID_HP, 0xff00, 0x3600, - "HP", STAC_92HD83XXX_HP), + "HP", STAC_92HD83XXX_HP), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1656, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1657, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1658, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x1659, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165A, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x165B, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3388, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3389, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355B, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355C, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355D, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355E, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x355F, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3560, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358B, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358C, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x358D, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3591, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3592, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), + SND_PCI_QUIRK(PCI_VENDOR_ID_HP, 0x3593, + "HP", STAC_92HD83XXX_HP_cNB11_INTQUAD), {} /* terminator */ }; @@ -4020,6 +4094,8 @@ static void stac_gpio_set(struct hda_codec *codec, unsigned int mask, { unsigned int gpiostate, gpiomask, gpiodir; + snd_printdd("%s msk %x dir %x gpio %x\n", __func__, mask, dir_mask, data); + gpiostate = snd_hda_codec_read(codec, codec->afg, 0, AC_VERB_GET_GPIO_DATA, 0); gpiostate = (gpiostate & ~dir_mask) | (data & dir_mask); @@ -4209,10 +4285,12 @@ static void stac_store_hints(struct hda_codec *codec) spec->eapd_switch = val; get_int_hint(codec, "gpio_led_polarity", &spec->gpio_led_polarity); if (get_int_hint(codec, "gpio_led", &spec->gpio_led)) { - spec->gpio_mask |= spec->gpio_led; - spec->gpio_dir |= spec->gpio_led; - if (spec->gpio_led_polarity) - spec->gpio_data |= spec->gpio_led; + if (spec->gpio_led <= 8) { + spec->gpio_mask |= spec->gpio_led; + spec->gpio_dir |= spec->gpio_led; + if (spec->gpio_led_polarity) + spec->gpio_data |= spec->gpio_led; + } } } @@ -4382,11 
+4460,26 @@ static void stac92xx_free_kctls(struct hda_codec *codec) snd_array_free(&spec->kctls); } +static void stac92xx_shutup_pins(struct hda_codec *codec) +{ + unsigned int i, def_conf; + + if (codec->bus->shutdown) + return; + for (i = 0; i < codec->init_pins.used; i++) { + struct hda_pincfg *pin = snd_array_elem(&codec->init_pins, i); + def_conf = snd_hda_codec_get_pincfg(codec, pin->nid); + if (get_defcfg_connect(def_conf) != AC_JACK_PORT_NONE) + snd_hda_codec_write(codec, pin->nid, 0, + AC_VERB_SET_PIN_WIDGET_CONTROL, 0); + } +} + static void stac92xx_shutup(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - snd_hda_shutup_pins(codec); + stac92xx_shutup_pins(codec); if (spec->eapd_mask) stac_gpio_set(codec, spec->gpio_mask, @@ -4784,10 +4877,11 @@ static int find_mute_led_gpio(struct hda_codec *codec, int default_polarity) if ((codec->subsystem_id >> 16) == PCI_VENDOR_ID_HP) { while ((dev = dmi_find_device(DMI_DEV_TYPE_OEM_STRING, NULL, dev))) { - if (sscanf(dev->name, "HP_Mute_LED_%d_%d", + if (sscanf(dev->name, "HP_Mute_LED_%d_%x", &spec->gpio_led_polarity, &spec->gpio_led) == 2) { - spec->gpio_led = 1 << spec->gpio_led; + if (spec->gpio_led < 4) + spec->gpio_led = 1 << spec->gpio_led; return 1; } if (sscanf(dev->name, "HP_Mute_LED_%d", @@ -4885,7 +4979,7 @@ static void stac927x_proc_hook(struct snd_info_buffer *buffer, #define stac927x_proc_hook NULL #endif -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM static int stac92xx_resume(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; @@ -4901,29 +4995,81 @@ static int stac92xx_resume(struct hda_codec *codec) stac_issue_unsol_event(codec, spec->autocfg.line_out_pins[0]); } + return 0; +} + +static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state) +{ + stac92xx_shutup(codec); + return 0; +} + +#ifdef CONFIG_SND_HDA_POWER_SAVE +static int stac92xx_pre_resume(struct hda_codec *codec) +{ + struct sigmatel_spec *spec = codec->spec; + /* sync mute LED */ - if (spec->gpio_led) - hda_call_check_power_status(codec, 0x01); + if (spec->gpio_led) { + if (spec->gpio_led <= 8) { + stac_gpio_set(codec, spec->gpio_mask, + spec->gpio_dir, spec->gpio_data); + } else { + stac_vrefout_set(codec, + spec->gpio_led, spec->vref_led); + } + } return 0; } +static int stac92xx_post_suspend(struct hda_codec *codec) +{ + struct sigmatel_spec *spec = codec->spec; + if (spec->gpio_led > 8) { + /* when a vref-out pin drives the mute LED, the codec AFG + * is normally kept out of D3; on system suspend, though, + * D3 can (and should) be entered + */ + snd_hda_codec_read(codec, codec->afg, 0, + AC_VERB_SET_POWER_STATE, AC_PWRST_D3); + } + return 0; +} + +static void stac92xx_set_power_state(struct hda_codec *codec, hda_nid_t fg, + unsigned int power_state) +{ + unsigned int afg_power_state = power_state; + struct sigmatel_spec *spec = codec->spec; + + if (power_state == AC_PWRST_D3) { + if (spec->gpio_led > 8) { + /* when a vref-out pin drives the mute LED, keep + * the codec AFG out of D3 + */ + afg_power_state = AC_PWRST_D1; + } + /* this delay seems necessary to avoid click noise at power-down */ + msleep(100); + } + snd_hda_codec_read(codec, fg, 0, AC_VERB_SET_POWER_STATE, + afg_power_state); + snd_hda_codec_set_power_to_all(codec, fg, power_state, true); +}
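
A note on the spec->gpio_led encoding these hooks test (my reading of this patch; it is not spelled out in it): find_mute_led_gpio() parses DMI strings such as "HP_Mute_LED_0_2". When the second field is below 4 it is a GPIO index and is converted to a one-bit mask (1, 2, 4 or 8), so "gpio_led <= 8" means the LED hangs off a codec GPIO; a larger value (now parsed with %x) is taken as a pin NID whose VREF level drives the LED via stac_vrefout_set(). A sketch of the resulting dispatch, with stac_mute_led_set_sketch() being an illustrative name and gpio_led_polarity assumed to be 0 or 1:

static void stac_mute_led_set_sketch(struct hda_codec *codec, bool muted)
{
	struct sigmatel_spec *spec = codec->spec;

	if (spec->gpio_led <= 8) {
		/* GPIO-driven LED: polarity selects the data level */
		if (muted ^ spec->gpio_led_polarity)
			spec->gpio_data |= spec->gpio_led;
		else
			spec->gpio_data &= ~spec->gpio_led;
		stac_gpio_set(codec, spec->gpio_mask,
			      spec->gpio_dir, spec->gpio_data);
	} else {
		/* vref-driven LED: switch the pin between HiZ and ground */
		spec->vref_led = (muted ^ spec->gpio_led_polarity) ?
			AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_GRD;
		stac_vrefout_set(codec, spec->gpio_led, spec->vref_led);
	}
}

This also explains why the vref case keeps the AFG at D1 in stac92xx_set_power_state() above: dropping the whole function group to D3 would lose the pin's VREF drive and with it the LED state.

+ /* - * using power check for controlling mute led of HP notebooks - * check for mute state only on Speakers (nid = 0x10) - * - * For this feature CONFIG_SND_HDA_POWER_SAVE is needed, otherwise - * the LED is NOT working properly !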
- * - * Changed name to reflect that it now works for any designated - * model, not just HP HDX. + * For this feature CONFIG_SND_HDA_POWER_SAVE is needed + * as mute LED state is updated in check_power_status hook */ - -#ifdef CONFIG_SND_HDA_POWER_SAVE -static int stac92xx_hp_check_power_status(struct hda_codec *codec, - hda_nid_t nid) +static int stac92xx_update_led_status(struct hda_codec *codec) { struct sigmatel_spec *spec = codec->spec; - int i, muted = 1; + int i, num_ext_dacs, muted = 1; + unsigned int muted_lvl, notmtd_lvl; + hda_nid_t nid; + + if (!spec->gpio_led) + return 0; for (i = 0; i < spec->multiout.num_dacs; i++) { nid = spec->multiout.dac_nids[i]; @@ -4933,27 +5079,58 @@ static int stac92xx_hp_check_power_status(struct hda_codec *codec, break; } } - if (muted) - spec->gpio_data &= ~spec->gpio_led; /* orange */ - else - spec->gpio_data |= spec->gpio_led; /* white */ - - if (!spec->gpio_led_polarity) { - /* LED state is inverted on these systems */ - spec->gpio_data ^= spec->gpio_led; + if (muted && spec->multiout.hp_nid) + if (!(snd_hda_codec_amp_read(codec, + spec->multiout.hp_nid, 0, HDA_OUTPUT, 0) & + HDA_AMP_MUTE)) { + muted = 0; /* HP is not muted */ + } + num_ext_dacs = ARRAY_SIZE(spec->multiout.extra_out_nid); + for (i = 0; muted && i < num_ext_dacs; i++) { + nid = spec->multiout.extra_out_nid[i]; + if (nid == 0) + break; + if (!(snd_hda_codec_amp_read(codec, nid, 0, HDA_OUTPUT, 0) & + HDA_AMP_MUTE)) { + muted = 0; /* extra output is not muted */ + } } + /*polarity defines *not* muted state level*/ + if (spec->gpio_led <= 8) { + if (muted) + spec->gpio_data &= ~spec->gpio_led; /* orange */ + else + spec->gpio_data |= spec->gpio_led; /* white */ - stac_gpio_set(codec, spec->gpio_mask, spec->gpio_dir, spec->gpio_data); + if (!spec->gpio_led_polarity) { + /* LED state is inverted on these systems */ + spec->gpio_data ^= spec->gpio_led; + } + stac_gpio_set(codec, spec->gpio_mask, + spec->gpio_dir, spec->gpio_data); + } else { + notmtd_lvl = spec->gpio_led_polarity ? + AC_PINCTL_VREF_HIZ : AC_PINCTL_VREF_GRD; + muted_lvl = spec->gpio_led_polarity ? + AC_PINCTL_VREF_GRD : AC_PINCTL_VREF_HIZ; + spec->vref_led = muted ? muted_lvl : notmtd_lvl; + stac_vrefout_set(codec, spec->gpio_led, spec->vref_led); + } return 0; } -#endif -static int stac92xx_suspend(struct hda_codec *codec, pm_message_t state) +/* + * use power check for controlling mute led of HP notebooks + */ +static int stac92xx_check_power_status(struct hda_codec *codec, + hda_nid_t nid) { - stac92xx_shutup(codec); + stac92xx_update_led_status(codec); + return 0; } -#endif +#endif /* CONFIG_SND_HDA_POWER_SAVE */ +#endif /* CONFIG_PM */ static const struct hda_codec_ops stac92xx_patch_ops = { .build_controls = stac92xx_build_controls, @@ -4961,7 +5138,7 @@ static const struct hda_codec_ops stac92xx_patch_ops = { .init = stac92xx_init, .free = stac92xx_free, .unsol_event = stac92xx_unsol_event, -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM .suspend = stac92xx_suspend, .resume = stac92xx_resume, #endif @@ -5477,12 +5654,19 @@ again: #ifdef CONFIG_SND_HDA_POWER_SAVE if (spec->gpio_led) { - spec->gpio_mask |= spec->gpio_led; - spec->gpio_dir |= spec->gpio_led; - spec->gpio_data |= spec->gpio_led; - /* register check_power_status callback. 
*/ + if (spec->gpio_led <= 8) { + spec->gpio_mask |= spec->gpio_led; + spec->gpio_dir |= spec->gpio_led; + spec->gpio_data |= spec->gpio_led; + } else { + codec->patch_ops.set_power_state = + stac92xx_set_power_state; + codec->patch_ops.post_suspend = + stac92xx_post_suspend; + } + codec->patch_ops.pre_resume = stac92xx_pre_resume; codec->patch_ops.check_power_status = - stac92xx_hp_check_power_status; + stac92xx_check_power_status; } #endif @@ -5805,12 +5989,19 @@ again: #ifdef CONFIG_SND_HDA_POWER_SAVE if (spec->gpio_led) { - spec->gpio_mask |= spec->gpio_led; - spec->gpio_dir |= spec->gpio_led; - spec->gpio_data |= spec->gpio_led; - /* register check_power_status callback. */ + if (spec->gpio_led <= 8) { + spec->gpio_mask |= spec->gpio_led; + spec->gpio_dir |= spec->gpio_led; + spec->gpio_data |= spec->gpio_led; + } else { + codec->patch_ops.set_power_state = + stac92xx_set_power_state; + codec->patch_ops.post_suspend = + stac92xx_post_suspend; + } + codec->patch_ops.pre_resume = stac92xx_pre_resume; codec->patch_ops.check_power_status = - stac92xx_hp_check_power_status; + stac92xx_check_power_status; } #endif diff --git a/sound/pci/hda/patch_via.c b/sound/pci/hda/patch_via.c index f38160b00e16..84d8798bf33a 100644 --- a/sound/pci/hda/patch_via.c +++ b/sound/pci/hda/patch_via.c @@ -1708,7 +1708,7 @@ static void via_unsol_event(struct hda_codec *codec, via_gpio_control(codec); } -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM static int via_suspend(struct hda_codec *codec, pm_message_t state) { struct via_spec *spec = codec->spec; @@ -1736,7 +1736,7 @@ static const struct hda_codec_ops via_patch_ops = { .init = via_init, .free = via_free, .unsol_event = via_unsol_event, -#ifdef SND_HDA_NEEDS_RESUME +#ifdef CONFIG_PM .suspend = via_suspend, #endif #ifdef CONFIG_SND_HDA_POWER_SAVE diff --git a/sound/soc/codecs/sgtl5000.c b/sound/soc/codecs/sgtl5000.c index ff29380c9ed3..76258f2a2ffb 100644 --- a/sound/soc/codecs/sgtl5000.c +++ b/sound/soc/codecs/sgtl5000.c @@ -907,6 +907,7 @@ static int ldo_regulator_register(struct snd_soc_codec *codec, struct regulator_init_data *init_data, int voltage) { + dev_err(codec->dev, "this setup needs regulator support in the kernel\n"); return -EINVAL; } @@ -1218,6 +1219,34 @@ static int sgtl5000_set_power_regs(struct snd_soc_codec *codec) return 0; } +static int sgtl5000_replace_vddd_with_ldo(struct snd_soc_codec *codec) +{ + struct sgtl5000_priv *sgtl5000 = snd_soc_codec_get_drvdata(codec); + int ret; + + /* set internal ldo to 1.2v */ + ret = ldo_regulator_register(codec, &ldo_init_data, LDO_VOLTAGE); + if (ret) { + dev_err(codec->dev, + "Failed to register vddd internal supplies: %d\n", ret); + return ret; + } + + sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME; + + ret = regulator_bulk_get(codec->dev, ARRAY_SIZE(sgtl5000->supplies), + sgtl5000->supplies); + + if (ret) { + ldo_regulator_remove(codec); + dev_err(codec->dev, "Failed to request supplies: %d\n", ret); + return ret; + } + + dev_info(codec->dev, "Using internal LDO instead of VDDD\n"); + return 0; +} + static int sgtl5000_enable_regulators(struct snd_soc_codec *codec) { u16 reg; @@ -1235,30 +1264,9 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec) if (!ret) external_vddd = 1; else { - /* set internal ldo to 1.2v */ - int voltage = LDO_VOLTAGE; - - ret = ldo_regulator_register(codec, &ldo_init_data, voltage); - if (ret) { - dev_err(codec->dev, - "Failed to register vddd internal supplies: %d\n", - ret); - return ret; - } - - sgtl5000->supplies[VDDD].supply = 
LDO_CONSUMER_NAME; - - ret = regulator_bulk_get(codec->dev, - ARRAY_SIZE(sgtl5000->supplies), - sgtl5000->supplies); - - if (ret) { - ldo_regulator_remove(codec); - dev_err(codec->dev, - "Failed to request supplies: %d\n", ret); - + ret = sgtl5000_replace_vddd_with_ldo(codec); + if (ret) return ret; - } } ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies), @@ -1287,7 +1295,6 @@ static int sgtl5000_enable_regulators(struct snd_soc_codec *codec) * roll back to use internal LDO */ if (external_vddd && rev >= 0x11) { - int voltage = LDO_VOLTAGE; /* disable all regulators first */ regulator_bulk_disable(ARRAY_SIZE(sgtl5000->supplies), sgtl5000->supplies); @@ -1295,23 +1302,10 @@ regulator_bulk_free(ARRAY_SIZE(sgtl5000->supplies), sgtl5000->supplies); - ret = ldo_regulator_register(codec, &ldo_init_data, voltage); + ret = sgtl5000_replace_vddd_with_ldo(codec); if (ret) return ret; - sgtl5000->supplies[VDDD].supply = LDO_CONSUMER_NAME; - - ret = regulator_bulk_get(codec->dev, - ARRAY_SIZE(sgtl5000->supplies), - sgtl5000->supplies); - if (ret) { - ldo_regulator_remove(codec); - dev_err(codec->dev, - "Failed to request supplies: %d\n", ret); - - return ret; - } - ret = regulator_bulk_enable(ARRAY_SIZE(sgtl5000->supplies), sgtl5000->supplies); if (ret) diff --git a/sound/soc/codecs/wm8962.c b/sound/soc/codecs/wm8962.c index 8499c563a9b5..60d740ebeb5b 100644 --- a/sound/soc/codecs/wm8962.c +++ b/sound/soc/codecs/wm8962.c @@ -3409,6 +3409,9 @@ static irqreturn_t wm8962_irq(int irq, void *data) active = snd_soc_read(codec, WM8962_INTERRUPT_STATUS_2); active &= ~mask; + /* Acknowledge the interrupts */ + snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active); + if (active & WM8962_FLL_LOCK_EINT) { dev_dbg(codec->dev, "FLL locked\n"); complete(&wm8962->fll_lock); @@ -3433,9 +3436,6 @@ static irqreturn_t wm8962_irq(int irq, void *data) msecs_to_jiffies(250)); } - /* Acknowledge the interrupts */ - snd_soc_write(codec, WM8962_INTERRUPT_STATUS_2, active); - return IRQ_HANDLED; } diff --git a/sound/soc/davinci/davinci-vcif.c b/sound/soc/davinci/davinci-vcif.c index 9259f1f34899..1f11525d97e8 100644 --- a/sound/soc/davinci/davinci-vcif.c +++ b/sound/soc/davinci/davinci-vcif.c @@ -62,9 +62,9 @@ static void davinci_vcif_start(struct snd_pcm_substream *substream) w = readl(davinci_vc->base + DAVINCI_VC_CTRL); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1); + MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0); else - MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1); + MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0); writel(w, davinci_vc->base + DAVINCI_VC_CTRL); } @@ -80,9 +80,9 @@ static void davinci_vcif_stop(struct snd_pcm_substream *substream) /* Reset transmitter/receiver and sample rate/frame sync generators */ w = readl(davinci_vc->base + DAVINCI_VC_CTRL); if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) - MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 0); + MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTDAC, 1); else - MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 0); + MOD_REG_BIT(w, DAVINCI_VC_CTRL_RSTADC, 1); writel(w, davinci_vc->base + DAVINCI_VC_CTRL); } @@ -159,6 +159,7 @@ static int davinci_vcif_trigger(struct snd_pcm_substream *substream, int cmd, case SNDRV_PCM_TRIGGER_RESUME: case SNDRV_PCM_TRIGGER_PAUSE_RELEASE: davinci_vcif_start(substream); + break; case SNDRV_PCM_TRIGGER_STOP: case SNDRV_PCM_TRIGGER_SUSPEND: case SNDRV_PCM_TRIGGER_PAUSE_PUSH:
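
Two small but instructive fixes sit in the hunks above. In wm8962_irq(), the status write that acknowledges the interrupts now happens before the individual bits are handled, presumably so that a source which re-asserts while the handler is still running produces a fresh edge rather than being cleared retroactively. In davinci_vcif_trigger(), the added break stops the START/RESUME cases from falling through into the STOP path. The generic shape of the ack-early IRQ pattern, with read_status()/ack_status()/handle_events() as hypothetical placeholders for a device's register accessors:

static irqreturn_t ack_early_irq_sketch(int irq, void *data)
{
	unsigned int active = read_status(data);	/* placeholder */

	/* ack first: events arriving during handling re-trigger
	 * the line instead of being wiped by a late ack */
	ack_status(data, active);			/* placeholder */

	if (active)
		handle_events(data, active);		/* placeholder */

	return IRQ_HANDLED;
}

diff --git a/sound/soc/samsung/i2s.c b/sound/soc/samsung/i2s.c index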
1568eea31f41..c086b78539ee 100644 --- a/sound/soc/samsung/i2s.c +++ b/sound/soc/samsung/i2s.c @@ -21,6 +21,7 @@ #include <plat/audio.h> #include "dma.h" +#include "idma.h" #include "i2s.h" #include "i2s-regs.h" @@ -60,6 +61,7 @@ struct i2s_dai { /* DMA parameters */ struct s3c_dma_params dma_playback; struct s3c_dma_params dma_capture; + struct s3c_dma_params idma_playback; u32 quirks; u32 suspend_i2smod; u32 suspend_i2scon; @@ -877,6 +879,10 @@ static int samsung_i2s_dai_probe(struct snd_soc_dai *dai) if (i2s->quirks & QUIRK_NEED_RSTCLR) writel(CON_RSTCLR, i2s->addr + I2SCON); + if (i2s->quirks & QUIRK_SEC_DAI) + idma_reg_addr_init((void *)i2s->addr, + i2s->sec_dai->idma_playback.dma_addr); + probe_exit: /* Reset any constraint on RFS and BFS */ i2s->rfs = 0; @@ -1077,6 +1083,7 @@ static __devinit int samsung_i2s_probe(struct platform_device *pdev) sec_dai->dma_playback.dma_size = 4; sec_dai->base = regs_base; sec_dai->quirks = quirks; + sec_dai->idma_playback.dma_addr = i2s_cfg->idma_addr; sec_dai->pri_dai = pri_dai; pri_dai->sec_dai = sec_dai; } diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c index e44267f66216..83ad8ca27490 100644 --- a/sound/soc/soc-core.c +++ b/sound/soc/soc-core.c @@ -577,6 +577,7 @@ int snd_soc_suspend(struct device *dev) case SND_SOC_BIAS_OFF: codec->driver->suspend(codec, PMSG_SUSPEND); codec->suspended = 1; + codec->cache_sync = 1; break; default: dev_dbg(codec->dev, "CODEC is on over suspend\n"); @@ -1140,7 +1141,7 @@ static int soc_probe_dai_link(struct snd_soc_card *card, int num, int order) } } cpu_dai->probed = 1; - /* mark cpu_dai as probed and add to card cpu_dai list */ + /* mark cpu_dai as probed and add to card dai list */ list_add(&cpu_dai->card_list, &card->dai_dev_list); } @@ -1171,7 +1172,7 @@ static int soc_probe_dai_link(struct snd_soc_card *card, int num, int order) } } - /* mark cpu_dai as probed and add to card cpu_dai list */ + /* mark codec_dai as probed and add to card dai list */ codec_dai->probed = 1; list_add(&codec_dai->card_list, &card->dai_dev_list); } diff --git a/sound/soc/soc-dapm.c b/sound/soc/soc-dapm.c index fbfcda062839..7e15914b3633 100644 --- a/sound/soc/soc-dapm.c +++ b/sound/soc/soc-dapm.c @@ -124,6 +124,36 @@ static inline struct snd_soc_dapm_widget *dapm_cnew_widget( return kmemdup(_widget, sizeof(*_widget), GFP_KERNEL); } +/* get snd_card from DAPM context */ +static inline struct snd_card *dapm_get_snd_card( + struct snd_soc_dapm_context *dapm) +{ + if (dapm->codec) + return dapm->codec->card->snd_card; + else if (dapm->platform) + return dapm->platform->card->snd_card; + else + BUG(); + + /* unreachable */ + return NULL; +} + +/* get soc_card from DAPM context */ +static inline struct snd_soc_card *dapm_get_soc_card( + struct snd_soc_dapm_context *dapm) +{ + if (dapm->codec) + return dapm->codec->card; + else if (dapm->platform) + return dapm->platform->card; + else + BUG(); + + /* unreachable */ + return NULL; +} + static int soc_widget_read(struct snd_soc_dapm_widget *w, int reg) { if (w->codec)