-rw-r--r--  Documentation/devicetree/bindings/mmc/mmc.txt | 2
-rw-r--r--  Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt | 30
-rw-r--r--  Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt | 5
-rw-r--r--  Documentation/devicetree/bindings/mmc/usdhi6rol0.txt | 33
-rw-r--r--  Documentation/filesystems/nfs/nfs41-server.txt | 2
-rw-r--r--  MAINTAINERS | 3
-rw-r--r--  drivers/mmc/core/bus.c | 9
-rw-r--r--  drivers/mmc/core/core.c | 52
-rw-r--r--  drivers/mmc/core/debugfs.c | 8
-rw-r--r--  drivers/mmc/core/host.c | 4
-rw-r--r--  drivers/mmc/core/mmc.c | 678
-rw-r--r--  drivers/mmc/core/sd.c | 28
-rw-r--r--  drivers/mmc/core/sd.h | 1
-rw-r--r--  drivers/mmc/core/sdio.c | 53
-rw-r--r--  drivers/mmc/core/sdio_bus.c | 14
-rw-r--r--  drivers/mmc/core/sdio_irq.c | 41
-rw-r--r--  drivers/mmc/core/slot-gpio.c | 4
-rw-r--r--  drivers/mmc/host/Kconfig | 17
-rw-r--r--  drivers/mmc/host/Makefile | 2
-rw-r--r--  drivers/mmc/host/atmel-mci.c | 13
-rw-r--r--  drivers/mmc/host/dw_mmc-exynos.c | 7
-rw-r--r--  drivers/mmc/host/dw_mmc.c | 180
-rw-r--r--  drivers/mmc/host/dw_mmc.h | 2
-rw-r--r--  drivers/mmc/host/jz4740_mmc.c | 11
-rw-r--r--  drivers/mmc/host/mmci.c | 6
-rw-r--r--  drivers/mmc/host/moxart-mmc.c | 730
-rw-r--r--  drivers/mmc/host/mvsdio.c | 20
-rw-r--r--  drivers/mmc/host/mxcmmc.c | 140
-rw-r--r--  drivers/mmc/host/mxs-mmc.c | 7
-rw-r--r--  drivers/mmc/host/omap.c | 10
-rw-r--r--  drivers/mmc/host/omap_hsmmc.c | 61
-rw-r--r--  drivers/mmc/host/rtsx_pci_sdmmc.c | 5
-rw-r--r--  drivers/mmc/host/rtsx_usb_sdmmc.c | 5
-rw-r--r--  drivers/mmc/host/sdhci-acpi.c | 8
-rw-r--r--  drivers/mmc/host/sdhci-bcm-kona.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-bcm2835.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-cns3xxx.c | 13
-rw-r--r--  drivers/mmc/host/sdhci-dove.c | 78
-rw-r--r--  drivers/mmc/host/sdhci-esdhc-imx.c | 83
-rw-r--r--  drivers/mmc/host/sdhci-esdhc.h | 4
-rw-r--r--  drivers/mmc/host/sdhci-of-arasan.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-of-esdhc.c | 70
-rw-r--r--  drivers/mmc/host/sdhci-of-hlwd.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-pci-o2micro.c | 78
-rw-r--r--  drivers/mmc/host/sdhci-pci-o2micro.h | 3
-rw-r--r--  drivers/mmc/host/sdhci-pci.c | 9
-rw-r--r--  drivers/mmc/host/sdhci-pltfm.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-pxav2.c | 14
-rw-r--r--  drivers/mmc/host/sdhci-pxav3.c | 13
-rw-r--r--  drivers/mmc/host/sdhci-s3c.c | 138
-rw-r--r--  drivers/mmc/host/sdhci-sirf.c | 4
-rw-r--r--  drivers/mmc/host/sdhci-spear.c | 5
-rw-r--r--  drivers/mmc/host/sdhci-tegra.c | 67
-rw-r--r--  drivers/mmc/host/sdhci.c | 749
-rw-r--r--  drivers/mmc/host/sdhci.h | 20
-rw-r--r--  drivers/mmc/host/sh_mmcif.c | 9
-rw-r--r--  drivers/mmc/host/usdhi6rol0.c | 1847
-rw-r--r--  drivers/mmc/host/wmt-sdmmc.c | 2
-rw-r--r--  drivers/net/wireless/rsi/rsi_91x_sdio.c | 4
-rw-r--r--  fs/attr.c | 8
-rw-r--r--  fs/inode.c | 10
-rw-r--r--  fs/lockd/clnt4xdr.c | 2
-rw-r--r--  fs/lockd/clntxdr.c | 2
-rw-r--r--  fs/lockd/svc.c | 2
-rw-r--r--  fs/lockd/svcsubs.c | 3
-rw-r--r--  fs/lockd/xdr.c | 2
-rw-r--r--  fs/namei.c | 11
-rw-r--r--  fs/nfs/Makefile | 4
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 38
-rw-r--r--  fs/nfs/direct.c | 117
-rw-r--r--  fs/nfs/filelayout/Makefile | 5
-rw-r--r--  fs/nfs/filelayout/filelayout.c (renamed from fs/nfs/nfs4filelayout.c) | 203
-rw-r--r--  fs/nfs/filelayout/filelayout.h (renamed from fs/nfs/nfs4filelayout.h) | 2
-rw-r--r--  fs/nfs/filelayout/filelayoutdev.c (renamed from fs/nfs/nfs4filelayoutdev.c) | 6
-rw-r--r--  fs/nfs/getroot.c | 3
-rw-r--r--  fs/nfs/inode.c | 26
-rw-r--r--  fs/nfs/internal.h | 33
-rw-r--r--  fs/nfs/nfs2xdr.c | 14
-rw-r--r--  fs/nfs/nfs3proc.c | 21
-rw-r--r--  fs/nfs/nfs3xdr.c | 16
-rw-r--r--  fs/nfs/nfs4_fs.h | 4
-rw-r--r--  fs/nfs/nfs4file.c | 3
-rw-r--r--  fs/nfs/nfs4proc.c | 58
-rw-r--r--  fs/nfs/nfs4state.c | 6
-rw-r--r--  fs/nfs/nfs4trace.h | 8
-rw-r--r--  fs/nfs/nfs4xdr.c | 19
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 24
-rw-r--r--  fs/nfs/objlayout/objlayout.c | 24
-rw-r--r--  fs/nfs/objlayout/objlayout.h | 8
-rw-r--r--  fs/nfs/pagelist.c | 633
-rw-r--r--  fs/nfs/pnfs.c | 166
-rw-r--r--  fs/nfs/pnfs.h | 30
-rw-r--r--  fs/nfs/proc.c | 21
-rw-r--r--  fs/nfs/read.c | 414
-rw-r--r--  fs/nfs/super.c | 27
-rw-r--r--  fs/nfs/write.c | 588
-rw-r--r--  fs/nfsd/acl.h | 2
-rw-r--r--  fs/nfsd/auth.c | 5
-rw-r--r--  fs/nfsd/export.c | 88
-rw-r--r--  fs/nfsd/export.h (renamed from include/linux/nfsd/export.h) | 14
-rw-r--r--  fs/nfsd/fault_inject.c | 15
-rw-r--r--  fs/nfsd/idmap.h | 4
-rw-r--r--  fs/nfsd/nfs2acl.c | 12
-rw-r--r--  fs/nfsd/nfs3acl.c | 6
-rw-r--r--  fs/nfsd/nfs3xdr.c | 27
-rw-r--r--  fs/nfsd/nfs4acl.c | 12
-rw-r--r--  fs/nfsd/nfs4idmap.c | 42
-rw-r--r--  fs/nfsd/nfs4proc.c | 180
-rw-r--r--  fs/nfsd/nfs4state.c | 271
-rw-r--r--  fs/nfsd/nfs4xdr.c | 1933
-rw-r--r--  fs/nfsd/nfscache.c | 17
-rw-r--r--  fs/nfsd/nfsctl.c | 1
-rw-r--r--  fs/nfsd/nfsd.h | 17
-rw-r--r--  fs/nfsd/nfsfh.c | 25
-rw-r--r--  fs/nfsd/nfsfh.h | 59
-rw-r--r--  fs/nfsd/nfssvc.c | 6
-rw-r--r--  fs/nfsd/nfsxdr.c | 15
-rw-r--r--  fs/nfsd/state.h | 5
-rw-r--r--  fs/nfsd/stats.c | 1
-rw-r--r--  fs/nfsd/stats.h (renamed from include/linux/nfsd/stats.h) | 8
-rw-r--r--  fs/nfsd/vfs.c | 155
-rw-r--r--  fs/nfsd/vfs.h | 10
-rw-r--r--  fs/nfsd/xdr4.h | 23
-rw-r--r--  fs/xfs/xfs_ioctl.c | 2
-rw-r--r--  include/linux/capability.h | 2
-rw-r--r--  include/linux/lockd/lockd.h | 2
-rw-r--r--  include/linux/mmc/card.h | 29
-rw-r--r--  include/linux/mmc/dw_mmc.h | 14
-rw-r--r--  include/linux/mmc/host.h | 59
-rw-r--r--  include/linux/mmc/mmc.h | 23
-rw-r--r--  include/linux/mmc/sdhci.h | 15
-rw-r--r--  include/linux/nfs.h | 5
-rw-r--r--  include/linux/nfs4.h | 2
-rw-r--r--  include/linux/nfs_fs.h | 2
-rw-r--r--  include/linux/nfs_page.h | 46
-rw-r--r--  include/linux/nfs_xdr.h | 106
-rw-r--r--  include/linux/nfsd/debug.h | 19
-rw-r--r--  include/linux/nfsd/nfsfh.h | 63
-rw-r--r--  include/linux/omap-dma.h | 19
-rw-r--r--  include/linux/omap-dmaengine.h | 21
-rw-r--r--  include/linux/sunrpc/svc.h | 13
-rw-r--r--  include/linux/sunrpc/svc_rdma.h | 3
-rw-r--r--  include/linux/sunrpc/svc_xprt.h | 2
-rw-r--r--  include/linux/sunrpc/xdr.h | 3
-rw-r--r--  include/linux/sunrpc/xprt.h | 6
-rw-r--r--  include/uapi/linux/nfsd/nfsfh.h | 32
-rw-r--r--  kernel/capability.c | 20
-rw-r--r--  net/sunrpc/auth_gss/gss_mech_switch.c | 4
-rw-r--r--  net/sunrpc/auth_gss/svcauth_gss.c | 2
-rw-r--r--  net/sunrpc/cache.c | 2
-rw-r--r--  net/sunrpc/sched.c | 5
-rw-r--r--  net/sunrpc/sunrpc.h | 13
-rw-r--r--  net/sunrpc/svc_xprt.c | 5
-rw-r--r--  net/sunrpc/svcauth.c | 2
-rw-r--r--  net/sunrpc/svcsock.c | 17
-rw-r--r--  net/sunrpc/xdr.c | 174
-rw-r--r--  net/sunrpc/xprt.c | 28
-rw-r--r--  net/sunrpc/xprtrdma/rpc_rdma.c | 119
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 643
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 230
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 69
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 90
-rw-r--r--  net/sunrpc/xprtrdma/verbs.c | 753
-rw-r--r--  net/sunrpc/xprtrdma/xprt_rdma.h | 17
-rwxr-xr-x  scripts/decode_stacktrace.sh | 126
165 files changed, 8329 insertions, 5413 deletions
diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt
index 9dce540771fb..3c18001dfd5d 100644
--- a/Documentation/devicetree/bindings/mmc/mmc.txt
+++ b/Documentation/devicetree/bindings/mmc/mmc.txt
@@ -38,6 +38,8 @@ Optional properties:
- mmc-highspeed-ddr-1_2v: eMMC high-speed DDR mode(1.2V I/O) is supported
- mmc-hs200-1_8v: eMMC HS200 mode(1.8V I/O) is supported
- mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported
+- mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported
+- mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported
*NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line
polarity properties, we have to fix the meaning of the "normal" and "inverted"
diff --git a/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt
new file mode 100644
index 000000000000..b63819149f22
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/moxa,moxart-mmc.txt
@@ -0,0 +1,30 @@
+MOXA ART MMC Host Controller Interface
+
+ Inherits from mmc binding[1].
+
+ [1] Documentation/devicetree/bindings/mmc/mmc.txt
+
+Required properties:
+
+- compatible : Must be "moxa,moxart-mmc" or "faraday,ftsdc010"
+- reg : Should contain registers location and length
+- interrupts : Should contain the interrupt number
+- clocks : Should contain phandle for the clock feeding the MMC controller
+
+Optional properties:
+
+- dmas : Should contain two DMA channels, line request number must be 5 for
+ both channels
+- dma-names : Must be "tx", "rx"
+
+Example:
+
+ mmc: mmc@98e00000 {
+ compatible = "moxa,moxart-mmc";
+ reg = <0x98e00000 0x5C>;
+ interrupts = <5 0>;
+ clocks = <&clk_apb>;
+ dmas = <&dma 5>,
+ <&dma 5>;
+ dma-names = "tx", "rx";
+ };
diff --git a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
index 8f3f13315358..2d4a7258a10d 100644
--- a/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
+++ b/Documentation/devicetree/bindings/mmc/synopsys-dw-mshc.txt
@@ -69,10 +69,6 @@ Optional properties:
* supports-highspeed: Enables support for high speed cards (up to 50MHz)
-* caps2-mmc-hs200-1_8v: Supports mmc HS200 SDR 1.8V mode
-
-* caps2-mmc-hs200-1_2v: Supports mmc HS200 SDR 1.2V mode
-
* broken-cd: as documented in mmc core bindings.
* vmmc-supply: The phandle to the regulator to use for vmmc. If this is
@@ -103,7 +99,6 @@ board specific portions as listed below.
clock-freq-min-max = <400000 200000000>;
num-slots = <1>;
supports-highspeed;
- caps2-mmc-hs200-1_8v;
broken-cd;
fifo-depth = <0x80>;
card-detect-delay = <200>;
diff --git a/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
new file mode 100644
index 000000000000..8babdaa8623b
--- /dev/null
+++ b/Documentation/devicetree/bindings/mmc/usdhi6rol0.txt
@@ -0,0 +1,33 @@
+* Renesas usdhi6rol0 SD/SDIO host controller
+
+Required properties:
+
+- compatible: must be
+ "renesas,usdhi6rol0"
+- interrupts: 3 interrupts, named "card detect", "data" and "SDIO" must be
+ specified
+- clocks: a clock binding for the IMCLK input
+
+Optional properties:
+
+- vmmc-supply: a phandle of a regulator, supplying Vcc to the card
+- vqmmc-supply: a phandle of a regulator, supplying VccQ to the card
+
+Additionally any standard mmc bindings from mmc.txt can be used.
+
+Example:
+
+sd0: sd@ab000000 {
+ compatible = "renesas,usdhi6rol0";
+ reg = <0xab000000 0x200>;
+ interrupts = <0 23 0x4
+ 0 24 0x4
+ 0 25 0x4>;
+ interrupt-names = "card detect", "data", "SDIO";
+ bus-width = <4>;
+ max-frequency = <50000000>;
+ cap-power-off-card;
+ clocks = <&imclk>;
+ vmmc-supply = <&vcc_sd0>;
+ vqmmc-supply = <&vccq_sd0>;
+};
diff --git a/Documentation/filesystems/nfs/nfs41-server.txt b/Documentation/filesystems/nfs/nfs41-server.txt
index b930ad087780..c49cd7e796e7 100644
--- a/Documentation/filesystems/nfs/nfs41-server.txt
+++ b/Documentation/filesystems/nfs/nfs41-server.txt
@@ -176,7 +176,5 @@ Nonstandard compound limitations:
ca_maxrequestsize request and a ca_maxresponsesize reply, so we may
fail to live up to the promise we made in CREATE_SESSION fore channel
negotiation.
-* No more than one read-like operation allowed per compound; encoding
- replies that cross page boundaries (except for read data) not handled.
See also http://wiki.linux-nfs.org/wiki/index.php/Server_4.0_and_4.1_issues.
diff --git a/MAINTAINERS b/MAINTAINERS
index 948379508e44..b4a66b9d6b4d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -5974,6 +5974,7 @@ M: Chris Ball <chris@printf.net>
M: Ulf Hansson <ulf.hansson@linaro.org>
L: linux-mmc@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/cjb/mmc.git
+T: git git://git.linaro.org/people/ulf.hansson/mmc.git
S: Maintained
F: drivers/mmc/
F: include/linux/mmc/
@@ -9103,7 +9104,7 @@ F: include/linux/toshiba.h
F: include/uapi/linux/toshiba.h
TMIO MMC DRIVER
-M: Ian Molton <ian@mnementh.co.uk>
+M: Ian Molton <ian.molton@codethink.co.uk>
L: linux-mmc@vger.kernel.org
S: Maintained
F: drivers/mmc/host/tmio_mmc*
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index 824644875d41..d2dbf02022bd 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -341,16 +341,17 @@ int mmc_add_card(struct mmc_card *card)
if (mmc_host_is_spi(card->host)) {
pr_info("%s: new %s%s%s card on SPI\n",
mmc_hostname(card->host),
- mmc_card_highspeed(card) ? "high speed " : "",
- mmc_card_ddr_mode(card) ? "DDR " : "",
+ mmc_card_hs(card) ? "high speed " : "",
+ mmc_card_ddr52(card) ? "DDR " : "",
type);
} else {
pr_info("%s: new %s%s%s%s%s card at address %04x\n",
mmc_hostname(card->host),
mmc_card_uhs(card) ? "ultra high speed " :
- (mmc_card_highspeed(card) ? "high speed " : ""),
+ (mmc_card_hs(card) ? "high speed " : ""),
+ mmc_card_hs400(card) ? "HS400 " :
(mmc_card_hs200(card) ? "HS200 " : ""),
- mmc_card_ddr_mode(card) ? "DDR " : "",
+ mmc_card_ddr52(card) ? "DDR " : "",
uhs_bus_speed_mode, type, card->rca);
}
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index acbc3f2aaaf9..7dc0c85fdb60 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -800,6 +800,10 @@ void mmc_set_data_timeout(struct mmc_data *data, const struct mmc_card *card)
data->timeout_ns = limit_us * 1000;
data->timeout_clks = 0;
}
+
+ /* assign limit value if invalid */
+ if (timeout_us == 0)
+ data->timeout_ns = limit_us * 1000;
}
/*
@@ -1310,31 +1314,38 @@ int mmc_regulator_set_ocr(struct mmc_host *mmc,
}
EXPORT_SYMBOL_GPL(mmc_regulator_set_ocr);
+#endif /* CONFIG_REGULATOR */
+
int mmc_regulator_get_supply(struct mmc_host *mmc)
{
struct device *dev = mmc_dev(mmc);
- struct regulator *supply;
int ret;
- supply = devm_regulator_get(dev, "vmmc");
- mmc->supply.vmmc = supply;
+ mmc->supply.vmmc = devm_regulator_get_optional(dev, "vmmc");
mmc->supply.vqmmc = devm_regulator_get_optional(dev, "vqmmc");
- if (IS_ERR(supply))
- return PTR_ERR(supply);
+ if (IS_ERR(mmc->supply.vmmc)) {
+ if (PTR_ERR(mmc->supply.vmmc) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_info(dev, "No vmmc regulator found\n");
+ } else {
+ ret = mmc_regulator_get_ocrmask(mmc->supply.vmmc);
+ if (ret > 0)
+ mmc->ocr_avail = ret;
+ else
+ dev_warn(dev, "Failed getting OCR mask: %d\n", ret);
+ }
- ret = mmc_regulator_get_ocrmask(supply);
- if (ret > 0)
- mmc->ocr_avail = ret;
- else
- dev_warn(mmc_dev(mmc), "Failed getting OCR mask: %d\n", ret);
+ if (IS_ERR(mmc->supply.vqmmc)) {
+ if (PTR_ERR(mmc->supply.vqmmc) == -EPROBE_DEFER)
+ return -EPROBE_DEFER;
+ dev_info(dev, "No vqmmc regulator found\n");
+ }
return 0;
}
EXPORT_SYMBOL_GPL(mmc_regulator_get_supply);
-#endif /* CONFIG_REGULATOR */
-
/*
* Mask off any voltages we don't support and select
* the lowest voltage
@@ -1533,8 +1544,13 @@ void mmc_power_up(struct mmc_host *host, u32 ocr)
host->ios.timing = MMC_TIMING_LEGACY;
mmc_set_ios(host);
- /* Set signal voltage to 3.3V */
- __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330);
+ /* Try to set signal voltage to 3.3V but fall back to 1.8v or 1.2v */
+ if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330) == 0)
+ dev_dbg(mmc_dev(host), "Initial signal voltage of 3.3v\n");
+ else if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180) == 0)
+ dev_dbg(mmc_dev(host), "Initial signal voltage of 1.8v\n");
+ else if (__mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120) == 0)
+ dev_dbg(mmc_dev(host), "Initial signal voltage of 1.2v\n");
/*
* This delay should be sufficient to allow the power supply
@@ -2183,7 +2199,7 @@ int mmc_set_blocklen(struct mmc_card *card, unsigned int blocklen)
{
struct mmc_command cmd = {0};
- if (mmc_card_blockaddr(card) || mmc_card_ddr_mode(card))
+ if (mmc_card_blockaddr(card) || mmc_card_ddr52(card))
return 0;
cmd.opcode = MMC_SET_BLOCKLEN;
@@ -2263,7 +2279,6 @@ static int mmc_do_hw_reset(struct mmc_host *host, int check)
}
}
- host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_DDR);
if (mmc_host_is_spi(host)) {
host->ios.chip_select = MMC_CS_HIGH;
host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
@@ -2403,6 +2418,11 @@ void mmc_rescan(struct work_struct *work)
container_of(work, struct mmc_host, detect.work);
int i;
+ if (host->trigger_card_event && host->ops->card_event) {
+ host->ops->card_event(host);
+ host->trigger_card_event = false;
+ }
+
if (host->rescan_disable)
return;
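
Note on the core.c changes above: mmc_regulator_get_supply() now treats both vmmc and vqmmc as optional and only propagates -EPROBE_DEFER. A minimal host-probe sketch of the intended calling convention follows; the driver name, private struct and fallback OCR mask are illustrative, only the MMC core calls are real.

#include <linux/platform_device.h>
#include <linux/mmc/host.h>

struct example_host { int dummy; };	/* hypothetical private data */

static int example_mmc_probe(struct platform_device *pdev)
{
	struct mmc_host *mmc;
	int ret;

	mmc = mmc_alloc_host(sizeof(struct example_host), &pdev->dev);
	if (!mmc)
		return -ENOMEM;

	/* Missing regulators are tolerated; after this change the only
	 * error propagated back to the caller is -EPROBE_DEFER. */
	ret = mmc_regulator_get_supply(mmc);
	if (ret) {
		mmc_free_host(mmc);
		return ret;
	}

	/* ocr_avail was filled from the vmmc OCR mask if a regulator was
	 * found; otherwise the driver must still provide its own mask. */
	if (!mmc->ocr_avail)
		mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;

	return mmc_add_host(mmc);
}
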
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index 54829c0ed000..91eb16223246 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -135,8 +135,14 @@ static int mmc_ios_show(struct seq_file *s, void *data)
case MMC_TIMING_UHS_DDR50:
str = "sd uhs DDR50";
break;
+ case MMC_TIMING_MMC_DDR52:
+ str = "mmc DDR52";
+ break;
case MMC_TIMING_MMC_HS200:
- str = "mmc high-speed SDR200";
+ str = "mmc HS200";
+ break;
+ case MMC_TIMING_MMC_HS400:
+ str = "mmc HS400";
break;
default:
str = "invalid";
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index fdea825dbb24..95cceae96944 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -447,6 +447,10 @@ int mmc_of_parse(struct mmc_host *host)
host->caps2 |= MMC_CAP2_HS200_1_8V_SDR;
if (of_find_property(np, "mmc-hs200-1_2v", &len))
host->caps2 |= MMC_CAP2_HS200_1_2V_SDR;
+ if (of_find_property(np, "mmc-hs400-1_8v", &len))
+ host->caps2 |= MMC_CAP2_HS400_1_8V | MMC_CAP2_HS200_1_8V_SDR;
+ if (of_find_property(np, "mmc-hs400-1_2v", &len))
+ host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR;
return 0;
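
Note on the host.c hunk above: mmc_of_parse() now understands the new HS400 bindings and implies the corresponding HS200 capability whenever HS400 is requested. A hedged sketch of how a DT-based host driver picks this up; the probe function is illustrative, only mmc_of_parse() and the MMC_CAP2_HS400_* flags used above come from the patch.

#include <linux/platform_device.h>
#include <linux/mmc/host.h>

static int example_of_mmc_probe(struct platform_device *pdev)
{
	struct mmc_host *mmc;
	int ret;

	mmc = mmc_alloc_host(0, &pdev->dev);
	if (!mmc)
		return -ENOMEM;

	/* Translates "mmc-hs400-1_8v" / "mmc-hs400-1_2v" (and the rest of
	 * the generic MMC bindings) into mmc->caps / mmc->caps2. */
	ret = mmc_of_parse(mmc);
	if (ret) {
		mmc_free_host(mmc);
		return ret;
	}

	/* A non-DT platform could advertise the same capability directly:
	 *	mmc->caps2 |= MMC_CAP2_HS400_1_8V;
	 */
	return mmc_add_host(mmc);
}
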
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index 1ab5f3a0af5b..793c6f7ddb04 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -240,31 +240,62 @@ static int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd)
static void mmc_select_card_type(struct mmc_card *card)
{
struct mmc_host *host = card->host;
- u8 card_type = card->ext_csd.raw_card_type & EXT_CSD_CARD_TYPE_MASK;
+ u8 card_type = card->ext_csd.raw_card_type;
u32 caps = host->caps, caps2 = host->caps2;
- unsigned int hs_max_dtr = 0;
+ unsigned int hs_max_dtr = 0, hs200_max_dtr = 0;
+ unsigned int avail_type = 0;
- if (card_type & EXT_CSD_CARD_TYPE_26)
+ if (caps & MMC_CAP_MMC_HIGHSPEED &&
+ card_type & EXT_CSD_CARD_TYPE_HS_26) {
hs_max_dtr = MMC_HIGH_26_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS_26;
+ }
if (caps & MMC_CAP_MMC_HIGHSPEED &&
- card_type & EXT_CSD_CARD_TYPE_52)
+ card_type & EXT_CSD_CARD_TYPE_HS_52) {
hs_max_dtr = MMC_HIGH_52_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS_52;
+ }
+
+ if (caps & MMC_CAP_1_8V_DDR &&
+ card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) {
+ hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_DDR_1_8V;
+ }
- if ((caps & MMC_CAP_1_8V_DDR &&
- card_type & EXT_CSD_CARD_TYPE_DDR_1_8V) ||
- (caps & MMC_CAP_1_2V_DDR &&
- card_type & EXT_CSD_CARD_TYPE_DDR_1_2V))
+ if (caps & MMC_CAP_1_2V_DDR &&
+ card_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
hs_max_dtr = MMC_HIGH_DDR_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_DDR_1_2V;
+ }
+
+ if (caps2 & MMC_CAP2_HS200_1_8V_SDR &&
+ card_type & EXT_CSD_CARD_TYPE_HS200_1_8V) {
+ hs200_max_dtr = MMC_HS200_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS200_1_8V;
+ }
+
+ if (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
+ card_type & EXT_CSD_CARD_TYPE_HS200_1_2V) {
+ hs200_max_dtr = MMC_HS200_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS200_1_2V;
+ }
+
+ if (caps2 & MMC_CAP2_HS400_1_8V &&
+ card_type & EXT_CSD_CARD_TYPE_HS400_1_8V) {
+ hs200_max_dtr = MMC_HS200_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS400_1_8V;
+ }
- if ((caps2 & MMC_CAP2_HS200_1_8V_SDR &&
- card_type & EXT_CSD_CARD_TYPE_SDR_1_8V) ||
- (caps2 & MMC_CAP2_HS200_1_2V_SDR &&
- card_type & EXT_CSD_CARD_TYPE_SDR_1_2V))
- hs_max_dtr = MMC_HS200_MAX_DTR;
+ if (caps2 & MMC_CAP2_HS400_1_2V &&
+ card_type & EXT_CSD_CARD_TYPE_HS400_1_2V) {
+ hs200_max_dtr = MMC_HS200_MAX_DTR;
+ avail_type |= EXT_CSD_CARD_TYPE_HS400_1_2V;
+ }
card->ext_csd.hs_max_dtr = hs_max_dtr;
- card->ext_csd.card_type = card_type;
+ card->ext_csd.hs200_max_dtr = hs200_max_dtr;
+ card->mmc_avail_type = avail_type;
}
/*
@@ -480,6 +511,8 @@ static int mmc_read_ext_csd(struct mmc_card *card, u8 *ext_csd)
ext_csd[EXT_CSD_PWR_CL_DDR_52_195];
card->ext_csd.raw_pwr_cl_ddr_52_360 =
ext_csd[EXT_CSD_PWR_CL_DDR_52_360];
+ card->ext_csd.raw_pwr_cl_ddr_200_360 =
+ ext_csd[EXT_CSD_PWR_CL_DDR_200_360];
}
if (card->ext_csd.rev >= 5) {
@@ -646,7 +679,10 @@ static int mmc_compare_ext_csds(struct mmc_card *card, unsigned bus_width)
(card->ext_csd.raw_pwr_cl_ddr_52_195 ==
bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_195]) &&
(card->ext_csd.raw_pwr_cl_ddr_52_360 ==
- bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]));
+ bw_ext_csd[EXT_CSD_PWR_CL_DDR_52_360]) &&
+ (card->ext_csd.raw_pwr_cl_ddr_200_360 ==
+ bw_ext_csd[EXT_CSD_PWR_CL_DDR_200_360]));
+
if (err)
err = -EINVAL;
@@ -694,18 +730,10 @@ static struct attribute *mmc_std_attrs[] = {
&dev_attr_rel_sectors.attr,
NULL,
};
-
-static struct attribute_group mmc_std_attr_group = {
- .attrs = mmc_std_attrs,
-};
-
-static const struct attribute_group *mmc_attr_groups[] = {
- &mmc_std_attr_group,
- NULL,
-};
+ATTRIBUTE_GROUPS(mmc_std);
static struct device_type mmc_type = {
- .groups = mmc_attr_groups,
+ .groups = mmc_std_groups,
};
/*
@@ -714,17 +742,13 @@ static struct device_type mmc_type = {
* extended CSD register, select it by executing the
* mmc_switch command.
*/
-static int mmc_select_powerclass(struct mmc_card *card,
- unsigned int bus_width)
+static int __mmc_select_powerclass(struct mmc_card *card,
+ unsigned int bus_width)
{
- int err = 0;
+ struct mmc_host *host = card->host;
+ struct mmc_ext_csd *ext_csd = &card->ext_csd;
unsigned int pwrclass_val = 0;
- struct mmc_host *host;
-
- BUG_ON(!card);
-
- host = card->host;
- BUG_ON(!host);
+ int err = 0;
/* Power class selection is supported for versions >= 4.0 */
if (card->csd.mmca_vsn < CSD_SPEC_VER_4)
@@ -736,14 +760,14 @@ static int mmc_select_powerclass(struct mmc_card *card,
switch (1 << host->ios.vdd) {
case MMC_VDD_165_195:
- if (host->ios.clock <= 26000000)
- pwrclass_val = card->ext_csd.raw_pwr_cl_26_195;
- else if (host->ios.clock <= 52000000)
+ if (host->ios.clock <= MMC_HIGH_26_MAX_DTR)
+ pwrclass_val = ext_csd->raw_pwr_cl_26_195;
+ else if (host->ios.clock <= MMC_HIGH_52_MAX_DTR)
pwrclass_val = (bus_width <= EXT_CSD_BUS_WIDTH_8) ?
- card->ext_csd.raw_pwr_cl_52_195 :
- card->ext_csd.raw_pwr_cl_ddr_52_195;
- else if (host->ios.clock <= 200000000)
- pwrclass_val = card->ext_csd.raw_pwr_cl_200_195;
+ ext_csd->raw_pwr_cl_52_195 :
+ ext_csd->raw_pwr_cl_ddr_52_195;
+ else if (host->ios.clock <= MMC_HS200_MAX_DTR)
+ pwrclass_val = ext_csd->raw_pwr_cl_200_195;
break;
case MMC_VDD_27_28:
case MMC_VDD_28_29:
@@ -754,14 +778,16 @@ static int mmc_select_powerclass(struct mmc_card *card,
case MMC_VDD_33_34:
case MMC_VDD_34_35:
case MMC_VDD_35_36:
- if (host->ios.clock <= 26000000)
- pwrclass_val = card->ext_csd.raw_pwr_cl_26_360;
- else if (host->ios.clock <= 52000000)
+ if (host->ios.clock <= MMC_HIGH_26_MAX_DTR)
+ pwrclass_val = ext_csd->raw_pwr_cl_26_360;
+ else if (host->ios.clock <= MMC_HIGH_52_MAX_DTR)
pwrclass_val = (bus_width <= EXT_CSD_BUS_WIDTH_8) ?
- card->ext_csd.raw_pwr_cl_52_360 :
- card->ext_csd.raw_pwr_cl_ddr_52_360;
- else if (host->ios.clock <= 200000000)
- pwrclass_val = card->ext_csd.raw_pwr_cl_200_360;
+ ext_csd->raw_pwr_cl_52_360 :
+ ext_csd->raw_pwr_cl_ddr_52_360;
+ else if (host->ios.clock <= MMC_HS200_MAX_DTR)
+ pwrclass_val = (bus_width == EXT_CSD_DDR_BUS_WIDTH_8) ?
+ ext_csd->raw_pwr_cl_ddr_200_360 :
+ ext_csd->raw_pwr_cl_200_360;
break;
default:
pr_warning("%s: Voltage range not supported "
@@ -787,40 +813,79 @@ static int mmc_select_powerclass(struct mmc_card *card,
return err;
}
+static int mmc_select_powerclass(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ u32 bus_width, ext_csd_bits;
+ int err, ddr;
+
+ /* Power class selection is supported for versions >= 4.0 */
+ if (card->csd.mmca_vsn < CSD_SPEC_VER_4)
+ return 0;
+
+ bus_width = host->ios.bus_width;
+ /* Power class values are defined only for 4/8 bit bus */
+ if (bus_width == MMC_BUS_WIDTH_1)
+ return 0;
+
+ ddr = card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52;
+ if (ddr)
+ ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+ EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+ else
+ ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+ EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
+
+ err = __mmc_select_powerclass(card, ext_csd_bits);
+ if (err)
+ pr_warn("%s: power class selection to bus width %d ddr %d failed\n",
+ mmc_hostname(host), 1 << bus_width, ddr);
+
+ return err;
+}
+
/*
- * Selects the desired buswidth and switch to the HS200 mode
- * if bus width set without error
+ * Set the bus speed for the selected speed mode.
*/
-static int mmc_select_hs200(struct mmc_card *card)
+static void mmc_set_bus_speed(struct mmc_card *card)
+{
+ unsigned int max_dtr = (unsigned int)-1;
+
+ if ((mmc_card_hs200(card) || mmc_card_hs400(card)) &&
+ max_dtr > card->ext_csd.hs200_max_dtr)
+ max_dtr = card->ext_csd.hs200_max_dtr;
+ else if (mmc_card_hs(card) && max_dtr > card->ext_csd.hs_max_dtr)
+ max_dtr = card->ext_csd.hs_max_dtr;
+ else if (max_dtr > card->csd.max_dtr)
+ max_dtr = card->csd.max_dtr;
+
+ mmc_set_clock(card->host, max_dtr);
+}
+
+/*
+ * Select the bus width amoung 4-bit and 8-bit(SDR).
+ * If the bus width is changed successfully, return the selected width value.
+ * Zero is returned instead of error value if the wide width is not supported.
+ */
+static int mmc_select_bus_width(struct mmc_card *card)
{
- int idx, err = -EINVAL;
- struct mmc_host *host;
static unsigned ext_csd_bits[] = {
- EXT_CSD_BUS_WIDTH_4,
EXT_CSD_BUS_WIDTH_8,
+ EXT_CSD_BUS_WIDTH_4,
};
static unsigned bus_widths[] = {
- MMC_BUS_WIDTH_4,
MMC_BUS_WIDTH_8,
+ MMC_BUS_WIDTH_4,
};
+ struct mmc_host *host = card->host;
+ unsigned idx, bus_width = 0;
+ int err = 0;
- BUG_ON(!card);
-
- host = card->host;
-
- if (card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_2V &&
- host->caps2 & MMC_CAP2_HS200_1_2V_SDR)
- err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
-
- if (err && card->ext_csd.card_type & EXT_CSD_CARD_TYPE_SDR_1_8V &&
- host->caps2 & MMC_CAP2_HS200_1_8V_SDR)
- err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
-
- /* If fails try again during next card power cycle */
- if (err)
- goto err;
+ if ((card->csd.mmca_vsn < CSD_SPEC_VER_4) &&
+ !(host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
+ return 0;
- idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 1 : 0;
+ idx = (host->caps & MMC_CAP_8_BIT_DATA) ? 0 : 1;
/*
* Unlike SD, MMC cards dont have a configuration register to notify
@@ -828,8 +893,7 @@ static int mmc_select_hs200(struct mmc_card *card)
* the supported bus width or compare the ext csd values of current
* bus width and ext csd values of 1 bit mode read earlier.
*/
- for (; idx >= 0; idx--) {
-
+ for (; idx < ARRAY_SIZE(bus_widths); idx++) {
/*
* Host is capable of 8bit transfer, then switch
* the device to work in 8bit transfer mode. If the
@@ -844,27 +908,266 @@ static int mmc_select_hs200(struct mmc_card *card)
if (err)
continue;
- mmc_set_bus_width(card->host, bus_widths[idx]);
+ bus_width = bus_widths[idx];
+ mmc_set_bus_width(host, bus_width);
+ /*
+ * If controller can't handle bus width test,
+ * compare ext_csd previously read in 1 bit mode
+ * against ext_csd at new bus width
+ */
if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
- err = mmc_compare_ext_csds(card, bus_widths[idx]);
+ err = mmc_compare_ext_csds(card, bus_width);
else
- err = mmc_bus_test(card, bus_widths[idx]);
- if (!err)
+ err = mmc_bus_test(card, bus_width);
+
+ if (!err) {
+ err = bus_width;
break;
+ } else {
+ pr_warn("%s: switch to bus width %d failed\n",
+ mmc_hostname(host), ext_csd_bits[idx]);
+ }
}
- /* switch to HS200 mode if bus width set successfully */
+ return err;
+}
+
+/*
+ * Switch to the high-speed mode
+ */
+static int mmc_select_hs(struct mmc_card *card)
+{
+ int err;
+
+ err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+ card->ext_csd.generic_cmd6_time,
+ true, true, true);
if (!err)
+ mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+
+ return err;
+}
+
+/*
+ * Activate wide bus and DDR if supported.
+ */
+static int mmc_select_hs_ddr(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ u32 bus_width, ext_csd_bits;
+ int err = 0;
+
+ if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_52))
+ return 0;
+
+ bus_width = host->ios.bus_width;
+ if (bus_width == MMC_BUS_WIDTH_1)
+ return 0;
+
+ ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
+ EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
+
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_BUS_WIDTH,
+ ext_csd_bits,
+ card->ext_csd.generic_cmd6_time);
+ if (err) {
+ pr_warn("%s: switch to bus width %d ddr failed\n",
+ mmc_hostname(host), 1 << bus_width);
+ return err;
+ }
+
+ /*
+ * eMMC cards can support 3.3V to 1.2V i/o (vccq)
+ * signaling.
+ *
+ * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
+ *
+ * 1.8V vccq at 3.3V core voltage (vcc) is not required
+ * in the JEDEC spec for DDR.
+ *
+ * Do not force change in vccq since we are obviously
+ * working and no change to vccq is needed.
+ *
+ * WARNING: eMMC rules are NOT the same as SD DDR
+ */
+ if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_DDR_1_2V) {
+ err = __mmc_set_signal_voltage(host,
+ MMC_SIGNAL_VOLTAGE_120);
+ if (err)
+ return err;
+ }
+
+ mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
+
+ return err;
+}
+
+static int mmc_select_hs400(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ int err = 0;
+
+ /*
+ * HS400 mode requires 8-bit bus width
+ */
+ if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+ host->ios.bus_width == MMC_BUS_WIDTH_8))
+ return 0;
+
+ /*
+ * Before switching to dual data rate operation for HS400,
+ * it is required to convert from HS200 mode to HS mode.
+ */
+ mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
+ mmc_set_bus_speed(card);
+
+ err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS,
+ card->ext_csd.generic_cmd6_time,
+ true, true, true);
+ if (err) {
+ pr_warn("%s: switch to high-speed from hs200 failed, err:%d\n",
+ mmc_hostname(host), err);
+ return err;
+ }
+
+ err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_BUS_WIDTH,
+ EXT_CSD_DDR_BUS_WIDTH_8,
+ card->ext_csd.generic_cmd6_time);
+ if (err) {
+ pr_warn("%s: switch to bus width for hs400 failed, err:%d\n",
+ mmc_hostname(host), err);
+ return err;
+ }
+
+ err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+ EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS400,
+ card->ext_csd.generic_cmd6_time,
+ true, true, true);
+ if (err) {
+ pr_warn("%s: switch to hs400 failed, err:%d\n",
+ mmc_hostname(host), err);
+ return err;
+ }
+
+ mmc_set_timing(host, MMC_TIMING_MMC_HS400);
+ mmc_set_bus_speed(card);
+
+ return 0;
+}
+
+/*
+ * For device supporting HS200 mode, the following sequence
+ * should be done before executing the tuning process.
+ * 1. set the desired bus width(4-bit or 8-bit, 1-bit is not supported)
+ * 2. switch to HS200 mode
+ * 3. set the clock to > 52Mhz and <=200MHz
+ */
+static int mmc_select_hs200(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ int err = -EINVAL;
+
+ if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
+ err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_120);
+
+ if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
+ err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_180);
+
+ /* If fails try again during next card power cycle */
+ if (err)
+ goto err;
+
+ /*
+ * Set the bus width(4 or 8) with host's support and
+ * switch to HS200 mode if bus width is set successfully.
+ */
+ err = mmc_select_bus_width(card);
+ if (!IS_ERR_VALUE(err)) {
err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_HS_TIMING, 2,
- card->ext_csd.generic_cmd6_time,
- true, true, true);
+ EXT_CSD_HS_TIMING, EXT_CSD_TIMING_HS200,
+ card->ext_csd.generic_cmd6_time,
+ true, true, true);
+ if (!err)
+ mmc_set_timing(host, MMC_TIMING_MMC_HS200);
+ }
err:
return err;
}
/*
+ * Activate High Speed or HS200 mode if supported.
+ */
+static int mmc_select_timing(struct mmc_card *card)
+{
+ int err = 0;
+
+ if ((card->csd.mmca_vsn < CSD_SPEC_VER_4 &&
+ card->ext_csd.hs_max_dtr == 0))
+ goto bus_speed;
+
+ if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
+ err = mmc_select_hs200(card);
+ else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS)
+ err = mmc_select_hs(card);
+
+ if (err && err != -EBADMSG)
+ return err;
+
+ if (err) {
+ pr_warn("%s: switch to %s failed\n",
+ mmc_card_hs(card) ? "high-speed" :
+ (mmc_card_hs200(card) ? "hs200" : ""),
+ mmc_hostname(card->host));
+ err = 0;
+ }
+
+bus_speed:
+ /*
+ * Set the bus speed to the selected bus timing.
+ * If timing is not selected, backward compatible is the default.
+ */
+ mmc_set_bus_speed(card);
+ return err;
+}
+
+/*
+ * Execute tuning sequence to seek the proper bus operating
+ * conditions for HS200 and HS400, which sends CMD21 to the device.
+ */
+static int mmc_hs200_tuning(struct mmc_card *card)
+{
+ struct mmc_host *host = card->host;
+ int err = 0;
+
+ /*
+ * Timing should be adjusted to the HS400 target
+ * operation frequency for tuning process
+ */
+ if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+ host->ios.bus_width == MMC_BUS_WIDTH_8)
+ if (host->ops->prepare_hs400_tuning)
+ host->ops->prepare_hs400_tuning(host, &host->ios);
+
+ if (host->ops->execute_tuning) {
+ mmc_host_clk_hold(host);
+ err = host->ops->execute_tuning(host,
+ MMC_SEND_TUNING_BLOCK_HS200);
+ mmc_host_clk_release(host);
+
+ if (err)
+ pr_warn("%s: tuning execution failed\n",
+ mmc_hostname(host));
+ }
+
+ return err;
+}
+
+/*
* Handle the detection and initialisation of a card.
*
* In the case of a resume, "oldcard" will contain the card
@@ -874,9 +1177,8 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
struct mmc_card *oldcard)
{
struct mmc_card *card;
- int err, ddr = 0;
+ int err;
u32 cid[4];
- unsigned int max_dtr;
u32 rocr;
u8 *ext_csd = NULL;
@@ -1068,206 +1370,34 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr,
}
/*
- * Activate high speed (if supported)
- */
- if (card->ext_csd.hs_max_dtr != 0) {
- err = 0;
- if (card->ext_csd.hs_max_dtr > 52000000 &&
- host->caps2 & MMC_CAP2_HS200)
- err = mmc_select_hs200(card);
- else if (host->caps & MMC_CAP_MMC_HIGHSPEED)
- err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_HS_TIMING, 1,
- card->ext_csd.generic_cmd6_time,
- true, true, true);
-
- if (err && err != -EBADMSG)
- goto free_card;
-
- if (err) {
- pr_warning("%s: switch to highspeed failed\n",
- mmc_hostname(card->host));
- err = 0;
- } else {
- if (card->ext_csd.hs_max_dtr > 52000000 &&
- host->caps2 & MMC_CAP2_HS200) {
- mmc_card_set_hs200(card);
- mmc_set_timing(card->host,
- MMC_TIMING_MMC_HS200);
- } else {
- mmc_card_set_highspeed(card);
- mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
- }
- }
- }
-
- /*
- * Compute bus speed.
- */
- max_dtr = (unsigned int)-1;
-
- if (mmc_card_highspeed(card) || mmc_card_hs200(card)) {
- if (max_dtr > card->ext_csd.hs_max_dtr)
- max_dtr = card->ext_csd.hs_max_dtr;
- if (mmc_card_highspeed(card) && (max_dtr > 52000000))
- max_dtr = 52000000;
- } else if (max_dtr > card->csd.max_dtr) {
- max_dtr = card->csd.max_dtr;
- }
-
- mmc_set_clock(host, max_dtr);
-
- /*
- * Indicate DDR mode (if supported).
+ * Select timing interface
*/
- if (mmc_card_highspeed(card)) {
- if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_8V)
- && (host->caps & MMC_CAP_1_8V_DDR))
- ddr = MMC_1_8V_DDR_MODE;
- else if ((card->ext_csd.card_type & EXT_CSD_CARD_TYPE_DDR_1_2V)
- && (host->caps & MMC_CAP_1_2V_DDR))
- ddr = MMC_1_2V_DDR_MODE;
- }
+ err = mmc_select_timing(card);
+ if (err)
+ goto free_card;
- /*
- * Indicate HS200 SDR mode (if supported).
- */
if (mmc_card_hs200(card)) {
- u32 ext_csd_bits;
- u32 bus_width = card->host->ios.bus_width;
-
- /*
- * For devices supporting HS200 mode, the bus width has
- * to be set before executing the tuning function. If
- * set before tuning, then device will respond with CRC
- * errors for responses on CMD line. So for HS200 the
- * sequence will be
- * 1. set bus width 4bit / 8 bit (1 bit not supported)
- * 2. switch to HS200 mode
- * 3. set the clock to > 52Mhz <=200MHz and
- * 4. execute tuning for HS200
- */
- if ((host->caps2 & MMC_CAP2_HS200) &&
- card->host->ops->execute_tuning) {
- mmc_host_clk_hold(card->host);
- err = card->host->ops->execute_tuning(card->host,
- MMC_SEND_TUNING_BLOCK_HS200);
- mmc_host_clk_release(card->host);
- }
- if (err) {
- pr_warning("%s: tuning execution failed\n",
- mmc_hostname(card->host));
+ err = mmc_hs200_tuning(card);
+ if (err)
goto err;
- }
- ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
- EXT_CSD_BUS_WIDTH_8 : EXT_CSD_BUS_WIDTH_4;
- err = mmc_select_powerclass(card, ext_csd_bits);
+ err = mmc_select_hs400(card);
if (err)
- pr_warning("%s: power class selection to bus width %d"
- " failed\n", mmc_hostname(card->host),
- 1 << bus_width);
+ goto err;
+ } else if (mmc_card_hs(card)) {
+ /* Select the desired bus width optionally */
+ err = mmc_select_bus_width(card);
+ if (!IS_ERR_VALUE(err)) {
+ err = mmc_select_hs_ddr(card);
+ if (err)
+ goto err;
+ }
}
/*
- * Activate wide bus and DDR (if supported).
+ * Choose the power class with selected bus interface
*/
- if (!mmc_card_hs200(card) &&
- (card->csd.mmca_vsn >= CSD_SPEC_VER_4) &&
- (host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA))) {
- static unsigned ext_csd_bits[][2] = {
- { EXT_CSD_BUS_WIDTH_8, EXT_CSD_DDR_BUS_WIDTH_8 },
- { EXT_CSD_BUS_WIDTH_4, EXT_CSD_DDR_BUS_WIDTH_4 },
- { EXT_CSD_BUS_WIDTH_1, EXT_CSD_BUS_WIDTH_1 },
- };
- static unsigned bus_widths[] = {
- MMC_BUS_WIDTH_8,
- MMC_BUS_WIDTH_4,
- MMC_BUS_WIDTH_1
- };
- unsigned idx, bus_width = 0;
-
- if (host->caps & MMC_CAP_8_BIT_DATA)
- idx = 0;
- else
- idx = 1;
- for (; idx < ARRAY_SIZE(bus_widths); idx++) {
- bus_width = bus_widths[idx];
- if (bus_width == MMC_BUS_WIDTH_1)
- ddr = 0; /* no DDR for 1-bit width */
- err = mmc_select_powerclass(card, ext_csd_bits[idx][0]);
- if (err)
- pr_warning("%s: power class selection to "
- "bus width %d failed\n",
- mmc_hostname(card->host),
- 1 << bus_width);
-
- err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_BUS_WIDTH,
- ext_csd_bits[idx][0],
- card->ext_csd.generic_cmd6_time);
- if (!err) {
- mmc_set_bus_width(card->host, bus_width);
-
- /*
- * If controller can't handle bus width test,
- * compare ext_csd previously read in 1 bit mode
- * against ext_csd at new bus width
- */
- if (!(host->caps & MMC_CAP_BUS_WIDTH_TEST))
- err = mmc_compare_ext_csds(card,
- bus_width);
- else
- err = mmc_bus_test(card, bus_width);
- if (!err)
- break;
- }
- }
-
- if (!err && ddr) {
- err = mmc_select_powerclass(card, ext_csd_bits[idx][1]);
- if (err)
- pr_warning("%s: power class selection to "
- "bus width %d ddr %d failed\n",
- mmc_hostname(card->host),
- 1 << bus_width, ddr);
-
- err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
- EXT_CSD_BUS_WIDTH,
- ext_csd_bits[idx][1],
- card->ext_csd.generic_cmd6_time);
- }
- if (err) {
- pr_warning("%s: switch to bus width %d ddr %d "
- "failed\n", mmc_hostname(card->host),
- 1 << bus_width, ddr);
- goto free_card;
- } else if (ddr) {
- /*
- * eMMC cards can support 3.3V to 1.2V i/o (vccq)
- * signaling.
- *
- * EXT_CSD_CARD_TYPE_DDR_1_8V means 3.3V or 1.8V vccq.
- *
- * 1.8V vccq at 3.3V core voltage (vcc) is not required
- * in the JEDEC spec for DDR.
- *
- * Do not force change in vccq since we are obviously
- * working and no change to vccq is needed.
- *
- * WARNING: eMMC rules are NOT the same as SD DDR
- */
- if (ddr == MMC_1_2V_DDR_MODE) {
- err = __mmc_set_signal_voltage(host,
- MMC_SIGNAL_VOLTAGE_120);
- if (err)
- goto err;
- }
- mmc_card_set_ddr_mode(card);
- mmc_set_timing(card->host, MMC_TIMING_UHS_DDR50);
- mmc_set_bus_width(card->host, bus_width);
- }
- }
+ mmc_select_powerclass(card);
/*
* Enable HPI feature (if supported)
@@ -1507,7 +1637,6 @@ static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
err = mmc_sleep(host);
else if (!mmc_host_is_spi(host))
err = mmc_deselect_cards(host);
- host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
if (!err) {
mmc_power_off(host);
@@ -1637,7 +1766,6 @@ static int mmc_power_restore(struct mmc_host *host)
{
int ret;
- host->card->state &= ~(MMC_STATE_HIGHSPEED | MMC_STATE_HIGHSPEED_200);
mmc_claim_host(host);
ret = mmc_init_card(host, host->card->ocr, host->card);
mmc_release_host(host);
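
Note on the reworked mmc.c init path above: mmc_init_card() now funnels mode selection through mmc_select_timing(), tunes via mmc_hs200_tuning() and only then upgrades to HS400, with the power class chosen last. mmc_hs200_tuning() invokes two host callbacks; the sketch below shows how a host driver might wire them up. The prototypes are assumed from the call sites in this hunk (the authoritative declarations are in the include/linux/mmc/host.h change listed in the diffstat), and the driver itself is illustrative.

#include <linux/mmc/host.h>
#include <linux/mmc/mmc.h>

/* Assumed prototype, matching host->ops->prepare_hs400_tuning(host, &host->ios) */
static int example_prepare_hs400_tuning(struct mmc_host *mmc, struct mmc_ios *ios)
{
	/* Re-program clock/sampling delays for the HS400 target frequency
	 * before the core issues CMD21 tuning while still in HS200 mode. */
	return 0;
}

static int example_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
	/* opcode is MMC_SEND_TUNING_BLOCK_HS200 when called from
	 * mmc_hs200_tuning() above. */
	return 0;
}

static const struct mmc_host_ops example_ops = {
	.prepare_hs400_tuning	= example_prepare_hs400_tuning,
	.execute_tuning		= example_execute_tuning,
};
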
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 2dd359d2242f..0c44510bf717 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -707,18 +707,10 @@ static struct attribute *sd_std_attrs[] = {
&dev_attr_serial.attr,
NULL,
};
-
-static struct attribute_group sd_std_attr_group = {
- .attrs = sd_std_attrs,
-};
-
-static const struct attribute_group *sd_attr_groups[] = {
- &sd_std_attr_group,
- NULL,
-};
+ATTRIBUTE_GROUPS(sd_std);
struct device_type sd_type = {
- .groups = sd_attr_groups,
+ .groups = sd_std_groups,
};
/*
@@ -895,7 +887,7 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
{
unsigned max_dtr = (unsigned int)-1;
- if (mmc_card_highspeed(card)) {
+ if (mmc_card_hs(card)) {
if (max_dtr > card->sw_caps.hs_max_dtr)
max_dtr = card->sw_caps.hs_max_dtr;
} else if (max_dtr > card->csd.max_dtr) {
@@ -905,12 +897,6 @@ unsigned mmc_sd_get_max_clock(struct mmc_card *card)
return max_dtr;
}
-void mmc_sd_go_highspeed(struct mmc_card *card)
-{
- mmc_card_set_highspeed(card);
- mmc_set_timing(card->host, MMC_TIMING_SD_HS);
-}
-
/*
* Handle the detection and initialisation of a card.
*
@@ -985,16 +971,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr,
err = mmc_sd_init_uhs_card(card);
if (err)
goto free_card;
-
- /* Card is an ultra-high-speed card */
- mmc_card_set_uhs(card);
} else {
/*
* Attempt to change to high-speed (if supported)
*/
err = mmc_sd_switch_hs(card);
if (err > 0)
- mmc_sd_go_highspeed(card);
+ mmc_set_timing(card->host, MMC_TIMING_SD_HS);
else if (err)
goto free_card;
@@ -1089,7 +1072,7 @@ static int _mmc_sd_suspend(struct mmc_host *host)
if (!mmc_host_is_spi(host))
err = mmc_deselect_cards(host);
- host->card->state &= ~MMC_STATE_HIGHSPEED;
+
if (!err) {
mmc_power_off(host);
mmc_card_set_suspended(host->card);
@@ -1198,7 +1181,6 @@ static int mmc_sd_power_restore(struct mmc_host *host)
{
int ret;
- host->card->state &= ~MMC_STATE_HIGHSPEED;
mmc_claim_host(host);
ret = mmc_sd_init_card(host, host->card->ocr, host->card);
mmc_release_host(host);
diff --git a/drivers/mmc/core/sd.h b/drivers/mmc/core/sd.h
index 4b34b24f3f76..aab824a9a7f3 100644
--- a/drivers/mmc/core/sd.h
+++ b/drivers/mmc/core/sd.h
@@ -12,6 +12,5 @@ int mmc_sd_setup_card(struct mmc_host *host, struct mmc_card *card,
bool reinit);
unsigned mmc_sd_get_max_clock(struct mmc_card *card);
int mmc_sd_switch_hs(struct mmc_card *card);
-void mmc_sd_go_highspeed(struct mmc_card *card);
#endif
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 4d721c6e2af0..e636d9e99e4a 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -363,7 +363,7 @@ static unsigned mmc_sdio_get_max_clock(struct mmc_card *card)
{
unsigned max_dtr;
- if (mmc_card_highspeed(card)) {
+ if (mmc_card_hs(card)) {
/*
* The SDIO specification doesn't mention how
* the CIS transfer speed register relates to
@@ -733,7 +733,6 @@ try_again:
mmc_set_clock(host, card->cis.max_dtr);
if (card->cccr.high_speed) {
- mmc_card_set_highspeed(card);
mmc_set_timing(card->host, MMC_TIMING_SD_HS);
}
@@ -792,16 +791,13 @@ try_again:
err = mmc_sdio_init_uhs_card(card);
if (err)
goto remove;
-
- /* Card is an ultra-high-speed card */
- mmc_card_set_uhs(card);
} else {
/*
* Switch to high-speed (if supported).
*/
err = sdio_enable_hs(card);
if (err > 0)
- mmc_sd_go_highspeed(card);
+ mmc_set_timing(card->host, MMC_TIMING_SD_HS);
else if (err)
goto remove;
@@ -943,40 +939,21 @@ static int mmc_sdio_pre_suspend(struct mmc_host *host)
*/
static int mmc_sdio_suspend(struct mmc_host *host)
{
- int i, err = 0;
-
- for (i = 0; i < host->card->sdio_funcs; i++) {
- struct sdio_func *func = host->card->sdio_func[i];
- if (func && sdio_func_present(func) && func->dev.driver) {
- const struct dev_pm_ops *pmops = func->dev.driver->pm;
- err = pmops->suspend(&func->dev);
- if (err)
- break;
- }
- }
- while (err && --i >= 0) {
- struct sdio_func *func = host->card->sdio_func[i];
- if (func && sdio_func_present(func) && func->dev.driver) {
- const struct dev_pm_ops *pmops = func->dev.driver->pm;
- pmops->resume(&func->dev);
- }
- }
-
- if (!err && mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
+ if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
mmc_claim_host(host);
sdio_disable_wide(host->card);
mmc_release_host(host);
}
- if (!err && !mmc_card_keep_power(host))
+ if (!mmc_card_keep_power(host))
mmc_power_off(host);
- return err;
+ return 0;
}
static int mmc_sdio_resume(struct mmc_host *host)
{
- int i, err = 0;
+ int err = 0;
BUG_ON(!host);
BUG_ON(!host->card);
@@ -1019,24 +996,6 @@ static int mmc_sdio_resume(struct mmc_host *host)
wake_up_process(host->sdio_irq_thread);
mmc_release_host(host);
- /*
- * If the card looked to be the same as before suspending, then
- * we proceed to resume all card functions. If one of them returns
- * an error then we simply return that error to the core and the
- * card will be redetected as new. It is the responsibility of
- * the function driver to perform further tests with the extra
- * knowledge it has of the card to confirm the card is indeed the
- * same as before suspending (same MAC address for network cards,
- * etc.) and return an error otherwise.
- */
- for (i = 0; !err && i < host->card->sdio_funcs; i++) {
- struct sdio_func *func = host->card->sdio_func[i];
- if (func && sdio_func_present(func) && func->dev.driver) {
- const struct dev_pm_ops *pmops = func->dev.driver->pm;
- err = pmops->resume(&func->dev);
- }
- }
-
host->pm_flags &= ~MMC_PM_KEEP_POWER;
return err;
}
diff --git a/drivers/mmc/core/sdio_bus.c b/drivers/mmc/core/sdio_bus.c
index 92d1ba8e8153..4fa8fef9147f 100644
--- a/drivers/mmc/core/sdio_bus.c
+++ b/drivers/mmc/core/sdio_bus.c
@@ -197,20 +197,8 @@ static int sdio_bus_remove(struct device *dev)
#ifdef CONFIG_PM
-#ifdef CONFIG_PM_SLEEP
-static int pm_no_operation(struct device *dev)
-{
- /*
- * Prevent the PM core from calling SDIO device drivers' suspend
- * callback routines, which it is not supposed to do, by using this
- * empty function as the bus type suspend callaback for SDIO.
- */
- return 0;
-}
-#endif
-
static const struct dev_pm_ops sdio_bus_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(pm_no_operation, pm_no_operation)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_generic_suspend, pm_generic_resume)
SET_RUNTIME_PM_OPS(
pm_generic_runtime_suspend,
pm_generic_runtime_resume,
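
Note on the sdio.c/sdio_bus.c changes above: with the pm_no_operation stubs removed, system sleep reaches SDIO function drivers through pm_generic_suspend/resume, so function drivers supply ordinary dev_pm_ops instead of being iterated by mmc_sdio_suspend(). A hedged sketch of a function driver opting in; the driver name and callbacks are illustrative, the helpers are existing SDIO core API.

#include <linux/pm.h>
#include <linux/mmc/sdio_func.h>

static int example_sdio_suspend(struct device *dev)
{
	struct sdio_func *func = dev_to_sdio_func(dev);

	/* Ask the host to keep the card powered across system suspend. */
	return sdio_set_host_pm_flags(func, MMC_PM_KEEP_POWER);
}

static int example_sdio_resume(struct device *dev)
{
	return 0;
}

static const struct dev_pm_ops example_sdio_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(example_sdio_suspend, example_sdio_resume)
};

static struct sdio_driver example_sdio_driver = {
	.name	= "example_sdio",
	.drv	= {
		.pm = &example_sdio_pm_ops,
	},
};
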
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index aaa90460ed23..5cc13c8d35bb 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -90,6 +90,15 @@ static int process_sdio_pending_irqs(struct mmc_host *host)
return ret;
}
+void sdio_run_irqs(struct mmc_host *host)
+{
+ mmc_claim_host(host);
+ host->sdio_irq_pending = true;
+ process_sdio_pending_irqs(host);
+ mmc_release_host(host);
+}
+EXPORT_SYMBOL_GPL(sdio_run_irqs);
+
static int sdio_irq_thread(void *_host)
{
struct mmc_host *host = _host;
@@ -189,14 +198,20 @@ static int sdio_card_irq_get(struct mmc_card *card)
WARN_ON(!host->claimed);
if (!host->sdio_irqs++) {
- atomic_set(&host->sdio_irq_thread_abort, 0);
- host->sdio_irq_thread =
- kthread_run(sdio_irq_thread, host, "ksdioirqd/%s",
- mmc_hostname(host));
- if (IS_ERR(host->sdio_irq_thread)) {
- int err = PTR_ERR(host->sdio_irq_thread);
- host->sdio_irqs--;
- return err;
+ if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+ atomic_set(&host->sdio_irq_thread_abort, 0);
+ host->sdio_irq_thread =
+ kthread_run(sdio_irq_thread, host,
+ "ksdioirqd/%s", mmc_hostname(host));
+ if (IS_ERR(host->sdio_irq_thread)) {
+ int err = PTR_ERR(host->sdio_irq_thread);
+ host->sdio_irqs--;
+ return err;
+ }
+ } else {
+ mmc_host_clk_hold(host);
+ host->ops->enable_sdio_irq(host, 1);
+ mmc_host_clk_release(host);
}
}
@@ -211,8 +226,14 @@ static int sdio_card_irq_put(struct mmc_card *card)
BUG_ON(host->sdio_irqs < 1);
if (!--host->sdio_irqs) {
- atomic_set(&host->sdio_irq_thread_abort, 1);
- kthread_stop(host->sdio_irq_thread);
+ if (!(host->caps2 & MMC_CAP2_SDIO_IRQ_NOTHREAD)) {
+ atomic_set(&host->sdio_irq_thread_abort, 1);
+ kthread_stop(host->sdio_irq_thread);
+ } else {
+ mmc_host_clk_hold(host);
+ host->ops->enable_sdio_irq(host, 0);
+ mmc_host_clk_release(host);
+ }
}
return 0;
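
Note on the sdio_irq.c changes above: a host that sets MMC_CAP2_SDIO_IRQ_NOTHREAD (the flag is added in the include/linux/mmc/host.h change from the diffstat) keeps its enable_sdio_irq() callback armed and dispatches card interrupts itself via the newly exported sdio_run_irqs(), instead of relying on the ksdioirqd polling thread. A hedged sketch; the threaded-IRQ handler is illustrative, only sdio_run_irqs() and the capability flag come from this series.

#include <linux/interrupt.h>
#include <linux/mmc/host.h>

static irqreturn_t example_sdio_irq_thread(int irq, void *dev_id)
{
	struct mmc_host *mmc = dev_id;

	/* Dispatch pending SDIO function interrupts from the host's own
	 * threaded handler; claims and releases the host internally. */
	sdio_run_irqs(mmc);
	return IRQ_HANDLED;
}

/* During probe the driver would advertise the capability:
 *	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 * and implement .enable_sdio_irq() to mask/unmask the controller's
 * card-interrupt source when the core asks for it. */
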
diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c
index f7650b899e3d..5f89cb83d5f0 100644
--- a/drivers/mmc/core/slot-gpio.c
+++ b/drivers/mmc/core/slot-gpio.c
@@ -32,9 +32,7 @@ static irqreturn_t mmc_gpio_cd_irqt(int irq, void *dev_id)
/* Schedule a card detection after a debounce timeout */
struct mmc_host *host = dev_id;
- if (host->ops->card_event)
- host->ops->card_event(host);
-
+ host->trigger_card_event = true;
mmc_detect_change(host, msecs_to_jiffies(200));
return IRQ_HANDLED;
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index 779368b683d0..7fee22432e94 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -168,7 +168,7 @@ config MMC_SDHCI_ESDHC_IMX
config MMC_SDHCI_DOVE
tristate "SDHCI support on Marvell's Dove SoC"
- depends on ARCH_DOVE
+ depends on ARCH_DOVE || MACH_DOVE
depends on MMC_SDHCI_PLTFM
select MMC_SDHCI_IO_ACCESSORS
help
@@ -283,6 +283,15 @@ config MMC_SDHCI_BCM2835
If unsure, say N.
+config MMC_MOXART
+ tristate "MOXART SD/MMC Host Controller support"
+ depends on ARCH_MOXART && MMC
+ help
+ This selects support for the MOXART SD/MMC Host Controller.
+ MOXA provides one multi-functional card reader which can
+ be found on some embedded hardware such as UC-7112-LX.
+ If you have a controller with this interface, say Y here.
+
config MMC_OMAP
tristate "TI OMAP Multimedia Card Interface support"
depends on ARCH_OMAP
@@ -688,6 +697,12 @@ config MMC_WMT
To compile this driver as a module, choose M here: the
module will be called wmt-sdmmc.
+config MMC_USDHI6ROL0
+ tristate "Renesas USDHI6ROL0 SD/SDIO Host Controller support"
+ help
+ This selects support for the Renesas USDHI6ROL0 SD/SDIO
+ Host Controller
+
config MMC_REALTEK_PCI
tristate "Realtek PCI-E SD/MMC Card Interface Driver"
depends on MFD_RTSX_PCI
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index 61cbc241935b..7f81ddf1dd2c 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -50,7 +50,9 @@ obj-$(CONFIG_MMC_JZ4740) += jz4740_mmc.o
obj-$(CONFIG_MMC_VUB300) += vub300.o
obj-$(CONFIG_MMC_USHC) += ushc.o
obj-$(CONFIG_MMC_WMT) += wmt-sdmmc.o
+obj-$(CONFIG_MMC_MOXART) += moxart-mmc.o
obj-$(CONFIG_MMC_SUNXI) += sunxi-mmc.o
+obj-$(CONFIG_MMC_USDHI6ROL0) += usdhi6rol0.o
obj-$(CONFIG_MMC_REALTEK_PCI) += rtsx_pci_sdmmc.o
obj-$(CONFIG_MMC_REALTEK_USB) += rtsx_usb_sdmmc.o
diff --git a/drivers/mmc/host/atmel-mci.c b/drivers/mmc/host/atmel-mci.c
index 42706ea0ba85..aece7cafbb97 100644
--- a/drivers/mmc/host/atmel-mci.c
+++ b/drivers/mmc/host/atmel-mci.c
@@ -820,16 +820,9 @@ static void atmci_pdc_complete(struct atmel_mci *host)
atmci_pdc_cleanup(host);
- /*
- * If the card was removed, data will be NULL. No point trying
- * to send the stop command or waiting for NBUSY in this case.
- */
- if (host->data) {
- dev_dbg(&host->pdev->dev,
- "(%s) set pending xfer complete\n", __func__);
- atmci_set_pending(host, EVENT_XFER_COMPLETE);
- tasklet_schedule(&host->tasklet);
- }
+ dev_dbg(&host->pdev->dev, "(%s) set pending xfer complete\n", __func__);
+ atmci_set_pending(host, EVENT_XFER_COMPLETE);
+ tasklet_schedule(&host->tasklet);
}
static void atmci_dma_cleanup(struct atmel_mci *host)
diff --git a/drivers/mmc/host/dw_mmc-exynos.c b/drivers/mmc/host/dw_mmc-exynos.c
index 3423c5ed50c7..0fbc53ac7eae 100644
--- a/drivers/mmc/host/dw_mmc-exynos.c
+++ b/drivers/mmc/host/dw_mmc-exynos.c
@@ -187,7 +187,7 @@ static void dw_mci_exynos_set_ios(struct dw_mci *host, struct mmc_ios *ios)
unsigned long actual;
u8 div = priv->ciu_div + 1;
- if (ios->timing == MMC_TIMING_UHS_DDR50) {
+ if (ios->timing == MMC_TIMING_MMC_DDR52) {
mci_writel(host, CLKSEL, priv->ddr_timing);
/* Should be double rate for DDR mode */
if (ios->bus_width == MMC_BUS_WIDTH_8)
@@ -386,8 +386,7 @@ static int dw_mci_exynos_execute_tuning(struct dw_mci_slot *slot, u32 opcode,
/* Common capabilities of Exynos4/Exynos5 SoC */
static unsigned long exynos_dwmmc_caps[4] = {
- MMC_CAP_UHS_DDR50 | MMC_CAP_1_8V_DDR |
- MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
+ MMC_CAP_1_8V_DDR | MMC_CAP_8_BIT_DATA | MMC_CAP_CMD23,
MMC_CAP_CMD23,
MMC_CAP_CMD23,
MMC_CAP_CMD23,
@@ -426,7 +425,7 @@ static int dw_mci_exynos_probe(struct platform_device *pdev)
return dw_mci_pltfm_register(pdev, drv_data);
}
-const struct dev_pm_ops dw_mci_exynos_pmops = {
+static const struct dev_pm_ops dw_mci_exynos_pmops = {
SET_SYSTEM_SLEEP_PM_OPS(dw_mci_exynos_suspend, dw_mci_exynos_resume)
.resume_noirq = dw_mci_exynos_resume_noirq,
.thaw_noirq = dw_mci_exynos_resume_noirq,
diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c
index cced599d5aeb..1ac227c603b7 100644
--- a/drivers/mmc/host/dw_mmc.c
+++ b/drivers/mmc/host/dw_mmc.c
@@ -235,12 +235,6 @@ err:
}
#endif /* defined(CONFIG_DEBUG_FS) */
-static void dw_mci_set_timeout(struct dw_mci *host)
-{
- /* timeout (maximum) */
- mci_writel(host, TMOUT, 0xffffffff);
-}
-
static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
{
struct mmc_data *data;
@@ -257,9 +251,8 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd)
(cmd->opcode == SD_IO_RW_DIRECT &&
((cmd->arg >> 9) & 0x1FFFF) == SDIO_CCCR_ABORT))
cmdr |= SDMMC_CMD_STOP;
- else
- if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
- cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
+ else if (cmd->opcode != MMC_SEND_STATUS && cmd->data)
+ cmdr |= SDMMC_CMD_PRV_DAT_WAIT;
if (cmd->flags & MMC_RSP_PRESENT) {
/* We expect a response, so set this bit */
@@ -850,8 +843,6 @@ static void __dw_mci_start_request(struct dw_mci *host,
u32 cmdflags;
mrq = slot->mrq;
- if (host->pdata->select_slot)
- host->pdata->select_slot(slot->id);
host->cur_slot = slot;
host->mrq = mrq;
@@ -864,7 +855,7 @@ static void __dw_mci_start_request(struct dw_mci *host,
data = cmd->data;
if (data) {
- dw_mci_set_timeout(host);
+ mci_writel(host, TMOUT, 0xFFFFFFFF);
mci_writel(host, BYTCNT, data->blksz*data->blocks);
mci_writel(host, BLKSIZ, data->blksz);
}
@@ -962,7 +953,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
regs = mci_readl(slot->host, UHS_REG);
/* DDR mode set */
- if (ios->timing == MMC_TIMING_UHS_DDR50)
+ if (ios->timing == MMC_TIMING_MMC_DDR52)
regs |= ((0x1 << slot->id) << 16);
else
regs &= ~((0x1 << slot->id) << 16);
@@ -985,17 +976,11 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
switch (ios->power_mode) {
case MMC_POWER_UP:
set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags);
- /* Power up slot */
- if (slot->host->pdata->setpower)
- slot->host->pdata->setpower(slot->id, mmc->ocr_avail);
regs = mci_readl(slot->host, PWREN);
regs |= (1 << slot->id);
mci_writel(slot->host, PWREN, regs);
break;
case MMC_POWER_OFF:
- /* Power down slot */
- if (slot->host->pdata->setpower)
- slot->host->pdata->setpower(slot->id, 0);
regs = mci_readl(slot->host, PWREN);
regs &= ~(1 << slot->id);
mci_writel(slot->host, PWREN, regs);
@@ -1009,15 +994,13 @@ static int dw_mci_get_ro(struct mmc_host *mmc)
{
int read_only;
struct dw_mci_slot *slot = mmc_priv(mmc);
- struct dw_mci_board *brd = slot->host->pdata;
+ int gpio_ro = mmc_gpio_get_ro(mmc);
/* Use platform get_ro function, else try on board write protect */
if (slot->quirks & DW_MCI_SLOT_QUIRK_NO_WRITE_PROTECT)
read_only = 0;
- else if (brd->get_ro)
- read_only = brd->get_ro(slot->id);
- else if (gpio_is_valid(slot->wp_gpio))
- read_only = gpio_get_value(slot->wp_gpio);
+ else if (!IS_ERR_VALUE(gpio_ro))
+ read_only = gpio_ro;
else
read_only =
mci_readl(slot->host, WRTPRT) & (1 << slot->id) ? 1 : 0;
@@ -1039,8 +1022,6 @@ static int dw_mci_get_cd(struct mmc_host *mmc)
/* Use platform get_cd function, else try onboard card detect */
if (brd->quirks & DW_MCI_QUIRK_BROKEN_CARD_DETECTION)
present = 1;
- else if (brd->get_cd)
- present = !brd->get_cd(slot->id);
else if (!IS_ERR_VALUE(gpio_cd))
present = gpio_cd;
else
@@ -1248,7 +1229,7 @@ static int dw_mci_data_complete(struct dw_mci *host, struct mmc_data *data)
data->error = -EIO;
}
- dev_err(host->dev, "data error, status 0x%08x\n", status);
+ dev_dbg(host->dev, "data error, status 0x%08x\n", status);
/*
* After an error, there may be data lingering
@@ -2045,86 +2026,15 @@ static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
return quirks;
}
-
-/* find out bus-width for a given slot */
-static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
-{
- struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
- u32 bus_wd = 1;
-
- if (!np)
- return 1;
-
- if (of_property_read_u32(np, "bus-width", &bus_wd))
- dev_err(dev, "bus-width property not found, assuming width"
- " as 1\n");
- return bus_wd;
-}
-
-/* find the write protect gpio for a given slot; or -1 if none specified */
-static int dw_mci_of_get_wp_gpio(struct device *dev, u8 slot)
-{
- struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
- int gpio;
-
- if (!np)
- return -EINVAL;
-
- gpio = of_get_named_gpio(np, "wp-gpios", 0);
-
- /* Having a missing entry is valid; return silently */
- if (!gpio_is_valid(gpio))
- return -EINVAL;
-
- if (devm_gpio_request(dev, gpio, "dw-mci-wp")) {
- dev_warn(dev, "gpio [%d] request failed\n", gpio);
- return -EINVAL;
- }
-
- return gpio;
-}
-
-/* find the cd gpio for a given slot */
-static void dw_mci_of_get_cd_gpio(struct device *dev, u8 slot,
- struct mmc_host *mmc)
-{
- struct device_node *np = dw_mci_of_find_slot_node(dev, slot);
- int gpio;
-
- if (!np)
- return;
-
- gpio = of_get_named_gpio(np, "cd-gpios", 0);
-
- /* Having a missing entry is valid; return silently */
- if (!gpio_is_valid(gpio))
- return;
-
- if (mmc_gpio_request_cd(mmc, gpio, 0))
- dev_warn(dev, "gpio [%d] request failed\n", gpio);
-}
#else /* CONFIG_OF */
static int dw_mci_of_get_slot_quirks(struct device *dev, u8 slot)
{
return 0;
}
-static u32 dw_mci_of_get_bus_wd(struct device *dev, u8 slot)
-{
- return 1;
-}
static struct device_node *dw_mci_of_find_slot_node(struct device *dev, u8 slot)
{
return NULL;
}
-static int dw_mci_of_get_wp_gpio(struct device *dev, u8 slot)
-{
- return -EINVAL;
-}
-static void dw_mci_of_get_cd_gpio(struct device *dev, u8 slot,
- struct mmc_host *mmc)
-{
- return;
-}
#endif /* CONFIG_OF */
static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
@@ -2134,7 +2044,6 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
const struct dw_mci_drv_data *drv_data = host->drv_data;
int ctrl_id, ret;
u32 freq[2];
- u8 bus_width;
mmc = mmc_alloc_host(sizeof(struct dw_mci_slot), host->dev);
if (!mmc)
@@ -2158,17 +2067,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
mmc->f_max = freq[1];
}
- if (host->pdata->get_ocr)
- mmc->ocr_avail = host->pdata->get_ocr(id);
- else
- mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
-
- /*
- * Start with slot power disabled, it will be enabled when a card
- * is detected.
- */
- if (host->pdata->setpower)
- host->pdata->setpower(id, 0);
+ mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
if (host->pdata->caps)
mmc->caps = host->pdata->caps;
@@ -2189,19 +2088,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
if (host->pdata->caps2)
mmc->caps2 = host->pdata->caps2;
- if (host->pdata->get_bus_wd)
- bus_width = host->pdata->get_bus_wd(slot->id);
- else if (host->dev->of_node)
- bus_width = dw_mci_of_get_bus_wd(host->dev, slot->id);
- else
- bus_width = 1;
-
- switch (bus_width) {
- case 8:
- mmc->caps |= MMC_CAP_8_BIT_DATA;
- case 4:
- mmc->caps |= MMC_CAP_4_BIT_DATA;
- }
+ mmc_of_parse(mmc);
if (host->pdata->blk_settings) {
mmc->max_segs = host->pdata->blk_settings->max_segs;
@@ -2226,8 +2113,10 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id)
#endif /* CONFIG_MMC_DW_IDMAC */
}
- slot->wp_gpio = dw_mci_of_get_wp_gpio(host->dev, slot->id);
- dw_mci_of_get_cd_gpio(host->dev, slot->id, mmc);
+ if (dw_mci_get_cd(mmc))
+ set_bit(DW_MMC_CARD_PRESENT, &slot->flags);
+ else
+ clear_bit(DW_MMC_CARD_PRESENT, &slot->flags);
ret = mmc_add_host(mmc);
if (ret)
@@ -2249,10 +2138,6 @@ err_setup_bus:
static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id)
{
- /* Shutdown detect IRQ */
- if (slot->host->pdata->exit)
- slot->host->pdata->exit(id);
-
/* Debugfs stuff is cleaned up by mmc core */
mmc_remove_host(slot->mmc);
slot->host->slot[id] = NULL;
@@ -2399,24 +2284,9 @@ static struct dw_mci_board *dw_mci_parse_dt(struct dw_mci *host)
return ERR_PTR(ret);
}
- if (of_find_property(np, "keep-power-in-suspend", NULL))
- pdata->pm_caps |= MMC_PM_KEEP_POWER;
-
- if (of_find_property(np, "enable-sdio-wakeup", NULL))
- pdata->pm_caps |= MMC_PM_WAKE_SDIO_IRQ;
-
if (of_find_property(np, "supports-highspeed", NULL))
pdata->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED;
- if (of_find_property(np, "caps2-mmc-hs200-1_8v", NULL))
- pdata->caps2 |= MMC_CAP2_HS200_1_8V_SDR;
-
- if (of_find_property(np, "caps2-mmc-hs200-1_2v", NULL))
- pdata->caps2 |= MMC_CAP2_HS200_1_2V_SDR;
-
- if (of_get_property(np, "cd-inverted", NULL))
- pdata->caps2 |= MMC_CAP2_CD_ACTIVE_HIGH;
-
return pdata;
}
@@ -2442,9 +2312,9 @@ int dw_mci_probe(struct dw_mci *host)
}
}
- if (!host->pdata->select_slot && host->pdata->num_slots > 1) {
+ if (host->pdata->num_slots > 1) {
dev_err(host->dev,
- "Platform data must supply select_slot function\n");
+ "Platform data must supply num_slots.\n");
return -ENODEV;
}
@@ -2474,12 +2344,19 @@ int dw_mci_probe(struct dw_mci *host)
ret = clk_set_rate(host->ciu_clk, host->pdata->bus_hz);
if (ret)
dev_warn(host->dev,
- "Unable to set bus rate to %ul\n",
+ "Unable to set bus rate to %uHz\n",
host->pdata->bus_hz);
}
host->bus_hz = clk_get_rate(host->ciu_clk);
}
+ if (!host->bus_hz) {
+ dev_err(host->dev,
+ "Platform data must supply bus speed\n");
+ ret = -ENODEV;
+ goto err_clk_ciu;
+ }
+
if (drv_data && drv_data->init) {
ret = drv_data->init(host);
if (ret) {
@@ -2516,13 +2393,6 @@ int dw_mci_probe(struct dw_mci *host)
}
}
- if (!host->bus_hz) {
- dev_err(host->dev,
- "Platform data must supply bus speed\n");
- ret = -ENODEV;
- goto err_regulator;
- }
-
host->quirks = host->pdata->quirks;
spin_lock_init(&host->lock);
@@ -2666,8 +2536,6 @@ err_workqueue:
err_dmaunmap:
if (host->use_dma && host->dma_ops->exit)
host->dma_ops->exit(host);
-
-err_regulator:
if (host->vmmc)
regulator_disable(host->vmmc);
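
With the legacy platform callbacks gone, dw_mci_get_ro() above leans on the generic slot-gpio helper and only falls back to the controller's WRTPRT register. A minimal sketch of that fallback pattern follows; example_get_ro() and read_wrtprt_bit() are illustrative names, not part of the driver:

#include <linux/err.h>
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>

static int read_wrtprt_bit(struct mmc_host *mmc);      /* hypothetical register fallback */

static int example_get_ro(struct mmc_host *mmc)
{
        int gpio_ro = mmc_gpio_get_ro(mmc);

        /* A negative errno means no wp-gpios line is attached to this host. */
        if (!IS_ERR_VALUE(gpio_ro))
                return gpio_ro;         /* 0 = writable, 1 = write-protected */

        return read_wrtprt_bit(mmc);
}
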
diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h
index 68349779c396..738fa241d058 100644
--- a/drivers/mmc/host/dw_mmc.h
+++ b/drivers/mmc/host/dw_mmc.h
@@ -195,7 +195,6 @@ extern int dw_mci_resume(struct dw_mci *host);
* @mmc: The mmc_host representing this slot.
* @host: The MMC controller this slot is using.
* @quirks: Slot-level quirks (DW_MCI_SLOT_QUIRK_XXX)
- * @wp_gpio: If gpio_is_valid() we'll use this to read write protect.
* @ctype: Card type for this slot.
* @mrq: mmc_request currently being processed or waiting to be
* processed, or NULL when the slot is idle.
@@ -214,7 +213,6 @@ struct dw_mci_slot {
struct dw_mci *host;
int quirks;
- int wp_gpio;
u32 ctype;
diff --git a/drivers/mmc/host/jz4740_mmc.c b/drivers/mmc/host/jz4740_mmc.c
index de2139cf3444..537d6c7a5ae4 100644
--- a/drivers/mmc/host/jz4740_mmc.c
+++ b/drivers/mmc/host/jz4740_mmc.c
@@ -515,10 +515,13 @@ static irqreturn_t jz_mmc_irq_worker(int irq, void *devid)
jz4740_mmc_send_command(host, req->stop);
- timeout = jz4740_mmc_poll_irq(host, JZ_MMC_IRQ_PRG_DONE);
- if (timeout) {
- host->state = JZ4740_MMC_STATE_DONE;
- break;
+ if (mmc_resp_type(req->stop) & MMC_RSP_BUSY) {
+ timeout = jz4740_mmc_poll_irq(host,
+ JZ_MMC_IRQ_PRG_DONE);
+ if (timeout) {
+ host->state = JZ4740_MMC_STATE_DONE;
+ break;
+ }
}
case JZ4740_MMC_STATE_DONE:
break;
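
The jz4740 hunk above only waits for the PRG_DONE interrupt when the stop command actually signals busy on DAT0. Reduced to its essentials the test is just a response-type check; stop_needs_prg_done_wait() below is an illustrative helper, not the driver's own:

#include <linux/mmc/core.h>
#include <linux/types.h>

/* Only R1b-style responses (e.g. CMD12 terminating a write) hold DAT0 low
 * until programming finishes, so only they need the "programming done" wait. */
static bool stop_needs_prg_done_wait(struct mmc_command *stop)
{
        return mmc_resp_type(stop) & MMC_RSP_BUSY;
}
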
diff --git a/drivers/mmc/host/mmci.c b/drivers/mmc/host/mmci.c
index a084edd37af5..7ad463e9741c 100644
--- a/drivers/mmc/host/mmci.c
+++ b/drivers/mmc/host/mmci.c
@@ -301,7 +301,8 @@ static void mmci_set_clkreg(struct mmci_host *host, unsigned int desired)
if (host->mmc->ios.bus_width == MMC_BUS_WIDTH_8)
clk |= MCI_ST_8BIT_BUS;
- if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50)
+ if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
+ host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
clk |= MCI_ST_UX500_NEG_EDGE;
mmci_write_clkreg(host, clk);
@@ -764,7 +765,8 @@ static void mmci_start_data(struct mmci_host *host, struct mmc_data *data)
mmci_write_clkreg(host, clk);
}
- if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50)
+ if (host->mmc->ios.timing == MMC_TIMING_UHS_DDR50 ||
+ host->mmc->ios.timing == MMC_TIMING_MMC_DDR52)
datactrl |= MCI_ST_DPSM_DDRMODE;
/*
diff --git a/drivers/mmc/host/moxart-mmc.c b/drivers/mmc/host/moxart-mmc.c
new file mode 100644
index 000000000000..74924a04026e
--- /dev/null
+++ b/drivers/mmc/host/moxart-mmc.c
@@ -0,0 +1,730 @@
+/*
+ * MOXA ART MMC host driver.
+ *
+ * Copyright (C) 2014 Jonas Jensen
+ *
+ * Jonas Jensen <jonas.jensen@gmail.com>
+ *
+ * Based on code from
+ * Moxa Technologies Co., Ltd. <www.moxa.com>
+ *
+ * This file is licensed under the terms of the GNU General Public
+ * License version 2. This program is licensed "as is" without any
+ * warranty of any kind, whether express or implied.
+ */
+
+#include <linux/version.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/platform_device.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/sd.h>
+#include <linux/sched.h>
+#include <linux/io.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/clk.h>
+#include <linux/bitops.h>
+#include <linux/of_dma.h>
+#include <linux/spinlock.h>
+
+#define REG_COMMAND 0
+#define REG_ARGUMENT 4
+#define REG_RESPONSE0 8
+#define REG_RESPONSE1 12
+#define REG_RESPONSE2 16
+#define REG_RESPONSE3 20
+#define REG_RESPONSE_COMMAND 24
+#define REG_DATA_CONTROL 28
+#define REG_DATA_TIMER 32
+#define REG_DATA_LENGTH 36
+#define REG_STATUS 40
+#define REG_CLEAR 44
+#define REG_INTERRUPT_MASK 48
+#define REG_POWER_CONTROL 52
+#define REG_CLOCK_CONTROL 56
+#define REG_BUS_WIDTH 60
+#define REG_DATA_WINDOW 64
+#define REG_FEATURE 68
+#define REG_REVISION 72
+
+/* REG_COMMAND */
+#define CMD_SDC_RESET BIT(10)
+#define CMD_EN BIT(9)
+#define CMD_APP_CMD BIT(8)
+#define CMD_LONG_RSP BIT(7)
+#define CMD_NEED_RSP BIT(6)
+#define CMD_IDX_MASK 0x3f
+
+/* REG_RESPONSE_COMMAND */
+#define RSP_CMD_APP BIT(6)
+#define RSP_CMD_IDX_MASK 0x3f
+
+/* REG_DATA_CONTROL */
+#define DCR_DATA_FIFO_RESET BIT(8)
+#define DCR_DATA_THRES BIT(7)
+#define DCR_DATA_EN BIT(6)
+#define DCR_DMA_EN BIT(5)
+#define DCR_DATA_WRITE BIT(4)
+#define DCR_BLK_SIZE 0x0f
+
+/* REG_DATA_LENGTH */
+#define DATA_LEN_MASK 0xffffff
+
+/* REG_STATUS */
+#define WRITE_PROT BIT(12)
+#define CARD_DETECT BIT(11)
+/* Bits 1-10 below can be written to either the interrupt mask or the clear register. */
+#define CARD_CHANGE BIT(10)
+#define FIFO_ORUN BIT(9)
+#define FIFO_URUN BIT(8)
+#define DATA_END BIT(7)
+#define CMD_SENT BIT(6)
+#define DATA_CRC_OK BIT(5)
+#define RSP_CRC_OK BIT(4)
+#define DATA_TIMEOUT BIT(3)
+#define RSP_TIMEOUT BIT(2)
+#define DATA_CRC_FAIL BIT(1)
+#define RSP_CRC_FAIL BIT(0)
+
+#define MASK_RSP (RSP_TIMEOUT | RSP_CRC_FAIL | \
+ RSP_CRC_OK | CARD_DETECT | CMD_SENT)
+
+#define MASK_DATA (DATA_CRC_OK | DATA_END | \
+ DATA_CRC_FAIL | DATA_TIMEOUT)
+
+#define MASK_INTR_PIO (FIFO_URUN | FIFO_ORUN | CARD_CHANGE)
+
+/* REG_POWER_CONTROL */
+#define SD_POWER_ON BIT(4)
+#define SD_POWER_MASK 0x0f
+
+/* REG_CLOCK_CONTROL */
+#define CLK_HISPD BIT(9)
+#define CLK_OFF BIT(8)
+#define CLK_SD BIT(7)
+#define CLK_DIV_MASK 0x7f
+
+/* REG_BUS_WIDTH */
+#define BUS_WIDTH_8 BIT(2)
+#define BUS_WIDTH_4 BIT(1)
+#define BUS_WIDTH_1 BIT(0)
+
+#define MMC_VDD_360 23
+#define MIN_POWER (MMC_VDD_360 - SD_POWER_MASK)
+#define MAX_RETRIES 500000
+
+struct moxart_host {
+ spinlock_t lock;
+
+ void __iomem *base;
+
+ phys_addr_t reg_phys;
+
+ struct dma_chan *dma_chan_tx;
+ struct dma_chan *dma_chan_rx;
+ struct dma_async_tx_descriptor *tx_desc;
+ struct mmc_host *mmc;
+ struct mmc_request *mrq;
+ struct scatterlist *cur_sg;
+ struct completion dma_complete;
+ struct completion pio_complete;
+
+ u32 num_sg;
+ u32 data_remain;
+ u32 data_len;
+ u32 fifo_width;
+ u32 timeout;
+ u32 rate;
+
+ long sysclk;
+
+ bool have_dma;
+ bool is_removed;
+};
+
+static inline void moxart_init_sg(struct moxart_host *host,
+ struct mmc_data *data)
+{
+ host->cur_sg = data->sg;
+ host->num_sg = data->sg_len;
+ host->data_remain = host->cur_sg->length;
+
+ if (host->data_remain > host->data_len)
+ host->data_remain = host->data_len;
+}
+
+static inline int moxart_next_sg(struct moxart_host *host)
+{
+ int remain;
+ struct mmc_data *data = host->mrq->cmd->data;
+
+ host->cur_sg++;
+ host->num_sg--;
+
+ if (host->num_sg > 0) {
+ host->data_remain = host->cur_sg->length;
+ remain = host->data_len - data->bytes_xfered;
+ if (remain > 0 && remain < host->data_remain)
+ host->data_remain = remain;
+ }
+
+ return host->num_sg;
+}
+
+static int moxart_wait_for_status(struct moxart_host *host,
+ u32 mask, u32 *status)
+{
+ int ret = -ETIMEDOUT;
+ u32 i;
+
+ for (i = 0; i < MAX_RETRIES; i++) {
+ *status = readl(host->base + REG_STATUS);
+ if (!(*status & mask)) {
+ udelay(5);
+ continue;
+ }
+ writel(*status & mask, host->base + REG_CLEAR);
+ ret = 0;
+ break;
+ }
+
+ if (ret)
+ dev_err(mmc_dev(host->mmc), "timed out waiting for status\n");
+
+ return ret;
+}
+
+
+static void moxart_send_command(struct moxart_host *host,
+ struct mmc_command *cmd)
+{
+ u32 status, cmdctrl;
+
+ writel(RSP_TIMEOUT | RSP_CRC_OK |
+ RSP_CRC_FAIL | CMD_SENT, host->base + REG_CLEAR);
+ writel(cmd->arg, host->base + REG_ARGUMENT);
+
+ cmdctrl = cmd->opcode & CMD_IDX_MASK;
+ if (cmdctrl == SD_APP_SET_BUS_WIDTH || cmdctrl == SD_APP_OP_COND ||
+ cmdctrl == SD_APP_SEND_SCR || cmdctrl == SD_APP_SD_STATUS ||
+ cmdctrl == SD_APP_SEND_NUM_WR_BLKS)
+ cmdctrl |= CMD_APP_CMD;
+
+ if (cmd->flags & MMC_RSP_PRESENT)
+ cmdctrl |= CMD_NEED_RSP;
+
+ if (cmd->flags & MMC_RSP_136)
+ cmdctrl |= CMD_LONG_RSP;
+
+ writel(cmdctrl | CMD_EN, host->base + REG_COMMAND);
+
+ if (moxart_wait_for_status(host, MASK_RSP, &status) == -ETIMEDOUT)
+ cmd->error = -ETIMEDOUT;
+
+ if (status & RSP_TIMEOUT) {
+ cmd->error = -ETIMEDOUT;
+ return;
+ }
+ if (status & RSP_CRC_FAIL) {
+ cmd->error = -EIO;
+ return;
+ }
+ if (status & RSP_CRC_OK) {
+ if (cmd->flags & MMC_RSP_136) {
+ cmd->resp[3] = readl(host->base + REG_RESPONSE0);
+ cmd->resp[2] = readl(host->base + REG_RESPONSE1);
+ cmd->resp[1] = readl(host->base + REG_RESPONSE2);
+ cmd->resp[0] = readl(host->base + REG_RESPONSE3);
+ } else {
+ cmd->resp[0] = readl(host->base + REG_RESPONSE0);
+ }
+ }
+}
+
+static void moxart_dma_complete(void *param)
+{
+ struct moxart_host *host = param;
+
+ complete(&host->dma_complete);
+}
+
+static void moxart_transfer_dma(struct mmc_data *data, struct moxart_host *host)
+{
+ u32 len, dir_data, dir_slave;
+ unsigned long dma_time;
+ struct dma_async_tx_descriptor *desc = NULL;
+ struct dma_chan *dma_chan;
+
+ if (host->data_len == data->bytes_xfered)
+ return;
+
+ if (data->flags & MMC_DATA_WRITE) {
+ dma_chan = host->dma_chan_tx;
+ dir_data = DMA_TO_DEVICE;
+ dir_slave = DMA_MEM_TO_DEV;
+ } else {
+ dma_chan = host->dma_chan_rx;
+ dir_data = DMA_FROM_DEVICE;
+ dir_slave = DMA_DEV_TO_MEM;
+ }
+
+ len = dma_map_sg(dma_chan->device->dev, data->sg,
+ data->sg_len, dir_data);
+
+ if (len > 0) {
+ desc = dmaengine_prep_slave_sg(dma_chan, data->sg,
+ len, dir_slave,
+ DMA_PREP_INTERRUPT |
+ DMA_CTRL_ACK);
+ } else {
+ dev_err(mmc_dev(host->mmc), "dma_map_sg returned zero length\n");
+ }
+
+ if (desc) {
+ host->tx_desc = desc;
+ desc->callback = moxart_dma_complete;
+ desc->callback_param = host;
+ dmaengine_submit(desc);
+ dma_async_issue_pending(dma_chan);
+ }
+
+ data->bytes_xfered += host->data_remain;
+
+ dma_time = wait_for_completion_interruptible_timeout(
+ &host->dma_complete, host->timeout);
+
+ dma_unmap_sg(dma_chan->device->dev,
+ data->sg, data->sg_len,
+ dir_data);
+}
+
+
+static void moxart_transfer_pio(struct moxart_host *host)
+{
+ struct mmc_data *data = host->mrq->cmd->data;
+ u32 *sgp, len = 0, remain, status;
+
+ if (host->data_len == data->bytes_xfered)
+ return;
+
+ sgp = sg_virt(host->cur_sg);
+ remain = host->data_remain;
+
+ if (data->flags & MMC_DATA_WRITE) {
+ while (remain > 0) {
+ if (moxart_wait_for_status(host, FIFO_URUN, &status)
+ == -ETIMEDOUT) {
+ data->error = -ETIMEDOUT;
+ complete(&host->pio_complete);
+ return;
+ }
+ for (len = 0; len < remain && len < host->fifo_width;) {
+ iowrite32(*sgp, host->base + REG_DATA_WINDOW);
+ sgp++;
+ len += 4;
+ }
+ remain -= len;
+ }
+
+ } else {
+ while (remain > 0) {
+ if (moxart_wait_for_status(host, FIFO_ORUN, &status)
+ == -ETIMEDOUT) {
+ data->error = -ETIMEDOUT;
+ complete(&host->pio_complete);
+ return;
+ }
+ for (len = 0; len < remain && len < host->fifo_width;) {
+ /* SCR data must be read in big endian. */
+ if (data->mrq->cmd->opcode == SD_APP_SEND_SCR)
+ *sgp = ioread32be(host->base +
+ REG_DATA_WINDOW);
+ else
+ *sgp = ioread32(host->base +
+ REG_DATA_WINDOW);
+ sgp++;
+ len += 4;
+ }
+ remain -= len;
+ }
+ }
+
+ data->bytes_xfered += host->data_remain - remain;
+ host->data_remain = remain;
+
+ if (host->data_len != data->bytes_xfered)
+ moxart_next_sg(host);
+ else
+ complete(&host->pio_complete);
+}
+
+static void moxart_prepare_data(struct moxart_host *host)
+{
+ struct mmc_data *data = host->mrq->cmd->data;
+ u32 datactrl;
+ int blksz_bits;
+
+ if (!data)
+ return;
+
+ host->data_len = data->blocks * data->blksz;
+ blksz_bits = ffs(data->blksz) - 1;
+ BUG_ON(1 << blksz_bits != data->blksz);
+
+ moxart_init_sg(host, data);
+
+ datactrl = DCR_DATA_EN | (blksz_bits & DCR_BLK_SIZE);
+
+ if (data->flags & MMC_DATA_WRITE)
+ datactrl |= DCR_DATA_WRITE;
+
+ if ((host->data_len > host->fifo_width) && host->have_dma)
+ datactrl |= DCR_DMA_EN;
+
+ writel(DCR_DATA_FIFO_RESET, host->base + REG_DATA_CONTROL);
+ writel(MASK_DATA | FIFO_URUN | FIFO_ORUN, host->base + REG_CLEAR);
+ writel(host->rate, host->base + REG_DATA_TIMER);
+ writel(host->data_len, host->base + REG_DATA_LENGTH);
+ writel(datactrl, host->base + REG_DATA_CONTROL);
+}
+
+static void moxart_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct moxart_host *host = mmc_priv(mmc);
+ unsigned long pio_time, flags;
+ u32 status;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ init_completion(&host->dma_complete);
+ init_completion(&host->pio_complete);
+
+ host->mrq = mrq;
+
+ if (readl(host->base + REG_STATUS) & CARD_DETECT) {
+ mrq->cmd->error = -ETIMEDOUT;
+ goto request_done;
+ }
+
+ moxart_prepare_data(host);
+ moxart_send_command(host, host->mrq->cmd);
+
+ if (mrq->cmd->data) {
+ if ((host->data_len > host->fifo_width) && host->have_dma) {
+
+ writel(CARD_CHANGE, host->base + REG_INTERRUPT_MASK);
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ moxart_transfer_dma(mrq->cmd->data, host);
+
+ spin_lock_irqsave(&host->lock, flags);
+ } else {
+
+ writel(MASK_INTR_PIO, host->base + REG_INTERRUPT_MASK);
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ /* PIO transfers start from interrupt. */
+ pio_time = wait_for_completion_interruptible_timeout(
+ &host->pio_complete, host->timeout);
+
+ spin_lock_irqsave(&host->lock, flags);
+ }
+
+ if (host->is_removed) {
+ dev_err(mmc_dev(host->mmc), "card removed\n");
+ mrq->cmd->error = -ETIMEDOUT;
+ goto request_done;
+ }
+
+ if (moxart_wait_for_status(host, MASK_DATA, &status)
+ == -ETIMEDOUT) {
+ mrq->cmd->data->error = -ETIMEDOUT;
+ goto request_done;
+ }
+
+ if (status & DATA_CRC_FAIL)
+ mrq->cmd->data->error = -ETIMEDOUT;
+
+ if (mrq->cmd->data->stop)
+ moxart_send_command(host, mrq->cmd->data->stop);
+ }
+
+request_done:
+ spin_unlock_irqrestore(&host->lock, flags);
+ mmc_request_done(host->mmc, mrq);
+}
+
+static irqreturn_t moxart_irq(int irq, void *devid)
+{
+ struct moxart_host *host = (struct moxart_host *)devid;
+ u32 status;
+ unsigned long flags;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ status = readl(host->base + REG_STATUS);
+ if (status & CARD_CHANGE) {
+ host->is_removed = status & CARD_DETECT;
+ if (host->is_removed && host->have_dma) {
+ dmaengine_terminate_all(host->dma_chan_tx);
+ dmaengine_terminate_all(host->dma_chan_rx);
+ }
+ host->mrq = NULL;
+ writel(MASK_INTR_PIO, host->base + REG_CLEAR);
+ writel(CARD_CHANGE, host->base + REG_INTERRUPT_MASK);
+ mmc_detect_change(host->mmc, 0);
+ }
+ if (status & (FIFO_ORUN | FIFO_URUN) && host->mrq)
+ moxart_transfer_pio(host);
+
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ return IRQ_HANDLED;
+}
+
+static void moxart_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct moxart_host *host = mmc_priv(mmc);
+ unsigned long flags;
+ u8 power, div;
+ u32 ctrl;
+
+ spin_lock_irqsave(&host->lock, flags);
+
+ if (ios->clock) {
+ for (div = 0; div < CLK_DIV_MASK; ++div) {
+ if (ios->clock >= host->sysclk / (2 * (div + 1)))
+ break;
+ }
+ ctrl = CLK_SD | div;
+ host->rate = host->sysclk / (2 * (div + 1));
+ if (host->rate > host->sysclk)
+ ctrl |= CLK_HISPD;
+ writel(ctrl, host->base + REG_CLOCK_CONTROL);
+ }
+
+ if (ios->power_mode == MMC_POWER_OFF) {
+ writel(readl(host->base + REG_POWER_CONTROL) & ~SD_POWER_ON,
+ host->base + REG_POWER_CONTROL);
+ } else {
+ if (ios->vdd < MIN_POWER)
+ power = 0;
+ else
+ power = ios->vdd - MIN_POWER;
+
+ writel(SD_POWER_ON | (u32) power,
+ host->base + REG_POWER_CONTROL);
+ }
+
+ switch (ios->bus_width) {
+ case MMC_BUS_WIDTH_4:
+ writel(BUS_WIDTH_4, host->base + REG_BUS_WIDTH);
+ break;
+ case MMC_BUS_WIDTH_8:
+ writel(BUS_WIDTH_8, host->base + REG_BUS_WIDTH);
+ break;
+ default:
+ writel(BUS_WIDTH_1, host->base + REG_BUS_WIDTH);
+ break;
+ }
+
+ spin_unlock_irqrestore(&host->lock, flags);
+}
+
+
+static int moxart_get_ro(struct mmc_host *mmc)
+{
+ struct moxart_host *host = mmc_priv(mmc);
+
+ return !!(readl(host->base + REG_STATUS) & WRITE_PROT);
+}
+
+static struct mmc_host_ops moxart_ops = {
+ .request = moxart_request,
+ .set_ios = moxart_set_ios,
+ .get_ro = moxart_get_ro,
+};
+
+static int moxart_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct device_node *node = dev->of_node;
+ struct resource res_mmc;
+ struct mmc_host *mmc;
+ struct moxart_host *host = NULL;
+ struct dma_slave_config cfg;
+ struct clk *clk;
+ void __iomem *reg_mmc;
+ dma_cap_mask_t mask;
+ int irq, ret;
+ u32 i;
+
+ mmc = mmc_alloc_host(sizeof(struct moxart_host), dev);
+ if (!mmc) {
+ dev_err(dev, "mmc_alloc_host failed\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ ret = of_address_to_resource(node, 0, &res_mmc);
+ if (ret) {
+ dev_err(dev, "of_address_to_resource failed\n");
+ goto out;
+ }
+
+ irq = irq_of_parse_and_map(node, 0);
+ if (irq <= 0) {
+ dev_err(dev, "irq_of_parse_and_map failed\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ clk = of_clk_get(node, 0);
+ if (IS_ERR(clk)) {
+ dev_err(dev, "of_clk_get failed\n");
+ ret = PTR_ERR(clk);
+ goto out;
+ }
+
+ reg_mmc = devm_ioremap_resource(dev, &res_mmc);
+ if (IS_ERR(reg_mmc)) {
+ ret = PTR_ERR(reg_mmc);
+ goto out;
+ }
+
+ mmc_of_parse(mmc);
+
+ dma_cap_zero(mask);
+ dma_cap_set(DMA_SLAVE, mask);
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+ host->base = reg_mmc;
+ host->reg_phys = res_mmc.start;
+ host->timeout = msecs_to_jiffies(1000);
+ host->sysclk = clk_get_rate(clk);
+ host->fifo_width = readl(host->base + REG_FEATURE) << 2;
+ host->dma_chan_tx = of_dma_request_slave_channel(node, "tx");
+ host->dma_chan_rx = of_dma_request_slave_channel(node, "rx");
+
+ spin_lock_init(&host->lock);
+
+ mmc->ops = &moxart_ops;
+ mmc->f_max = DIV_ROUND_CLOSEST(host->sysclk, 2);
+ mmc->f_min = DIV_ROUND_CLOSEST(host->sysclk, CLK_DIV_MASK * 2);
+ mmc->ocr_avail = 0xffff00; /* Support 2.0v - 3.6v power. */
+
+ if (IS_ERR(host->dma_chan_tx) || IS_ERR(host->dma_chan_rx)) {
+ dev_dbg(dev, "PIO mode transfer enabled\n");
+ host->have_dma = false;
+ } else {
+ dev_dbg(dev, "DMA channels found (%p,%p)\n",
+ host->dma_chan_tx, host->dma_chan_rx);
+ host->have_dma = true;
+
+ cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+ cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES;
+
+ cfg.direction = DMA_MEM_TO_DEV;
+ cfg.src_addr = 0;
+ cfg.dst_addr = host->reg_phys + REG_DATA_WINDOW;
+ dmaengine_slave_config(host->dma_chan_tx, &cfg);
+
+ cfg.direction = DMA_DEV_TO_MEM;
+ cfg.src_addr = host->reg_phys + REG_DATA_WINDOW;
+ cfg.dst_addr = 0;
+ dmaengine_slave_config(host->dma_chan_rx, &cfg);
+ }
+
+ switch ((readl(host->base + REG_BUS_WIDTH) >> 3) & 3) {
+ case 1:
+ mmc->caps |= MMC_CAP_4_BIT_DATA;
+ break;
+ case 2:
+ mmc->caps |= MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA;
+ break;
+ default:
+ break;
+ }
+
+ writel(0, host->base + REG_INTERRUPT_MASK);
+
+ writel(CMD_SDC_RESET, host->base + REG_COMMAND);
+ for (i = 0; i < MAX_RETRIES; i++) {
+ if (!(readl(host->base + REG_COMMAND) & CMD_SDC_RESET))
+ break;
+ udelay(5);
+ }
+
+ ret = devm_request_irq(dev, irq, moxart_irq, 0, "moxart-mmc", host);
+ if (ret)
+ goto out;
+
+ dev_set_drvdata(dev, mmc);
+ mmc_add_host(mmc);
+
+ dev_dbg(dev, "IRQ=%d, FIFO is %d bytes\n", irq, host->fifo_width);
+
+ return 0;
+
+out:
+ if (mmc)
+ mmc_free_host(mmc);
+ return ret;
+}
+
+static int moxart_remove(struct platform_device *pdev)
+{
+ struct mmc_host *mmc = dev_get_drvdata(&pdev->dev);
+ struct moxart_host *host = mmc_priv(mmc);
+
+ dev_set_drvdata(&pdev->dev, NULL);
+
+ if (mmc) {
+ if (!IS_ERR(host->dma_chan_tx))
+ dma_release_channel(host->dma_chan_tx);
+ if (!IS_ERR(host->dma_chan_rx))
+ dma_release_channel(host->dma_chan_rx);
+ mmc_remove_host(mmc);
+ mmc_free_host(mmc);
+
+ writel(0, host->base + REG_INTERRUPT_MASK);
+ writel(0, host->base + REG_POWER_CONTROL);
+ writel(readl(host->base + REG_CLOCK_CONTROL) | CLK_OFF,
+ host->base + REG_CLOCK_CONTROL);
+ }
+
+ kfree(host);
+
+ return 0;
+}
+
+static const struct of_device_id moxart_mmc_match[] = {
+ { .compatible = "moxa,moxart-mmc" },
+ { .compatible = "faraday,ftsdc010" },
+ { }
+};
+
+static struct platform_driver moxart_mmc_driver = {
+ .probe = moxart_probe,
+ .remove = moxart_remove,
+ .driver = {
+ .name = "mmc-moxart",
+ .owner = THIS_MODULE,
+ .of_match_table = moxart_mmc_match,
+ },
+};
+module_platform_driver(moxart_mmc_driver);
+
+MODULE_ALIAS("platform:mmc-moxart");
+MODULE_DESCRIPTION("MOXA ART MMC driver");
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Jonas Jensen <jonas.jensen@gmail.com>");
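
For reference, the clock setup in moxart_set_ios() above derives the card clock as sysclk / (2 * (div + 1)) with a 7-bit divider, which is also where mmc->f_min and mmc->f_max come from in probe. A small stand-alone sketch of that divider search; the 100 MHz sysclk and 25 MHz target are assumed example values:

#include <stdio.h>

#define CLK_DIV_MASK    0x7f

static unsigned int pick_div(unsigned long sysclk, unsigned long want)
{
        unsigned int div;

        /* Smallest divider whose resulting rate does not exceed the request. */
        for (div = 0; div < CLK_DIV_MASK; ++div)
                if (want >= sysclk / (2 * (div + 1)))
                        break;
        return div;
}

int main(void)
{
        unsigned long sysclk = 100000000;       /* assumed APB clock */
        unsigned int div = pick_div(sysclk, 25000000);

        printf("div=%u -> %lu Hz\n", div, sysclk / (2 * (div + 1)));
        return 0;
}
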
diff --git a/drivers/mmc/host/mvsdio.c b/drivers/mmc/host/mvsdio.c
index 45aa2206741d..9377284f8544 100644
--- a/drivers/mmc/host/mvsdio.c
+++ b/drivers/mmc/host/mvsdio.c
@@ -354,6 +354,20 @@ static irqreturn_t mvsd_irq(int irq, void *dev)
intr_status, mvsd_read(MVSD_NOR_INTR_EN),
mvsd_read(MVSD_HW_STATE));
+ /*
+ * It looks like the SDIO IP can issue one late, spurious irq
+ * even though all irqs should be disabled. To work around this,
+ * bail out early if we didn't expect any irqs to occur.
+ */
+ if (!mvsd_read(MVSD_NOR_INTR_EN) && !mvsd_read(MVSD_ERR_INTR_EN)) {
+ dev_dbg(host->dev, "spurious irq detected intr 0x%04x intr_en 0x%04x erri 0x%04x erri_en 0x%04x\n",
+ mvsd_read(MVSD_NOR_INTR_STATUS),
+ mvsd_read(MVSD_NOR_INTR_EN),
+ mvsd_read(MVSD_ERR_INTR_STATUS),
+ mvsd_read(MVSD_ERR_INTR_EN));
+ return IRQ_HANDLED;
+ }
+
spin_lock(&host->lock);
/* PIO handling, if needed. Messy business... */
@@ -801,10 +815,10 @@ static int mvsd_probe(struct platform_device *pdev)
goto out;
if (!(mmc->caps & MMC_CAP_NEEDS_POLL))
- dev_notice(&pdev->dev, "using GPIO for card detection\n");
+ dev_dbg(&pdev->dev, "using GPIO for card detection\n");
else
- dev_notice(&pdev->dev,
- "lacking card detect (fall back to polling)\n");
+ dev_dbg(&pdev->dev, "lacking card detect (fall back to polling)\n");
+
return 0;
out:
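
The guard added to mvsd_irq() above bails out when no interrupt sources are enabled, treating the event as the controller's known late, spurious irq. The generic shape of that pattern is sketched below; the example_host type and register offsets are illustrative, not mvsdio's:

#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/spinlock.h>

#define EX_NOR_INTR_EN  0x28    /* illustrative offsets only */
#define EX_ERR_INTR_EN  0x2c

struct example_host {
        void __iomem *base;
        spinlock_t lock;
};

static irqreturn_t example_irq(int irq, void *dev_id)
{
        struct example_host *host = dev_id;

        /* Nothing enabled means nothing expected: acknowledge and leave
         * before taking the host lock. */
        if (!readl(host->base + EX_NOR_INTR_EN) &&
            !readl(host->base + EX_ERR_INTR_EN))
                return IRQ_HANDLED;

        spin_lock(&host->lock);
        /* ... normal status handling ... */
        spin_unlock(&host->lock);

        return IRQ_HANDLED;
}
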
diff --git a/drivers/mmc/host/mxcmmc.c b/drivers/mmc/host/mxcmmc.c
index f7199c83f5cf..ed1cb93c3784 100644
--- a/drivers/mmc/host/mxcmmc.c
+++ b/drivers/mmc/host/mxcmmc.c
@@ -124,9 +124,8 @@ enum mxcmci_type {
struct mxcmci_host {
struct mmc_host *mmc;
- struct resource *res;
void __iomem *base;
- int irq;
+ dma_addr_t phys_base;
int detect_irq;
struct dma_chan *dma;
struct dma_async_tx_descriptor *desc;
@@ -154,8 +153,6 @@ struct mxcmci_host {
struct work_struct datawork;
spinlock_t lock;
- struct regulator *vcc;
-
int burstlen;
int dmareq;
struct dma_slave_config dma_slave_config;
@@ -241,37 +238,15 @@ static inline void mxcmci_writew(struct mxcmci_host *host, u16 val, int reg)
static void mxcmci_set_clk_rate(struct mxcmci_host *host, unsigned int clk_ios);
-static inline void mxcmci_init_ocr(struct mxcmci_host *host)
-{
- host->vcc = regulator_get(mmc_dev(host->mmc), "vmmc");
-
- if (IS_ERR(host->vcc)) {
- host->vcc = NULL;
- } else {
- host->mmc->ocr_avail = mmc_regulator_get_ocrmask(host->vcc);
- if (host->pdata && host->pdata->ocr_avail)
- dev_warn(mmc_dev(host->mmc),
- "pdata->ocr_avail will not be used\n");
- }
-
- if (host->vcc == NULL) {
- /* fall-back to platform data */
- if (host->pdata && host->pdata->ocr_avail)
- host->mmc->ocr_avail = host->pdata->ocr_avail;
- else
- host->mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34;
- }
-}
-
-static inline void mxcmci_set_power(struct mxcmci_host *host,
- unsigned char power_mode,
- unsigned int vdd)
+static void mxcmci_set_power(struct mxcmci_host *host, unsigned int vdd)
{
- if (host->vcc) {
- if (power_mode == MMC_POWER_UP)
- mmc_regulator_set_ocr(host->mmc, host->vcc, vdd);
- else if (power_mode == MMC_POWER_OFF)
- mmc_regulator_set_ocr(host->mmc, host->vcc, 0);
+ if (!IS_ERR(host->mmc->supply.vmmc)) {
+ if (host->power_mode == MMC_POWER_UP)
+ mmc_regulator_set_ocr(host->mmc,
+ host->mmc->supply.vmmc, vdd);
+ else if (host->power_mode == MMC_POWER_OFF)
+ mmc_regulator_set_ocr(host->mmc,
+ host->mmc->supply.vmmc, 0);
}
if (host->pdata && host->pdata->setpower)
@@ -299,7 +274,6 @@ static void mxcmci_softreset(struct mxcmci_host *host)
mxcmci_writew(host, 0xff, MMC_REG_RES_TO);
}
-static int mxcmci_setup_dma(struct mmc_host *mmc);
#if IS_ENABLED(CONFIG_PPC_MPC512x)
static inline void buffer_swap32(u32 *buf, int len)
@@ -868,8 +842,8 @@ static int mxcmci_setup_dma(struct mmc_host *mmc)
struct mxcmci_host *host = mmc_priv(mmc);
struct dma_slave_config *config = &host->dma_slave_config;
- config->dst_addr = host->res->start + MMC_REG_BUFFER_ACCESS;
- config->src_addr = host->res->start + MMC_REG_BUFFER_ACCESS;
+ config->dst_addr = host->phys_base + MMC_REG_BUFFER_ACCESS;
+ config->src_addr = host->phys_base + MMC_REG_BUFFER_ACCESS;
config->dst_addr_width = 4;
config->src_addr_width = 4;
config->dst_maxburst = host->burstlen;
@@ -911,8 +885,8 @@ static void mxcmci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->cmdat &= ~CMD_DAT_CONT_BUS_WIDTH_4;
if (host->power_mode != ios->power_mode) {
- mxcmci_set_power(host, ios->power_mode, ios->vdd);
host->power_mode = ios->power_mode;
+ mxcmci_set_power(host, ios->vdd);
if (ios->power_mode == MMC_POWER_ON)
host->cmdat |= CMD_DAT_CONT_INIT;
@@ -1040,8 +1014,8 @@ static const struct mmc_host_ops mxcmci_ops = {
static int mxcmci_probe(struct platform_device *pdev)
{
struct mmc_host *mmc;
- struct mxcmci_host *host = NULL;
- struct resource *iores, *r;
+ struct mxcmci_host *host;
+ struct resource *res;
int ret = 0, irq;
bool dat3_card_detect = false;
dma_cap_mask_t mask;
@@ -1052,21 +1026,25 @@ static int mxcmci_probe(struct platform_device *pdev)
of_id = of_match_device(mxcmci_of_match, &pdev->dev);
- iores = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
irq = platform_get_irq(pdev, 0);
- if (!iores || irq < 0)
+ if (irq < 0)
return -EINVAL;
- r = request_mem_region(iores->start, resource_size(iores), pdev->name);
- if (!r)
- return -EBUSY;
+ mmc = mmc_alloc_host(sizeof(*host), &pdev->dev);
+ if (!mmc)
+ return -ENOMEM;
+
+ host = mmc_priv(mmc);
- mmc = mmc_alloc_host(sizeof(struct mxcmci_host), &pdev->dev);
- if (!mmc) {
- ret = -ENOMEM;
- goto out_release_mem;
+ host->base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(host->base)) {
+ ret = PTR_ERR(host->base);
+ goto out_free;
}
+ host->phys_base = res->start;
+
ret = mmc_of_parse(mmc);
if (ret)
goto out_free;
@@ -1084,13 +1062,6 @@ static int mxcmci_probe(struct platform_device *pdev)
mmc->max_req_size = mmc->max_blk_size * mmc->max_blk_count;
mmc->max_seg_size = mmc->max_req_size;
- host = mmc_priv(mmc);
- host->base = ioremap(r->start, resource_size(r));
- if (!host->base) {
- ret = -ENOMEM;
- goto out_free;
- }
-
if (of_id) {
const struct platform_device_id *id_entry = of_id->data;
host->devtype = id_entry->driver_data;
@@ -1112,7 +1083,14 @@ static int mxcmci_probe(struct platform_device *pdev)
&& !of_property_read_bool(pdev->dev.of_node, "cd-gpios"))
dat3_card_detect = true;
- mxcmci_init_ocr(host);
+ ret = mmc_regulator_get_supply(mmc);
+ if (ret) {
+ if (pdata && ret != -EPROBE_DEFER)
+ mmc->ocr_avail = pdata->ocr_avail ? :
+ MMC_VDD_32_33 | MMC_VDD_33_34;
+ else
+ goto out_free;
+ }
if (dat3_card_detect)
host->default_irq_mask =
@@ -1120,19 +1098,16 @@ static int mxcmci_probe(struct platform_device *pdev)
else
host->default_irq_mask = 0;
- host->res = r;
- host->irq = irq;
-
host->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(host->clk_ipg)) {
ret = PTR_ERR(host->clk_ipg);
- goto out_iounmap;
+ goto out_free;
}
host->clk_per = devm_clk_get(&pdev->dev, "per");
if (IS_ERR(host->clk_per)) {
ret = PTR_ERR(host->clk_per);
- goto out_iounmap;
+ goto out_free;
}
clk_prepare_enable(host->clk_per);
@@ -1159,9 +1134,9 @@ static int mxcmci_probe(struct platform_device *pdev)
if (!host->pdata) {
host->dma = dma_request_slave_channel(&pdev->dev, "rx-tx");
} else {
- r = platform_get_resource(pdev, IORESOURCE_DMA, 0);
- if (r) {
- host->dmareq = r->start;
+ res = platform_get_resource(pdev, IORESOURCE_DMA, 0);
+ if (res) {
+ host->dmareq = res->start;
host->dma_data.peripheral_type = IMX_DMATYPE_SDHC;
host->dma_data.priority = DMA_PRIO_LOW;
host->dma_data.dma_request = host->dmareq;
@@ -1178,7 +1153,8 @@ static int mxcmci_probe(struct platform_device *pdev)
INIT_WORK(&host->datawork, mxcmci_datawork);
- ret = request_irq(host->irq, mxcmci_irq, 0, DRIVER_NAME, host);
+ ret = devm_request_irq(&pdev->dev, irq, mxcmci_irq, 0,
+ dev_name(&pdev->dev), host);
if (ret)
goto out_free_dma;
@@ -1188,7 +1164,7 @@ static int mxcmci_probe(struct platform_device *pdev)
ret = host->pdata->init(&pdev->dev, mxcmci_detect_irq,
host->mmc);
if (ret)
- goto out_free_irq;
+ goto out_free_dma;
}
init_timer(&host->watchdog);
@@ -1199,20 +1175,17 @@ static int mxcmci_probe(struct platform_device *pdev)
return 0;
-out_free_irq:
- free_irq(host->irq, host);
out_free_dma:
if (host->dma)
dma_release_channel(host->dma);
+
out_clk_put:
clk_disable_unprepare(host->clk_per);
clk_disable_unprepare(host->clk_ipg);
-out_iounmap:
- iounmap(host->base);
+
out_free:
mmc_free_host(mmc);
-out_release_mem:
- release_mem_region(iores->start, resource_size(iores));
+
return ret;
}
@@ -1223,30 +1196,21 @@ static int mxcmci_remove(struct platform_device *pdev)
mmc_remove_host(mmc);
- if (host->vcc)
- regulator_put(host->vcc);
-
if (host->pdata && host->pdata->exit)
host->pdata->exit(&pdev->dev, mmc);
- free_irq(host->irq, host);
- iounmap(host->base);
-
if (host->dma)
dma_release_channel(host->dma);
clk_disable_unprepare(host->clk_per);
clk_disable_unprepare(host->clk_ipg);
- release_mem_region(host->res->start, resource_size(host->res));
-
mmc_free_host(mmc);
return 0;
}
-#ifdef CONFIG_PM
-static int mxcmci_suspend(struct device *dev)
+static int __maybe_unused mxcmci_suspend(struct device *dev)
{
struct mmc_host *mmc = dev_get_drvdata(dev);
struct mxcmci_host *host = mmc_priv(mmc);
@@ -1256,7 +1220,7 @@ static int mxcmci_suspend(struct device *dev)
return 0;
}
-static int mxcmci_resume(struct device *dev)
+static int __maybe_unused mxcmci_resume(struct device *dev)
{
struct mmc_host *mmc = dev_get_drvdata(dev);
struct mxcmci_host *host = mmc_priv(mmc);
@@ -1266,11 +1230,7 @@ static int mxcmci_resume(struct device *dev)
return 0;
}
-static const struct dev_pm_ops mxcmci_pm_ops = {
- .suspend = mxcmci_suspend,
- .resume = mxcmci_resume,
-};
-#endif
+static SIMPLE_DEV_PM_OPS(mxcmci_pm_ops, mxcmci_suspend, mxcmci_resume);
static struct platform_driver mxcmci_driver = {
.probe = mxcmci_probe,
@@ -1279,9 +1239,7 @@ static struct platform_driver mxcmci_driver = {
.driver = {
.name = DRIVER_NAME,
.owner = THIS_MODULE,
-#ifdef CONFIG_PM
.pm = &mxcmci_pm_ops,
-#endif
.of_match_table = mxcmci_of_match,
}
};
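
The suspend/resume rework above is the standard modern pattern: tag the callbacks __maybe_unused and let SIMPLE_DEV_PM_OPS() provide the dev_pm_ops (empty when sleep support is configured out), so the CONFIG_PM #ifdefs disappear. A minimal sketch with illustrative names:

#include <linux/platform_device.h>
#include <linux/pm.h>

static int __maybe_unused example_suspend(struct device *dev)
{
        /* quiesce the controller here */
        return 0;
}

static int __maybe_unused example_resume(struct device *dev)
{
        /* reinitialise the controller here */
        return 0;
}

static SIMPLE_DEV_PM_OPS(example_pm_ops, example_suspend, example_resume);

static struct platform_driver example_driver = {
        .driver = {
                .name = "example-mmc",
                .pm   = &example_pm_ops,
        },
};
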
diff --git a/drivers/mmc/host/mxs-mmc.c b/drivers/mmc/host/mxs-mmc.c
index 073e871a0fc8..babfea03ba8a 100644
--- a/drivers/mmc/host/mxs-mmc.c
+++ b/drivers/mmc/host/mxs-mmc.c
@@ -70,6 +70,7 @@ struct mxs_mmc_host {
unsigned char bus_width;
spinlock_t lock;
int sdio_irq_en;
+ bool broken_cd;
};
static int mxs_mmc_get_cd(struct mmc_host *mmc)
@@ -78,6 +79,9 @@ static int mxs_mmc_get_cd(struct mmc_host *mmc)
struct mxs_ssp *ssp = &host->ssp;
int present, ret;
+ if (host->broken_cd)
+ return -ENOSYS;
+
ret = mmc_gpio_get_cd(mmc);
if (ret >= 0)
return ret;
@@ -568,6 +572,7 @@ static int mxs_mmc_probe(struct platform_device *pdev)
{
const struct of_device_id *of_id =
of_match_device(mxs_mmc_dt_ids, &pdev->dev);
+ struct device_node *np = pdev->dev.of_node;
struct mxs_mmc_host *host;
struct mmc_host *mmc;
struct resource *iores;
@@ -634,6 +639,8 @@ static int mxs_mmc_probe(struct platform_device *pdev)
mmc->caps = MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
MMC_CAP_SDIO_IRQ | MMC_CAP_NEEDS_POLL;
+ host->broken_cd = of_property_read_bool(np, "broken-cd");
+
mmc->f_min = 400000;
mmc->f_max = 288000000;
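
The mxs-mmc change above makes card detection honour a "broken-cd" device-tree property before trying the slot GPIO and, last, the controller's own present bit; returning -ENOSYS tells the core to fall back to polling. Sketch of that ordering, where example_get_cd() and read_cd_status_bit() are illustrative names rather than the driver's:

#include <linux/errno.h>
#include <linux/mmc/host.h>
#include <linux/mmc/slot-gpio.h>

static int read_cd_status_bit(struct mmc_host *mmc);    /* hypothetical */

static int example_get_cd(struct mmc_host *mmc, bool broken_cd)
{
        int ret;

        if (broken_cd)
                return -ENOSYS;         /* core falls back to polling */

        ret = mmc_gpio_get_cd(mmc);
        if (ret >= 0)
                return ret;

        return read_cd_status_bit(mmc);
}
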
diff --git a/drivers/mmc/host/omap.c b/drivers/mmc/host/omap.c
index 5c2e58b29305..81974ecdfcbc 100644
--- a/drivers/mmc/host/omap.c
+++ b/drivers/mmc/host/omap.c
@@ -177,7 +177,7 @@ static void mmc_omap_fclk_offdelay(struct mmc_omap_slot *slot)
unsigned long tick_ns;
if (slot != NULL && slot->host->fclk_enabled && slot->fclk_freq > 0) {
- tick_ns = (1000000000 + slot->fclk_freq - 1) / slot->fclk_freq;
+ tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, slot->fclk_freq);
ndelay(8 * tick_ns);
}
}
@@ -435,7 +435,7 @@ static void mmc_omap_send_stop_work(struct work_struct *work)
struct mmc_data *data = host->stop_data;
unsigned long tick_ns;
- tick_ns = (1000000000 + slot->fclk_freq - 1)/slot->fclk_freq;
+ tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, slot->fclk_freq);
ndelay(8*tick_ns);
mmc_omap_start_command(host, data->stop);
@@ -477,7 +477,7 @@ mmc_omap_send_abort(struct mmc_omap_host *host, int maxloops)
u16 stat = 0;
/* Sending abort takes 80 clocks. Have some extra and round up */
- timeout = (120*1000000 + slot->fclk_freq - 1)/slot->fclk_freq;
+ timeout = DIV_ROUND_UP(120 * USEC_PER_SEC, slot->fclk_freq);
restarts = 0;
while (restarts < maxloops) {
OMAP_MMC_WRITE(host, STAT, 0xFFFF);
@@ -677,8 +677,8 @@ mmc_omap_xfer_data(struct mmc_omap_host *host, int write)
if (n > host->buffer_bytes_left)
n = host->buffer_bytes_left;
- nwords = n / 2;
- nwords += n & 1; /* handle odd number of bytes to transfer */
+ /* Round up to handle odd number of bytes to transfer */
+ nwords = DIV_ROUND_UP(n, 2);
host->buffer_bytes_left -= n;
host->total_bytes_left -= n;
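
The omap.c hunks above swap open-coded round-up divisions for DIV_ROUND_UP(). As a worked example of the tick computation, assuming a 96 MHz functional clock (an illustrative value only):

#include <linux/kernel.h>
#include <linux/time.h>

/* DIV_ROUND_UP(n, d) is ((n) + (d) - 1) / (d): with fclk_freq = 96000000,
 * DIV_ROUND_UP(NSEC_PER_SEC, 96000000) = 11 ns, so ndelay(8 * tick_ns)
 * waits at least eight full clock periods. */
static unsigned long fclk_tick_ns(unsigned long fclk_freq)
{
        return DIV_ROUND_UP(NSEC_PER_SEC, fclk_freq);
}
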
diff --git a/drivers/mmc/host/omap_hsmmc.c b/drivers/mmc/host/omap_hsmmc.c
index e91ee21549d0..6b7b75585926 100644
--- a/drivers/mmc/host/omap_hsmmc.c
+++ b/drivers/mmc/host/omap_hsmmc.c
@@ -31,7 +31,7 @@
#include <linux/of.h>
#include <linux/of_gpio.h>
#include <linux/of_device.h>
-#include <linux/omap-dma.h>
+#include <linux/omap-dmaengine.h>
#include <linux/mmc/host.h>
#include <linux/mmc/core.h>
#include <linux/mmc/mmc.h>
@@ -582,7 +582,7 @@ static void omap_hsmmc_set_clock(struct omap_hsmmc_host *host)
* - MMC/SD clock coming out of controller > 25MHz
*/
if ((mmc_slot(host).features & HSMMC_HAS_HSPE_SUPPORT) &&
- (ios->timing != MMC_TIMING_UHS_DDR50) &&
+ (ios->timing != MMC_TIMING_MMC_DDR52) &&
((OMAP_HSMMC_READ(host->base, CAPA) & HSS) == HSS)) {
regval = OMAP_HSMMC_READ(host->base, HCTL);
if (clkdiv && (clk_get_rate(host->fclk)/clkdiv) > 25000000)
@@ -602,7 +602,7 @@ static void omap_hsmmc_set_bus_width(struct omap_hsmmc_host *host)
u32 con;
con = OMAP_HSMMC_READ(host->base, CON);
- if (ios->timing == MMC_TIMING_UHS_DDR50)
+ if (ios->timing == MMC_TIMING_MMC_DDR52)
con |= DDR; /* configure in DDR mode */
else
con &= ~DDR;
@@ -920,16 +920,17 @@ omap_hsmmc_xfer_done(struct omap_hsmmc_host *host, struct mmc_data *data)
static void
omap_hsmmc_cmd_done(struct omap_hsmmc_host *host, struct mmc_command *cmd)
{
- host->cmd = NULL;
-
if (host->mrq->sbc && (host->cmd == host->mrq->sbc) &&
!host->mrq->sbc->error && !(host->flags & AUTO_CMD23)) {
+ host->cmd = NULL;
omap_hsmmc_start_dma_transfer(host);
omap_hsmmc_start_command(host, host->mrq->cmd,
host->mrq->data);
return;
}
+ host->cmd = NULL;
+
if (cmd->flags & MMC_RSP_PRESENT) {
if (cmd->flags & MMC_RSP_136) {
/* response type 2 */
@@ -1851,6 +1852,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
unsigned tx_req, rx_req;
struct pinctrl *pinctrl;
const struct omap_mmc_of_data *data;
+ void __iomem *base;
match = of_match_device(of_match_ptr(omap_mmc_of_match), &pdev->dev);
if (match) {
@@ -1881,9 +1883,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
if (res == NULL || irq < 0)
return -ENXIO;
- res = request_mem_region(res->start, resource_size(res), pdev->name);
- if (res == NULL)
- return -EBUSY;
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
ret = omap_hsmmc_gpio_init(pdata);
if (ret)
@@ -1904,7 +1906,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
host->irq = irq;
host->slot_id = 0;
host->mapbase = res->start + pdata->reg_offset;
- host->base = ioremap(host->mapbase, SZ_4K);
+ host->base = base + pdata->reg_offset;
host->power_mode = MMC_POWER_OFF;
host->next_data.cookie = 1;
host->pbias_enabled = 0;
@@ -1922,7 +1924,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
spin_lock_init(&host->irq_lock);
- host->fclk = clk_get(&pdev->dev, "fck");
+ host->fclk = devm_clk_get(&pdev->dev, "fck");
if (IS_ERR(host->fclk)) {
ret = PTR_ERR(host->fclk);
host->fclk = NULL;
@@ -1941,7 +1943,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
omap_hsmmc_context_save(host);
- host->dbclk = clk_get(&pdev->dev, "mmchsdb_fck");
+ host->dbclk = devm_clk_get(&pdev->dev, "mmchsdb_fck");
/*
* MMC can still work without debounce clock.
*/
@@ -1949,7 +1951,6 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
host->dbclk = NULL;
} else if (clk_prepare_enable(host->dbclk) != 0) {
dev_warn(mmc_dev(host->mmc), "Failed to enable debounce clk\n");
- clk_put(host->dbclk);
host->dbclk = NULL;
}
@@ -2018,7 +2019,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
}
/* Request IRQ for MMC operations */
- ret = request_irq(host->irq, omap_hsmmc_irq, 0,
+ ret = devm_request_irq(&pdev->dev, host->irq, omap_hsmmc_irq, 0,
mmc_hostname(mmc), host);
if (ret) {
dev_err(mmc_dev(host->mmc), "Unable to grab HSMMC IRQ\n");
@@ -2029,7 +2030,7 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
if (pdata->init(&pdev->dev) != 0) {
dev_err(mmc_dev(host->mmc),
"Unable to configure MMC IRQs\n");
- goto err_irq_cd_init;
+ goto err_irq;
}
}
@@ -2044,9 +2045,9 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
/* Request IRQ for card detect */
if ((mmc_slot(host).card_detect_irq)) {
- ret = request_threaded_irq(mmc_slot(host).card_detect_irq,
- NULL,
- omap_hsmmc_detect,
+ ret = devm_request_threaded_irq(&pdev->dev,
+ mmc_slot(host).card_detect_irq,
+ NULL, omap_hsmmc_detect,
IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
mmc_hostname(mmc), host);
if (ret) {
@@ -2089,15 +2090,12 @@ static int omap_hsmmc_probe(struct platform_device *pdev)
err_slot_name:
mmc_remove_host(mmc);
- free_irq(mmc_slot(host).card_detect_irq, host);
err_irq_cd:
if (host->use_reg)
omap_hsmmc_reg_put(host);
err_reg:
if (host->pdata->cleanup)
host->pdata->cleanup(&pdev->dev);
-err_irq_cd_init:
- free_irq(host->irq, host);
err_irq:
if (host->tx_chan)
dma_release_channel(host->tx_chan);
@@ -2105,27 +2103,19 @@ err_irq:
dma_release_channel(host->rx_chan);
pm_runtime_put_sync(host->dev);
pm_runtime_disable(host->dev);
- clk_put(host->fclk);
- if (host->dbclk) {
+ if (host->dbclk)
clk_disable_unprepare(host->dbclk);
- clk_put(host->dbclk);
- }
err1:
- iounmap(host->base);
mmc_free_host(mmc);
err_alloc:
omap_hsmmc_gpio_free(pdata);
err:
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res)
- release_mem_region(res->start, resource_size(res));
return ret;
}
static int omap_hsmmc_remove(struct platform_device *pdev)
{
struct omap_hsmmc_host *host = platform_get_drvdata(pdev);
- struct resource *res;
pm_runtime_get_sync(host->dev);
mmc_remove_host(host->mmc);
@@ -2133,9 +2123,6 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
omap_hsmmc_reg_put(host);
if (host->pdata->cleanup)
host->pdata->cleanup(&pdev->dev);
- free_irq(host->irq, host);
- if (mmc_slot(host).card_detect_irq)
- free_irq(mmc_slot(host).card_detect_irq, host);
if (host->tx_chan)
dma_release_channel(host->tx_chan);
@@ -2144,20 +2131,12 @@ static int omap_hsmmc_remove(struct platform_device *pdev)
pm_runtime_put_sync(host->dev);
pm_runtime_disable(host->dev);
- clk_put(host->fclk);
- if (host->dbclk) {
+ if (host->dbclk)
clk_disable_unprepare(host->dbclk);
- clk_put(host->dbclk);
- }
omap_hsmmc_gpio_free(host->pdata);
- iounmap(host->base);
mmc_free_host(host->mmc);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res)
- release_mem_region(res->start, resource_size(res));
-
return 0;
}
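
The omap_hsmmc rework above is largely a conversion to managed (devm_*) resources, which is why the matching iounmap/free_irq/clk_put calls and several error labels can simply be deleted: everything obtained through devm_* is released automatically on probe failure or unbind. A stripped-down probe illustrating the shape of that pattern, with purely illustrative names:

#include <linux/clk.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/platform_device.h>

static irqreturn_t example_irq(int irq, void *dev_id)
{
        return IRQ_HANDLED;
}

static int example_probe(struct platform_device *pdev)
{
        struct resource *res;
        void __iomem *base;
        struct clk *fclk;
        int irq;

        res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
        base = devm_ioremap_resource(&pdev->dev, res);  /* handles res == NULL */
        if (IS_ERR(base))
                return PTR_ERR(base);

        fclk = devm_clk_get(&pdev->dev, "fck");
        if (IS_ERR(fclk))
                return PTR_ERR(fclk);

        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
                return irq;

        /* No free_irq() needed in remove(): devm tears this down for us. */
        return devm_request_irq(&pdev->dev, irq, example_irq, 0,
                                dev_name(&pdev->dev), pdev);
}
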
diff --git a/drivers/mmc/host/rtsx_pci_sdmmc.c b/drivers/mmc/host/rtsx_pci_sdmmc.c
index 0b9ded13a3ae..0d519649b575 100644
--- a/drivers/mmc/host/rtsx_pci_sdmmc.c
+++ b/drivers/mmc/host/rtsx_pci_sdmmc.c
@@ -236,6 +236,9 @@ static void sd_send_cmd_get_rsp(struct realtek_pci_sdmmc *host,
case MMC_RSP_R1:
rsp_type = SD_RSP_TYPE_R1;
break;
+ case MMC_RSP_R1 & ~MMC_RSP_CRC:
+ rsp_type = SD_RSP_TYPE_R1 | SD_NO_CHECK_CRC7;
+ break;
case MMC_RSP_R1B:
rsp_type = SD_RSP_TYPE_R1b;
break;
@@ -816,6 +819,7 @@ static int sd_set_timing(struct realtek_pci_sdmmc *host, unsigned char timing)
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, CLK_CTL, CLK_LOW_FREQ, 0);
break;
+ case MMC_TIMING_MMC_DDR52:
case MMC_TIMING_UHS_DDR50:
rtsx_pci_add_cmd(pcr, WRITE_REG_CMD, SD_CFG1,
0x0C | SD_ASYNC_FIFO_NOT_RST,
@@ -896,6 +900,7 @@ static void sdmmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
host->vpclk = true;
host->double_clk = false;
break;
+ case MMC_TIMING_MMC_DDR52:
case MMC_TIMING_UHS_DDR50:
case MMC_TIMING_UHS_SDR25:
host->ssc_depth = RTSX_SSC_DEPTH_1M;
diff --git a/drivers/mmc/host/rtsx_usb_sdmmc.c b/drivers/mmc/host/rtsx_usb_sdmmc.c
index e11fafa6fc6b..5d3766e792f0 100644
--- a/drivers/mmc/host/rtsx_usb_sdmmc.c
+++ b/drivers/mmc/host/rtsx_usb_sdmmc.c
@@ -34,7 +34,8 @@
#include <linux/mfd/rtsx_usb.h>
#include <asm/unaligned.h>
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#if defined(CONFIG_LEDS_CLASS) || (defined(CONFIG_LEDS_CLASS_MODULE) && \
+ defined(CONFIG_MMC_REALTEK_USB_MODULE))
#include <linux/leds.h>
#include <linux/workqueue.h>
#define RTSX_USB_USE_LEDS_CLASS
@@ -59,7 +60,7 @@ struct rtsx_usb_sdmmc {
unsigned char power_mode;
-#if defined(CONFIG_LEDS_CLASS) || defined(CONFIG_LEDS_CLASS_MODULE)
+#ifdef RTSX_USB_USE_LEDS_CLASS
struct led_classdev led;
char led_name[32];
struct work_struct led_work;
diff --git a/drivers/mmc/host/sdhci-acpi.c b/drivers/mmc/host/sdhci-acpi.c
index ebb3f392b589..8ce3c28cb76e 100644
--- a/drivers/mmc/host/sdhci-acpi.c
+++ b/drivers/mmc/host/sdhci-acpi.c
@@ -102,11 +102,19 @@ static void sdhci_acpi_int_hw_reset(struct sdhci_host *host)
}
static const struct sdhci_ops sdhci_acpi_ops_dflt = {
+ .set_clock = sdhci_set_clock,
.enable_dma = sdhci_acpi_enable_dma,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static const struct sdhci_ops sdhci_acpi_ops_int = {
+ .set_clock = sdhci_set_clock,
.enable_dma = sdhci_acpi_enable_dma,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
.hw_reset = sdhci_acpi_int_hw_reset,
};
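
Starting with the sdhci-acpi hunk above, every sdhci glue driver in this series wires the generic helpers into its ops table explicitly; the core appears to rely on these callbacks rather than on built-in defaults from here on. The minimal ops wiring, assuming an in-tree driver that can include drivers/mmc/host/sdhci.h:

#include "sdhci.h"      /* drivers/mmc/host/sdhci.h */

static const struct sdhci_ops example_sdhci_ops = {
        .set_clock         = sdhci_set_clock,
        .set_bus_width     = sdhci_set_bus_width,
        .reset             = sdhci_reset,
        .set_uhs_signaling = sdhci_set_uhs_signaling,
};
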
diff --git a/drivers/mmc/host/sdhci-bcm-kona.c b/drivers/mmc/host/sdhci-bcm-kona.c
index 6f166e63b817..dd780c315a63 100644
--- a/drivers/mmc/host/sdhci-bcm-kona.c
+++ b/drivers/mmc/host/sdhci-bcm-kona.c
@@ -206,9 +206,13 @@ static void sdhci_bcm_kona_init_74_clocks(struct sdhci_host *host,
}
static struct sdhci_ops sdhci_bcm_kona_ops = {
+ .set_clock = sdhci_set_clock,
.get_max_clock = sdhci_bcm_kona_get_max_clk,
.get_timeout_clock = sdhci_bcm_kona_get_timeout_clock,
.platform_send_init_74_clocks = sdhci_bcm_kona_init_74_clocks,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
.card_event = sdhci_bcm_kona_card_event,
};
diff --git a/drivers/mmc/host/sdhci-bcm2835.c b/drivers/mmc/host/sdhci-bcm2835.c
index f6d8d67c545f..46af9a439d7b 100644
--- a/drivers/mmc/host/sdhci-bcm2835.c
+++ b/drivers/mmc/host/sdhci-bcm2835.c
@@ -131,8 +131,12 @@ static const struct sdhci_ops bcm2835_sdhci_ops = {
.read_l = bcm2835_sdhci_readl,
.read_w = bcm2835_sdhci_readw,
.read_b = bcm2835_sdhci_readb,
+ .set_clock = sdhci_set_clock,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
.get_min_clock = bcm2835_sdhci_get_min_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static const struct sdhci_pltfm_data bcm2835_sdhci_pdata = {
diff --git a/drivers/mmc/host/sdhci-cns3xxx.c b/drivers/mmc/host/sdhci-cns3xxx.c
index f2cc26633cb2..14b74075589a 100644
--- a/drivers/mmc/host/sdhci-cns3xxx.c
+++ b/drivers/mmc/host/sdhci-cns3xxx.c
@@ -30,13 +30,12 @@ static void sdhci_cns3xxx_set_clock(struct sdhci_host *host, unsigned int clock)
u16 clk;
unsigned long timeout;
- if (clock == host->clock)
- return;
+ host->mmc->actual_clock = 0;
sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
if (clock == 0)
- goto out;
+ return;
while (host->max_clk / div > clock) {
/*
@@ -75,13 +74,14 @@ static void sdhci_cns3xxx_set_clock(struct sdhci_host *host, unsigned int clock)
clk |= SDHCI_CLOCK_CARD_EN;
sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
-out:
- host->clock = clock;
}
static const struct sdhci_ops sdhci_cns3xxx_ops = {
.get_max_clock = sdhci_cns3xxx_get_max_clk,
.set_clock = sdhci_cns3xxx_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
@@ -90,8 +90,7 @@ static const struct sdhci_pltfm_data sdhci_cns3xxx_pdata = {
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_INVERTED_WRITE_PROTECT |
SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN |
- SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
- SDHCI_QUIRK_NONSTANDARD_CLOCK,
+ SDHCI_QUIRK_BROKEN_TIMEOUT_VAL,
};
static int sdhci_cns3xxx_probe(struct platform_device *pdev)
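
The cns3xxx change above (and the esdhc-imx one further down) shows the set_clock contract that goes with dropping SDHCI_QUIRK_NONSTANDARD_CLOCK: the callback clears mmc->actual_clock itself and simply returns after gating when the clock is zero, instead of keeping host->clock up to date. A skeleton of that shape, not tied to any particular controller:

#include "sdhci.h"      /* drivers/mmc/host/sdhci.h */

static void example_set_clock(struct sdhci_host *host, unsigned int clock)
{
        host->mmc->actual_clock = 0;

        /* Gate the card clock before touching the divider. */
        sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);

        if (clock == 0)
                return;

        /*
         * ... compute the divider for the requested rate, enable the
         * internal clock, wait for it to stabilise, then set
         * SDHCI_CLOCK_CARD_EN ...
         */
}
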
diff --git a/drivers/mmc/host/sdhci-dove.c b/drivers/mmc/host/sdhci-dove.c
index 736d7a2eb7ec..e6278ec007d7 100644
--- a/drivers/mmc/host/sdhci-dove.c
+++ b/drivers/mmc/host/sdhci-dove.c
@@ -21,28 +21,17 @@
#include <linux/clk.h>
#include <linux/err.h>
-#include <linux/gpio.h>
#include <linux/io.h>
#include <linux/mmc/host.h>
#include <linux/module.h>
#include <linux/of.h>
-#include <linux/of_gpio.h>
#include "sdhci-pltfm.h"
struct sdhci_dove_priv {
struct clk *clk;
- int gpio_cd;
};
-static irqreturn_t sdhci_dove_carddetect_irq(int irq, void *data)
-{
- struct sdhci_host *host = data;
-
- tasklet_schedule(&host->card_tasklet);
- return IRQ_HANDLED;
-}
-
static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
{
u16 ret;
@@ -60,8 +49,6 @@ static u16 sdhci_dove_readw(struct sdhci_host *host, int reg)
static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
{
- struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
- struct sdhci_dove_priv *priv = pltfm_host->priv;
u32 ret;
ret = readl(host->ioaddr + reg);
@@ -71,14 +58,6 @@ static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
/* Mask the support for 3.0V */
ret &= ~SDHCI_CAN_VDD_300;
break;
- case SDHCI_PRESENT_STATE:
- if (gpio_is_valid(priv->gpio_cd)) {
- if (gpio_get_value(priv->gpio_cd) == 0)
- ret |= SDHCI_CARD_PRESENT;
- else
- ret &= ~SDHCI_CARD_PRESENT;
- }
- break;
}
return ret;
}
@@ -86,6 +65,10 @@ static u32 sdhci_dove_readl(struct sdhci_host *host, int reg)
static const struct sdhci_ops sdhci_dove_ops = {
.read_w = sdhci_dove_readw,
.read_l = sdhci_dove_readl,
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static const struct sdhci_pltfm_data sdhci_dove_pdata = {
@@ -113,28 +96,9 @@ static int sdhci_dove_probe(struct platform_device *pdev)
priv->clk = devm_clk_get(&pdev->dev, NULL);
- if (pdev->dev.of_node) {
- priv->gpio_cd = of_get_named_gpio(pdev->dev.of_node,
- "cd-gpios", 0);
- } else {
- priv->gpio_cd = -EINVAL;
- }
-
- if (gpio_is_valid(priv->gpio_cd)) {
- ret = gpio_request(priv->gpio_cd, "sdhci-cd");
- if (ret) {
- dev_err(&pdev->dev, "card detect gpio request failed: %d\n",
- ret);
- return ret;
- }
- gpio_direction_input(priv->gpio_cd);
- }
-
host = sdhci_pltfm_init(pdev, &sdhci_dove_pdata, 0);
- if (IS_ERR(host)) {
- ret = PTR_ERR(host);
- goto err_sdhci_pltfm_init;
- }
+ if (IS_ERR(host))
+ return PTR_ERR(host);
pltfm_host = sdhci_priv(host);
pltfm_host->priv = priv;
@@ -142,39 +106,20 @@ static int sdhci_dove_probe(struct platform_device *pdev)
if (!IS_ERR(priv->clk))
clk_prepare_enable(priv->clk);
- sdhci_get_of_property(pdev);
+ ret = mmc_of_parse(host->mmc);
+ if (ret)
+ goto err_sdhci_add;
ret = sdhci_add_host(host);
if (ret)
goto err_sdhci_add;
- /*
- * We must request the IRQ after sdhci_add_host(), as the tasklet only
- * gets setup in sdhci_add_host() and we oops.
- */
- if (gpio_is_valid(priv->gpio_cd)) {
- ret = request_irq(gpio_to_irq(priv->gpio_cd),
- sdhci_dove_carddetect_irq,
- IRQF_TRIGGER_FALLING | IRQF_TRIGGER_RISING,
- mmc_hostname(host->mmc), host);
- if (ret) {
- dev_err(&pdev->dev, "card detect irq request failed: %d\n",
- ret);
- goto err_request_irq;
- }
- }
-
return 0;
-err_request_irq:
- sdhci_remove_host(host, 0);
err_sdhci_add:
if (!IS_ERR(priv->clk))
clk_disable_unprepare(priv->clk);
sdhci_pltfm_free(pdev);
-err_sdhci_pltfm_init:
- if (gpio_is_valid(priv->gpio_cd))
- gpio_free(priv->gpio_cd);
return ret;
}
@@ -186,11 +131,6 @@ static int sdhci_dove_remove(struct platform_device *pdev)
sdhci_pltfm_unregister(pdev);
- if (gpio_is_valid(priv->gpio_cd)) {
- free_irq(gpio_to_irq(priv->gpio_cd), host);
- gpio_free(priv->gpio_cd);
- }
-
if (!IS_ERR(priv->clk))
clk_disable_unprepare(priv->clk);
diff --git a/drivers/mmc/host/sdhci-esdhc-imx.c b/drivers/mmc/host/sdhci-esdhc-imx.c
index b841bb7cd371..ccec0e32590f 100644
--- a/drivers/mmc/host/sdhci-esdhc-imx.c
+++ b/drivers/mmc/host/sdhci-esdhc-imx.c
@@ -160,7 +160,6 @@ struct pltfm_imx_data {
MULTIBLK_IN_PROCESS, /* exact multiblock cmd in process */
WAIT_FOR_INT, /* sent CMD12, waiting for response INT */
} multiblock_status;
- u32 uhs_mode;
u32 is_ddr;
};
@@ -382,7 +381,6 @@ static u16 esdhc_readw_le(struct sdhci_host *host, int reg)
if (val & ESDHC_MIX_CTRL_SMPCLK_SEL)
ret |= SDHCI_CTRL_TUNED_CLK;
- ret |= (imx_data->uhs_mode & SDHCI_CTRL_UHS_MASK);
ret &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
return ret;
@@ -429,7 +427,6 @@ static void esdhc_writew_le(struct sdhci_host *host, u16 val, int reg)
else
new_val &= ~ESDHC_VENDOR_SPEC_VSELECT;
writel(new_val, host->ioaddr + ESDHC_VENDOR_SPEC);
- imx_data->uhs_mode = val & SDHCI_CTRL_UHS_MASK;
if (imx_data->socdata->flags & ESDHC_FLAG_MAN_TUNING) {
new_val = readl(host->ioaddr + ESDHC_MIX_CTRL);
if (val & SDHCI_CTRL_TUNED_CLK)
@@ -600,12 +597,14 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
u32 temp, val;
if (clock == 0) {
+ host->mmc->actual_clock = 0;
+
if (esdhc_is_usdhc(imx_data)) {
val = readl(host->ioaddr + ESDHC_VENDOR_SPEC);
writel(val & ~ESDHC_VENDOR_SPEC_FRC_SDCLK_ON,
host->ioaddr + ESDHC_VENDOR_SPEC);
}
- goto out;
+ return;
}
if (esdhc_is_usdhc(imx_data) && !imx_data->is_ddr)
@@ -645,8 +644,6 @@ static inline void esdhc_pltfm_set_clock(struct sdhci_host *host,
}
mdelay(1);
-out:
- host->clock = clock;
}
static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
@@ -668,7 +665,7 @@ static unsigned int esdhc_pltfm_get_ro(struct sdhci_host *host)
return -ENOSYS;
}
-static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
+static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
{
u32 ctrl;
@@ -686,8 +683,6 @@ static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
esdhc_clrset_le(host, ESDHC_CTRL_BUSWIDTH_MASK, ctrl,
SDHCI_HOST_CONTROL);
-
- return 0;
}
static void esdhc_prepare_tuning(struct sdhci_host *host, u32 val)
@@ -697,6 +692,7 @@ static void esdhc_prepare_tuning(struct sdhci_host *host, u32 val)
/* FIXME: delay a bit for card to be ready for next tuning due to errors */
mdelay(1);
+ /* This is balanced by the runtime put in sdhci_tasklet_finish */
pm_runtime_get_sync(host->mmc->parent);
reg = readl(host->ioaddr + ESDHC_MIX_CTRL);
reg |= ESDHC_MIX_CTRL_EXE_TUNE | ESDHC_MIX_CTRL_SMPCLK_SEL |
@@ -713,13 +709,12 @@ static void esdhc_request_done(struct mmc_request *mrq)
complete(&mrq->completion);
}
-static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode)
+static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode,
+ struct scatterlist *sg)
{
struct mmc_command cmd = {0};
struct mmc_request mrq = {NULL};
struct mmc_data data = {0};
- struct scatterlist sg;
- char tuning_pattern[ESDHC_TUNING_BLOCK_PATTERN_LEN];
cmd.opcode = opcode;
cmd.arg = 0;
@@ -728,11 +723,9 @@ static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode)
data.blksz = ESDHC_TUNING_BLOCK_PATTERN_LEN;
data.blocks = 1;
data.flags = MMC_DATA_READ;
- data.sg = &sg;
+ data.sg = sg;
data.sg_len = 1;
- sg_init_one(&sg, tuning_pattern, sizeof(tuning_pattern));
-
mrq.cmd = &cmd;
mrq.cmd->mrq = &mrq;
mrq.data = &data;
@@ -742,14 +735,12 @@ static int esdhc_send_tuning_cmd(struct sdhci_host *host, u32 opcode)
mrq.done = esdhc_request_done;
init_completion(&(mrq.completion));
- disable_irq(host->irq);
- spin_lock(&host->lock);
+ spin_lock_irq(&host->lock);
host->mrq = &mrq;
sdhci_send_command(host, mrq.cmd);
- spin_unlock(&host->lock);
- enable_irq(host->irq);
+ spin_unlock_irq(&host->lock);
wait_for_completion(&mrq.completion);
@@ -772,13 +763,21 @@ static void esdhc_post_tuning(struct sdhci_host *host)
static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
{
+ struct scatterlist sg;
+ char *tuning_pattern;
int min, max, avg, ret;
+ tuning_pattern = kmalloc(ESDHC_TUNING_BLOCK_PATTERN_LEN, GFP_KERNEL);
+ if (!tuning_pattern)
+ return -ENOMEM;
+
+ sg_init_one(&sg, tuning_pattern, ESDHC_TUNING_BLOCK_PATTERN_LEN);
+
/* find the mininum delay first which can pass tuning */
min = ESDHC_TUNE_CTRL_MIN;
while (min < ESDHC_TUNE_CTRL_MAX) {
esdhc_prepare_tuning(host, min);
- if (!esdhc_send_tuning_cmd(host, opcode))
+ if (!esdhc_send_tuning_cmd(host, opcode, &sg))
break;
min += ESDHC_TUNE_CTRL_STEP;
}
@@ -787,7 +786,7 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
max = min + ESDHC_TUNE_CTRL_STEP;
while (max < ESDHC_TUNE_CTRL_MAX) {
esdhc_prepare_tuning(host, max);
- if (esdhc_send_tuning_cmd(host, opcode)) {
+ if (esdhc_send_tuning_cmd(host, opcode, &sg)) {
max -= ESDHC_TUNE_CTRL_STEP;
break;
}
@@ -797,9 +796,11 @@ static int esdhc_executing_tuning(struct sdhci_host *host, u32 opcode)
/* use average delay to get the best timing */
avg = (min + max) / 2;
esdhc_prepare_tuning(host, avg);
- ret = esdhc_send_tuning_cmd(host, opcode);
+ ret = esdhc_send_tuning_cmd(host, opcode, &sg);
esdhc_post_tuning(host);
+ kfree(tuning_pattern);
+
dev_dbg(mmc_dev(host->mmc), "tunning %s at 0x%x ret %d\n",
ret ? "failed" : "passed", avg, ret);
@@ -837,28 +838,21 @@ static int esdhc_change_pinstate(struct sdhci_host *host,
return pinctrl_select_state(imx_data->pinctrl, pinctrl);
}
-static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+static void esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct pltfm_imx_data *imx_data = pltfm_host->priv;
struct esdhc_platform_data *boarddata = &imx_data->boarddata;
- switch (uhs) {
+ switch (timing) {
case MMC_TIMING_UHS_SDR12:
- imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR12;
- break;
case MMC_TIMING_UHS_SDR25:
- imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR25;
- break;
case MMC_TIMING_UHS_SDR50:
- imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR50;
- break;
case MMC_TIMING_UHS_SDR104:
case MMC_TIMING_MMC_HS200:
- imx_data->uhs_mode = SDHCI_CTRL_UHS_SDR104;
break;
case MMC_TIMING_UHS_DDR50:
- imx_data->uhs_mode = SDHCI_CTRL_UHS_DDR50;
+ case MMC_TIMING_MMC_DDR52:
writel(readl(host->ioaddr + ESDHC_MIX_CTRL) |
ESDHC_MIX_CTRL_DDREN,
host->ioaddr + ESDHC_MIX_CTRL);
@@ -875,7 +869,15 @@ static int esdhc_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
break;
}
- return esdhc_change_pinstate(host, uhs);
+ esdhc_change_pinstate(host, timing);
+}
+
+static void esdhc_reset(struct sdhci_host *host, u8 mask)
+{
+ sdhci_reset(host, mask);
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
}
static struct sdhci_ops sdhci_esdhc_ops = {
@@ -888,8 +890,9 @@ static struct sdhci_ops sdhci_esdhc_ops = {
.get_max_clock = esdhc_pltfm_get_max_clock,
.get_min_clock = esdhc_pltfm_get_min_clock,
.get_ro = esdhc_pltfm_get_ro,
- .platform_bus_width = esdhc_pltfm_bus_width,
+ .set_bus_width = esdhc_pltfm_set_bus_width,
.set_uhs_signaling = esdhc_set_uhs_signaling,
+ .reset = esdhc_reset,
};
static const struct sdhci_pltfm_data sdhci_esdhc_imx_pdata = {
@@ -1170,8 +1173,10 @@ static int sdhci_esdhc_runtime_suspend(struct device *dev)
ret = sdhci_runtime_suspend_host(host);
- clk_disable_unprepare(imx_data->clk_per);
- clk_disable_unprepare(imx_data->clk_ipg);
+ if (!sdhci_sdio_irq_enabled(host)) {
+ clk_disable_unprepare(imx_data->clk_per);
+ clk_disable_unprepare(imx_data->clk_ipg);
+ }
clk_disable_unprepare(imx_data->clk_ahb);
return ret;
@@ -1183,8 +1188,10 @@ static int sdhci_esdhc_runtime_resume(struct device *dev)
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct pltfm_imx_data *imx_data = pltfm_host->priv;
- clk_prepare_enable(imx_data->clk_per);
- clk_prepare_enable(imx_data->clk_ipg);
+ if (!sdhci_sdio_irq_enabled(host)) {
+ clk_prepare_enable(imx_data->clk_per);
+ clk_prepare_enable(imx_data->clk_ipg);
+ }
clk_prepare_enable(imx_data->clk_ahb);
return sdhci_runtime_resume_host(host);
diff --git a/drivers/mmc/host/sdhci-esdhc.h b/drivers/mmc/host/sdhci-esdhc.h
index a7d9f95a7b03..3497cfaf683c 100644
--- a/drivers/mmc/host/sdhci-esdhc.h
+++ b/drivers/mmc/host/sdhci-esdhc.h
@@ -20,10 +20,8 @@
#define ESDHC_DEFAULT_QUIRKS (SDHCI_QUIRK_FORCE_BLK_SZ_2048 | \
SDHCI_QUIRK_NO_BUSY_IRQ | \
- SDHCI_QUIRK_NONSTANDARD_CLOCK | \
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK | \
- SDHCI_QUIRK_PIO_NEEDS_DELAY | \
- SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
+ SDHCI_QUIRK_PIO_NEEDS_DELAY)
#define ESDHC_SYSTEM_CONTROL 0x2c
#define ESDHC_CLOCK_MASK 0x0000fff0
diff --git a/drivers/mmc/host/sdhci-of-arasan.c b/drivers/mmc/host/sdhci-of-arasan.c
index f7c7cf62437d..5bd1092310f2 100644
--- a/drivers/mmc/host/sdhci-of-arasan.c
+++ b/drivers/mmc/host/sdhci-of-arasan.c
@@ -52,8 +52,12 @@ static unsigned int sdhci_arasan_get_timeout_clock(struct sdhci_host *host)
}
static struct sdhci_ops sdhci_arasan_ops = {
+ .set_clock = sdhci_set_clock,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
.get_timeout_clock = sdhci_arasan_get_timeout_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static struct sdhci_pltfm_data sdhci_arasan_pdata = {
diff --git a/drivers/mmc/host/sdhci-of-esdhc.c b/drivers/mmc/host/sdhci-of-esdhc.c
index 0b249970b119..8be4dcfb49a0 100644
--- a/drivers/mmc/host/sdhci-of-esdhc.c
+++ b/drivers/mmc/host/sdhci-of-esdhc.c
@@ -199,13 +199,14 @@ static unsigned int esdhc_of_get_min_clock(struct sdhci_host *host)
static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
{
-
int pre_div = 2;
int div = 1;
u32 temp;
+ host->mmc->actual_clock = 0;
+
if (clock == 0)
- goto out;
+ return;
/* Workaround to reduce the clock frequency for p1010 esdhc */
if (of_find_compatible_node(NULL, NULL, "fsl,p1010-esdhc")) {
@@ -238,24 +239,8 @@ static void esdhc_of_set_clock(struct sdhci_host *host, unsigned int clock)
| (pre_div << ESDHC_PREDIV_SHIFT));
sdhci_writel(host, temp, ESDHC_SYSTEM_CONTROL);
mdelay(1);
-out:
- host->clock = clock;
-}
-
-#ifdef CONFIG_PM
-static u32 esdhc_proctl;
-static void esdhc_of_suspend(struct sdhci_host *host)
-{
- esdhc_proctl = sdhci_be32bs_readl(host, SDHCI_HOST_CONTROL);
}
-static void esdhc_of_resume(struct sdhci_host *host)
-{
- esdhc_of_enable_dma(host);
- sdhci_be32bs_writel(host, esdhc_proctl, SDHCI_HOST_CONTROL);
-}
-#endif
-
static void esdhc_of_platform_init(struct sdhci_host *host)
{
u32 vvn;
@@ -269,7 +254,7 @@ static void esdhc_of_platform_init(struct sdhci_host *host)
host->quirks &= ~SDHCI_QUIRK_NO_BUSY_IRQ;
}
-static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
+static void esdhc_pltfm_set_bus_width(struct sdhci_host *host, int width)
{
u32 ctrl;
@@ -289,8 +274,6 @@ static int esdhc_pltfm_bus_width(struct sdhci_host *host, int width)
clrsetbits_be32(host->ioaddr + SDHCI_HOST_CONTROL,
ESDHC_CTRL_BUSWIDTH_MASK, ctrl);
-
- return 0;
}
static const struct sdhci_ops sdhci_esdhc_ops = {
@@ -305,13 +288,46 @@ static const struct sdhci_ops sdhci_esdhc_ops = {
.get_max_clock = esdhc_of_get_max_clock,
.get_min_clock = esdhc_of_get_min_clock,
.platform_init = esdhc_of_platform_init,
-#ifdef CONFIG_PM
- .platform_suspend = esdhc_of_suspend,
- .platform_resume = esdhc_of_resume,
-#endif
.adma_workaround = esdhci_of_adma_workaround,
- .platform_bus_width = esdhc_pltfm_bus_width,
+ .set_bus_width = esdhc_pltfm_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
+};
+
+#ifdef CONFIG_PM
+
+static u32 esdhc_proctl;
+static int esdhc_of_suspend(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+
+ esdhc_proctl = sdhci_be32bs_readl(host, SDHCI_HOST_CONTROL);
+
+ return sdhci_suspend_host(host);
+}
+
+static int esdhc_of_resume(struct device *dev)
+{
+ struct sdhci_host *host = dev_get_drvdata(dev);
+ int ret = sdhci_resume_host(host);
+
+ if (ret == 0) {
+ /* Isn't this already done by sdhci_resume_host() ? --rmk */
+ esdhc_of_enable_dma(host);
+ sdhci_be32bs_writel(host, esdhc_proctl, SDHCI_HOST_CONTROL);
+ }
+
+ return ret;
+}
+
+static const struct dev_pm_ops esdhc_pmops = {
+ .suspend = esdhc_of_suspend,
+ .resume = esdhc_of_resume,
};
+#define ESDHC_PMOPS (&esdhc_pmops)
+#else
+#define ESDHC_PMOPS NULL
+#endif
static const struct sdhci_pltfm_data sdhci_esdhc_pdata = {
/*
@@ -374,7 +390,7 @@ static struct platform_driver sdhci_esdhc_driver = {
.name = "sdhci-esdhc",
.owner = THIS_MODULE,
.of_match_table = sdhci_esdhc_of_match,
- .pm = SDHCI_PLTFM_PMOPS,
+ .pm = ESDHC_PMOPS,
},
.probe = sdhci_esdhc_probe,
.remove = sdhci_esdhc_remove,
diff --git a/drivers/mmc/host/sdhci-of-hlwd.c b/drivers/mmc/host/sdhci-of-hlwd.c
index 57c514a81ca5..b341661369a2 100644
--- a/drivers/mmc/host/sdhci-of-hlwd.c
+++ b/drivers/mmc/host/sdhci-of-hlwd.c
@@ -58,6 +58,10 @@ static const struct sdhci_ops sdhci_hlwd_ops = {
.write_l = sdhci_hlwd_writel,
.write_w = sdhci_hlwd_writew,
.write_b = sdhci_hlwd_writeb,
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static const struct sdhci_pltfm_data sdhci_hlwd_pdata = {
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.c b/drivers/mmc/host/sdhci-pci-o2micro.c
index f49666bcc52a..5670e381b0cf 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.c
+++ b/drivers/mmc/host/sdhci-pci-o2micro.c
@@ -21,6 +21,45 @@
#include "sdhci-pci.h"
#include "sdhci-pci-o2micro.h"
+static void o2_pci_set_baseclk(struct sdhci_pci_chip *chip, u32 value)
+{
+ u32 scratch_32;
+ pci_read_config_dword(chip->pdev,
+ O2_SD_PLL_SETTING, &scratch_32);
+
+ scratch_32 &= 0x0000FFFF;
+ scratch_32 |= value;
+
+ pci_write_config_dword(chip->pdev,
+ O2_SD_PLL_SETTING, scratch_32);
+}
+
+static void o2_pci_led_enable(struct sdhci_pci_chip *chip)
+{
+ int ret;
+ u32 scratch_32;
+
+ /* Enable the LED of the SD host function */
+ ret = pci_read_config_dword(chip->pdev,
+ O2_SD_FUNC_REG0, &scratch_32);
+ if (ret)
+ return;
+
+ scratch_32 &= ~O2_SD_FREG0_LEDOFF;
+ pci_write_config_dword(chip->pdev,
+ O2_SD_FUNC_REG0, scratch_32);
+
+ ret = pci_read_config_dword(chip->pdev,
+ O2_SD_TEST_REG, &scratch_32);
+ if (ret)
+ return;
+
+ scratch_32 |= O2_SD_LED_ENABLE;
+ pci_write_config_dword(chip->pdev,
+ O2_SD_TEST_REG, scratch_32);
+
+}
+
void sdhci_pci_o2_fujin2_pci_init(struct sdhci_pci_chip *chip)
{
u32 scratch_32;
@@ -216,6 +255,40 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
scratch &= 0x7f;
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
+ /* DevId 8520, subId 0x11 or 0x12 chip support */
+ if (chip->pdev->device == PCI_DEVICE_ID_O2_FUJIN2) {
+ ret = pci_read_config_dword(chip->pdev,
+ O2_SD_FUNC_REG0,
+ &scratch_32);
+ scratch_32 = ((scratch_32 & 0xFF000000) >> 24);
+
+ /* Check whether subId is 0x11 or 0x12 */
+ if ((scratch_32 == 0x11) || (scratch_32 == 0x12)) {
+ scratch_32 = 0x2c280000;
+
+ /* Set Base Clock to 208MHz */
+ o2_pci_set_baseclk(chip, scratch_32);
+ ret = pci_read_config_dword(chip->pdev,
+ O2_SD_FUNC_REG4,
+ &scratch_32);
+
+ /* Enable Base Clk setting change */
+ scratch_32 |= O2_SD_FREG4_ENABLE_CLK_SET;
+ pci_write_config_dword(chip->pdev,
+ O2_SD_FUNC_REG4,
+ scratch_32);
+
+ /* Set Tuning Window to 4 */
+ pci_write_config_byte(chip->pdev,
+ O2_SD_TUNING_CTRL, 0x44);
+
+ break;
+ }
+ }
+
+ /* Enable 8520 led function */
+ o2_pci_led_enable(chip);
+
/* Set timeout CLK */
ret = pci_read_config_dword(chip->pdev,
O2_SD_CLK_SETTING, &scratch_32);
@@ -276,7 +349,7 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
pci_write_config_byte(chip->pdev, O2_SD_LOCK_WP, scratch);
ret = pci_read_config_dword(chip->pdev,
- O2_SD_FUNC_REG0, &scratch_32);
+ O2_SD_PLL_SETTING, &scratch_32);
if ((scratch_32 & 0xff000000) == 0x01000000) {
scratch_32 &= 0x0000FFFF;
@@ -299,6 +372,9 @@ int sdhci_pci_o2_probe(struct sdhci_pci_chip *chip)
O2_SD_FUNC_REG4, scratch_32);
}
+ /* Set Tuning Window to 5 */
+ pci_write_config_byte(chip->pdev,
+ O2_SD_TUNING_CTRL, 0x55);
/* Lock WP */
ret = pci_read_config_byte(chip->pdev,
O2_SD_LOCK_WP, &scratch);
diff --git a/drivers/mmc/host/sdhci-pci-o2micro.h b/drivers/mmc/host/sdhci-pci-o2micro.h
index dbec4c933488..f7ffc908d9a0 100644
--- a/drivers/mmc/host/sdhci-pci-o2micro.h
+++ b/drivers/mmc/host/sdhci-pci-o2micro.h
@@ -57,6 +57,9 @@
#define O2_SD_UHS2_L1_CTRL 0x35C
#define O2_SD_FUNC_REG3 0x3E0
#define O2_SD_FUNC_REG4 0x3E4
+#define O2_SD_LED_ENABLE BIT(6)
+#define O2_SD_FREG0_LEDOFF BIT(13)
+#define O2_SD_FREG4_ENABLE_CLK_SET BIT(22)
#define O2_SD_VENDOR_SETTING 0x110
#define O2_SD_VENDOR_SETTING2 0x1C8
diff --git a/drivers/mmc/host/sdhci-pci.c b/drivers/mmc/host/sdhci-pci.c
index fdc612120362..52c42fcc284c 100644
--- a/drivers/mmc/host/sdhci-pci.c
+++ b/drivers/mmc/host/sdhci-pci.c
@@ -1031,7 +1031,7 @@ static int sdhci_pci_enable_dma(struct sdhci_host *host)
return 0;
}
-static int sdhci_pci_bus_width(struct sdhci_host *host, int width)
+static void sdhci_pci_set_bus_width(struct sdhci_host *host, int width)
{
u8 ctrl;
@@ -1052,8 +1052,6 @@ static int sdhci_pci_bus_width(struct sdhci_host *host, int width)
}
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-
- return 0;
}
static void sdhci_pci_gpio_hw_reset(struct sdhci_host *host)
@@ -1080,8 +1078,11 @@ static void sdhci_pci_hw_reset(struct sdhci_host *host)
}
static const struct sdhci_ops sdhci_pci_ops = {
+ .set_clock = sdhci_set_clock,
.enable_dma = sdhci_pci_enable_dma,
- .platform_bus_width = sdhci_pci_bus_width,
+ .set_bus_width = sdhci_pci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
.hw_reset = sdhci_pci_hw_reset,
};
diff --git a/drivers/mmc/host/sdhci-pltfm.c b/drivers/mmc/host/sdhci-pltfm.c
index bef250e95418..7e834fb78f42 100644
--- a/drivers/mmc/host/sdhci-pltfm.c
+++ b/drivers/mmc/host/sdhci-pltfm.c
@@ -45,6 +45,10 @@ unsigned int sdhci_pltfm_clk_get_max_clock(struct sdhci_host *host)
EXPORT_SYMBOL_GPL(sdhci_pltfm_clk_get_max_clock);
static const struct sdhci_ops sdhci_pltfm_ops = {
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
#ifdef CONFIG_OF
diff --git a/drivers/mmc/host/sdhci-pxav2.c b/drivers/mmc/host/sdhci-pxav2.c
index d51e061ec576..3c0f3c0a1cc8 100644
--- a/drivers/mmc/host/sdhci-pxav2.c
+++ b/drivers/mmc/host/sdhci-pxav2.c
@@ -51,11 +51,13 @@
#define MMC_CARD 0x1000
#define MMC_WIDTH 0x0100
-static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask)
+static void pxav2_reset(struct sdhci_host *host, u8 mask)
{
struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+ sdhci_reset(host, mask);
+
if (mask == SDHCI_RESET_ALL) {
u16 tmp = 0;
@@ -88,7 +90,7 @@ static void pxav2_set_private_registers(struct sdhci_host *host, u8 mask)
}
}
-static int pxav2_mmc_set_width(struct sdhci_host *host, int width)
+static void pxav2_mmc_set_bus_width(struct sdhci_host *host, int width)
{
u8 ctrl;
u16 tmp;
@@ -107,14 +109,14 @@ static int pxav2_mmc_set_width(struct sdhci_host *host, int width)
}
writew(tmp, host->ioaddr + SD_CE_ATA_2);
writeb(ctrl, host->ioaddr + SDHCI_HOST_CONTROL);
-
- return 0;
}
static const struct sdhci_ops pxav2_sdhci_ops = {
+ .set_clock = sdhci_set_clock,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
- .platform_reset_exit = pxav2_set_private_registers,
- .platform_bus_width = pxav2_mmc_set_width,
+ .set_bus_width = pxav2_mmc_set_bus_width,
+ .reset = pxav2_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
#ifdef CONFIG_OF
diff --git a/drivers/mmc/host/sdhci-pxav3.c b/drivers/mmc/host/sdhci-pxav3.c
index 2fd73b38c303..f4f128947561 100644
--- a/drivers/mmc/host/sdhci-pxav3.c
+++ b/drivers/mmc/host/sdhci-pxav3.c
@@ -112,11 +112,13 @@ static int mv_conf_mbus_windows(struct platform_device *pdev,
return 0;
}
-static void pxav3_set_private_registers(struct sdhci_host *host, u8 mask)
+static void pxav3_reset(struct sdhci_host *host, u8 mask)
{
struct platform_device *pdev = to_platform_device(mmc_dev(host->mmc));
struct sdhci_pxa_platdata *pdata = pdev->dev.platform_data;
+ sdhci_reset(host, mask);
+
if (mask == SDHCI_RESET_ALL) {
/*
* tune timing of read data/command when crc error happen
@@ -184,7 +186,7 @@ static void pxav3_gen_init_74_clocks(struct sdhci_host *host, u8 power_mode)
pxa->power_mode = power_mode;
}
-static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
+static void pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
{
u16 ctrl_2;
@@ -218,15 +220,16 @@ static int pxav3_set_uhs_signaling(struct sdhci_host *host, unsigned int uhs)
dev_dbg(mmc_dev(host->mmc),
"%s uhs = %d, ctrl_2 = %04X\n",
__func__, uhs, ctrl_2);
-
- return 0;
}
static const struct sdhci_ops pxav3_sdhci_ops = {
- .platform_reset_exit = pxav3_set_private_registers,
+ .set_clock = sdhci_set_clock,
.set_uhs_signaling = pxav3_set_uhs_signaling,
.platform_send_init_74_clocks = pxav3_gen_init_74_clocks,
.get_max_clock = sdhci_pltfm_clk_get_max_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = pxav3_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static struct sdhci_pltfm_data sdhci_pxav3_pdata = {
diff --git a/drivers/mmc/host/sdhci-s3c.c b/drivers/mmc/host/sdhci-s3c.c
index d61eb5a70833..fa5954a05449 100644
--- a/drivers/mmc/host/sdhci-s3c.c
+++ b/drivers/mmc/host/sdhci-s3c.c
@@ -33,9 +33,6 @@
#define MAX_BUS_CLK (4)
-/* Number of gpio's used is max data bus width + command and clock lines */
-#define NUM_GPIOS(x) (x + 2)
-
/**
* struct sdhci_s3c - S3C SDHCI instance
* @host: The SDHCI host created
@@ -58,6 +55,8 @@ struct sdhci_s3c {
struct clk *clk_io;
struct clk *clk_bus[MAX_BUS_CLK];
unsigned long clk_rates[MAX_BUS_CLK];
+
+ bool no_divider;
};
/**
@@ -70,6 +69,7 @@ struct sdhci_s3c {
*/
struct sdhci_s3c_drv_data {
unsigned int sdhci_quirks;
+ bool no_divider;
};
static inline struct sdhci_s3c *to_s3c(struct sdhci_host *host)
@@ -119,7 +119,7 @@ static unsigned int sdhci_s3c_consider_clock(struct sdhci_s3c *ourhost,
* If controller uses a non-standard clock division, find the best clock
* speed possible with selected clock source and skip the division.
*/
- if (ourhost->host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+ if (ourhost->no_divider) {
rate = clk_round_rate(clksrc, wanted);
return wanted - rate;
}
@@ -161,9 +161,13 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
int src;
u32 ctrl;
+ host->mmc->actual_clock = 0;
+
/* don't bother if the clock is going off. */
- if (clock == 0)
+ if (clock == 0) {
+ sdhci_set_clock(host, clock);
return;
+ }
for (src = 0; src < MAX_BUS_CLK; src++) {
delta = sdhci_s3c_consider_clock(ourhost, src, clock);
@@ -215,6 +219,8 @@ static void sdhci_s3c_set_clock(struct sdhci_host *host, unsigned int clock)
if (clock < 25 * 1000000)
ctrl |= (S3C_SDHCI_CTRL3_FCSEL3 | S3C_SDHCI_CTRL3_FCSEL2);
writel(ctrl, host->ioaddr + S3C_SDHCI_CONTROL3);
+
+ sdhci_set_clock(host, clock);
}
/**
@@ -295,10 +301,11 @@ static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
unsigned long timeout;
u16 clk = 0;
+ host->mmc->actual_clock = 0;
+
/* If the clock is going off, set to 0 at clock control register */
if (clock == 0) {
sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
- host->clock = clock;
return;
}
@@ -306,8 +313,6 @@ static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
clk_set_rate(ourhost->clk_bus[ourhost->cur_clk], clock);
- host->clock = clock;
-
clk = SDHCI_CLOCK_INT_EN;
sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
@@ -329,14 +334,14 @@ static void sdhci_cmu_set_clock(struct sdhci_host *host, unsigned int clock)
}
/**
- * sdhci_s3c_platform_bus_width - support 8bit buswidth
+ * sdhci_s3c_set_bus_width - support 8bit buswidth
* @host: The SDHCI host being queried
* @width: MMC_BUS_WIDTH_ macro for the bus width being requested
*
* We have 8-bit width support but is not a v3 controller.
* So we add platform_bus_width() and support 8bit width.
*/
-static int sdhci_s3c_platform_bus_width(struct sdhci_host *host, int width)
+static void sdhci_s3c_set_bus_width(struct sdhci_host *host, int width)
{
u8 ctrl;
@@ -358,93 +363,23 @@ static int sdhci_s3c_platform_bus_width(struct sdhci_host *host, int width)
}
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
-
- return 0;
}
static struct sdhci_ops sdhci_s3c_ops = {
.get_max_clock = sdhci_s3c_get_max_clk,
.set_clock = sdhci_s3c_set_clock,
.get_min_clock = sdhci_s3c_get_min_clock,
- .platform_bus_width = sdhci_s3c_platform_bus_width,
+ .set_bus_width = sdhci_s3c_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
-static void sdhci_s3c_notify_change(struct platform_device *dev, int state)
-{
- struct sdhci_host *host = platform_get_drvdata(dev);
-#ifdef CONFIG_PM_RUNTIME
- struct sdhci_s3c *sc = sdhci_priv(host);
-#endif
- unsigned long flags;
-
- if (host) {
- spin_lock_irqsave(&host->lock, flags);
- if (state) {
- dev_dbg(&dev->dev, "card inserted.\n");
-#ifdef CONFIG_PM_RUNTIME
- clk_prepare_enable(sc->clk_io);
-#endif
- host->flags &= ~SDHCI_DEVICE_DEAD;
- host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
- } else {
- dev_dbg(&dev->dev, "card removed.\n");
- host->flags |= SDHCI_DEVICE_DEAD;
- host->quirks &= ~SDHCI_QUIRK_BROKEN_CARD_DETECTION;
-#ifdef CONFIG_PM_RUNTIME
- clk_disable_unprepare(sc->clk_io);
-#endif
- }
- tasklet_schedule(&host->card_tasklet);
- spin_unlock_irqrestore(&host->lock, flags);
- }
-}
-
-static irqreturn_t sdhci_s3c_gpio_card_detect_thread(int irq, void *dev_id)
-{
- struct sdhci_s3c *sc = dev_id;
- int status = gpio_get_value(sc->ext_cd_gpio);
- if (sc->pdata->ext_cd_gpio_invert)
- status = !status;
- sdhci_s3c_notify_change(sc->pdev, status);
- return IRQ_HANDLED;
-}
-
-static void sdhci_s3c_setup_card_detect_gpio(struct sdhci_s3c *sc)
-{
- struct s3c_sdhci_platdata *pdata = sc->pdata;
- struct device *dev = &sc->pdev->dev;
-
- if (devm_gpio_request(dev, pdata->ext_cd_gpio, "SDHCI EXT CD") == 0) {
- sc->ext_cd_gpio = pdata->ext_cd_gpio;
- sc->ext_cd_irq = gpio_to_irq(pdata->ext_cd_gpio);
- if (sc->ext_cd_irq &&
- request_threaded_irq(sc->ext_cd_irq, NULL,
- sdhci_s3c_gpio_card_detect_thread,
- IRQF_TRIGGER_RISING |
- IRQF_TRIGGER_FALLING |
- IRQF_ONESHOT,
- dev_name(dev), sc) == 0) {
- int status = gpio_get_value(sc->ext_cd_gpio);
- if (pdata->ext_cd_gpio_invert)
- status = !status;
- sdhci_s3c_notify_change(sc->pdev, status);
- } else {
- dev_warn(dev, "cannot request irq for card detect\n");
- sc->ext_cd_irq = 0;
- }
- } else {
- dev_err(dev, "cannot request gpio for card detect\n");
- }
-}
-
#ifdef CONFIG_OF
static int sdhci_s3c_parse_dt(struct device *dev,
struct sdhci_host *host, struct s3c_sdhci_platdata *pdata)
{
struct device_node *node = dev->of_node;
- struct sdhci_s3c *ourhost = to_s3c(host);
u32 max_width;
- int gpio;
/* if the bus-width property is not specified, assume width as 1 */
if (of_property_read_u32(node, "bus-width", &max_width))
@@ -462,18 +397,8 @@ static int sdhci_s3c_parse_dt(struct device *dev,
return 0;
}
- gpio = of_get_named_gpio(node, "cd-gpios", 0);
- if (gpio_is_valid(gpio)) {
- pdata->cd_type = S3C_SDHCI_CD_GPIO;
- pdata->ext_cd_gpio = gpio;
- ourhost->ext_cd_gpio = -1;
- if (of_get_property(node, "cd-inverted", NULL))
- pdata->ext_cd_gpio_invert = 1;
+ if (of_get_named_gpio(node, "cd-gpios", 0))
return 0;
- } else if (gpio != -ENOENT) {
- dev_err(dev, "invalid card detect gpio specified\n");
- return -EINVAL;
- }
/* assuming internal card detect that will be configured by pinctrl */
pdata->cd_type = S3C_SDHCI_CD_INTERNAL;
@@ -606,8 +531,10 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
/* Setup quirks for the controller */
host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
host->quirks |= SDHCI_QUIRK_NO_HISPD_BIT;
- if (drv_data)
+ if (drv_data) {
host->quirks |= drv_data->sdhci_quirks;
+ sc->no_divider = drv_data->no_divider;
+ }
#ifndef CONFIG_MMC_SDHCI_S3C_DMA
@@ -656,7 +583,7 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
* If controller does not have internal clock divider,
* we can use overriding functions instead of default.
*/
- if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK) {
+ if (sc->no_divider) {
sdhci_s3c_ops.set_clock = sdhci_cmu_set_clock;
sdhci_s3c_ops.get_min_clock = sdhci_cmu_get_min_clock;
sdhci_s3c_ops.get_max_clock = sdhci_cmu_get_max_clock;
@@ -674,6 +601,8 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
pm_runtime_use_autosuspend(&pdev->dev);
pm_suspend_ignore_children(&pdev->dev, 1);
+ mmc_of_parse(host->mmc);
+
ret = sdhci_add_host(host);
if (ret) {
dev_err(dev, "sdhci_add_host() failed\n");
@@ -682,15 +611,6 @@ static int sdhci_s3c_probe(struct platform_device *pdev)
goto err_req_regs;
}
- /* The following two methods of card detection might call
- sdhci_s3c_notify_change() immediately, so they can be called
- only after sdhci_add_host(). Setup errors are ignored. */
- if (pdata->cd_type == S3C_SDHCI_CD_EXTERNAL && pdata->ext_cd_init)
- pdata->ext_cd_init(&sdhci_s3c_notify_change);
- if (pdata->cd_type == S3C_SDHCI_CD_GPIO &&
- gpio_is_valid(pdata->ext_cd_gpio))
- sdhci_s3c_setup_card_detect_gpio(sc);
-
#ifdef CONFIG_PM_RUNTIME
if (pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
clk_disable_unprepare(sc->clk_io);
@@ -711,16 +631,12 @@ static int sdhci_s3c_remove(struct platform_device *pdev)
{
struct sdhci_host *host = platform_get_drvdata(pdev);
struct sdhci_s3c *sc = sdhci_priv(host);
- struct s3c_sdhci_platdata *pdata = sc->pdata;
-
- if (pdata->cd_type == S3C_SDHCI_CD_EXTERNAL && pdata->ext_cd_cleanup)
- pdata->ext_cd_cleanup(&sdhci_s3c_notify_change);
if (sc->ext_cd_irq)
free_irq(sc->ext_cd_irq, sc);
#ifdef CONFIG_PM_RUNTIME
- if (pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
+ if (sc->pdata->cd_type != S3C_SDHCI_CD_INTERNAL)
clk_prepare_enable(sc->clk_io);
#endif
sdhci_remove_host(host, 1);
@@ -797,7 +713,7 @@ static const struct dev_pm_ops sdhci_s3c_pmops = {
#if defined(CONFIG_CPU_EXYNOS4210) || defined(CONFIG_SOC_EXYNOS4212)
static struct sdhci_s3c_drv_data exynos4_sdhci_drv_data = {
- .sdhci_quirks = SDHCI_QUIRK_NONSTANDARD_CLOCK,
+ .no_divider = true,
};
#define EXYNOS4_SDHCI_DRV_DATA ((kernel_ulong_t)&exynos4_sdhci_drv_data)
#else
diff --git a/drivers/mmc/host/sdhci-sirf.c b/drivers/mmc/host/sdhci-sirf.c
index 696122c1b468..17004531d089 100644
--- a/drivers/mmc/host/sdhci-sirf.c
+++ b/drivers/mmc/host/sdhci-sirf.c
@@ -28,7 +28,11 @@ static unsigned int sdhci_sirf_get_max_clk(struct sdhci_host *host)
}
static struct sdhci_ops sdhci_sirf_ops = {
+ .set_clock = sdhci_set_clock,
.get_max_clock = sdhci_sirf_get_max_clk,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
static struct sdhci_pltfm_data sdhci_sirf_pdata = {
diff --git a/drivers/mmc/host/sdhci-spear.c b/drivers/mmc/host/sdhci-spear.c
index 0316dec3f006..9d535c7336ef 100644
--- a/drivers/mmc/host/sdhci-spear.c
+++ b/drivers/mmc/host/sdhci-spear.c
@@ -38,7 +38,10 @@ struct spear_sdhci {
/* sdhci ops */
static const struct sdhci_ops sdhci_pltfm_ops = {
- /* Nothing to do for now. */
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = sdhci_set_bus_width,
+ .reset = sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
};
#ifdef CONFIG_OF
diff --git a/drivers/mmc/host/sdhci-tegra.c b/drivers/mmc/host/sdhci-tegra.c
index a835898a68dd..d93a063a36f3 100644
--- a/drivers/mmc/host/sdhci-tegra.c
+++ b/drivers/mmc/host/sdhci-tegra.c
@@ -32,11 +32,17 @@
/* Tegra SDHOST controller vendor register definitions */
#define SDHCI_TEGRA_VENDOR_MISC_CTRL 0x120
+#define SDHCI_MISC_CTRL_ENABLE_SDR104 0x8
+#define SDHCI_MISC_CTRL_ENABLE_SDR50 0x10
#define SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300 0x20
+#define SDHCI_MISC_CTRL_ENABLE_DDR50 0x200
#define NVQUIRK_FORCE_SDHCI_SPEC_200 BIT(0)
#define NVQUIRK_ENABLE_BLOCK_GAP_DET BIT(1)
#define NVQUIRK_ENABLE_SDHCI_SPEC_300 BIT(2)
+#define NVQUIRK_DISABLE_SDR50 BIT(3)
+#define NVQUIRK_DISABLE_SDR104 BIT(4)
+#define NVQUIRK_DISABLE_DDR50 BIT(5)
struct sdhci_tegra_soc_data {
const struct sdhci_pltfm_data *pdata;
@@ -48,19 +54,6 @@ struct sdhci_tegra {
int power_gpio;
};
-static u32 tegra_sdhci_readl(struct sdhci_host *host, int reg)
-{
- u32 val;
-
- if (unlikely(reg == SDHCI_PRESENT_STATE)) {
- /* Use wp_gpio here instead? */
- val = readl(host->ioaddr + reg);
- return val | SDHCI_WRITE_PROTECT;
- }
-
- return readl(host->ioaddr + reg);
-}
-
static u16 tegra_sdhci_readw(struct sdhci_host *host, int reg)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
@@ -108,26 +101,33 @@ static unsigned int tegra_sdhci_get_ro(struct sdhci_host *host)
return mmc_gpio_get_ro(host->mmc);
}
-static void tegra_sdhci_reset_exit(struct sdhci_host *host, u8 mask)
+static void tegra_sdhci_reset(struct sdhci_host *host, u8 mask)
{
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_tegra *tegra_host = pltfm_host->priv;
const struct sdhci_tegra_soc_data *soc_data = tegra_host->soc_data;
+ u32 misc_ctrl;
+
+ sdhci_reset(host, mask);
if (!(mask & SDHCI_RESET_ALL))
return;
+ misc_ctrl = sdhci_readw(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
/* Erratum: Enable SDHCI spec v3.00 support */
- if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300) {
- u32 misc_ctrl;
-
- misc_ctrl = sdhci_readb(host, SDHCI_TEGRA_VENDOR_MISC_CTRL);
+ if (soc_data->nvquirks & NVQUIRK_ENABLE_SDHCI_SPEC_300)
misc_ctrl |= SDHCI_MISC_CTRL_ENABLE_SDHCI_SPEC_300;
- sdhci_writeb(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
- }
+ /* Don't advertise UHS modes which aren't supported yet */
+ if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR50)
+ misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR50;
+ if (soc_data->nvquirks & NVQUIRK_DISABLE_DDR50)
+ misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_DDR50;
+ if (soc_data->nvquirks & NVQUIRK_DISABLE_SDR104)
+ misc_ctrl &= ~SDHCI_MISC_CTRL_ENABLE_SDR104;
+ sdhci_writew(host, misc_ctrl, SDHCI_TEGRA_VENDOR_MISC_CTRL);
}
-static int tegra_sdhci_buswidth(struct sdhci_host *host, int bus_width)
+static void tegra_sdhci_set_bus_width(struct sdhci_host *host, int bus_width)
{
u32 ctrl;
@@ -144,23 +144,25 @@ static int tegra_sdhci_buswidth(struct sdhci_host *host, int bus_width)
ctrl &= ~SDHCI_CTRL_4BITBUS;
}
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
- return 0;
}
static const struct sdhci_ops tegra_sdhci_ops = {
.get_ro = tegra_sdhci_get_ro,
- .read_l = tegra_sdhci_readl,
.read_w = tegra_sdhci_readw,
.write_l = tegra_sdhci_writel,
- .platform_bus_width = tegra_sdhci_buswidth,
- .platform_reset_exit = tegra_sdhci_reset_exit,
+ .set_clock = sdhci_set_clock,
+ .set_bus_width = tegra_sdhci_set_bus_width,
+ .reset = tegra_sdhci_reset,
+ .set_uhs_signaling = sdhci_set_uhs_signaling,
+ .get_max_clock = sdhci_pltfm_clk_get_max_clock,
};
static const struct sdhci_pltfm_data sdhci_tegra20_pdata = {
.quirks = SDHCI_QUIRK_BROKEN_TIMEOUT_VAL |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
- SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+ SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.ops = &tegra_sdhci_ops,
};
@@ -175,13 +177,16 @@ static const struct sdhci_pltfm_data sdhci_tegra30_pdata = {
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
- SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+ SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.ops = &tegra_sdhci_ops,
};
static struct sdhci_tegra_soc_data soc_data_tegra30 = {
.pdata = &sdhci_tegra30_pdata,
- .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300,
+ .nvquirks = NVQUIRK_ENABLE_SDHCI_SPEC_300 |
+ NVQUIRK_DISABLE_SDR50 |
+ NVQUIRK_DISABLE_SDR104,
};
static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
@@ -189,12 +194,16 @@ static const struct sdhci_pltfm_data sdhci_tegra114_pdata = {
SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK |
SDHCI_QUIRK_SINGLE_POWER_WRITE |
SDHCI_QUIRK_NO_HISPD_BIT |
- SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC,
+ SDHCI_QUIRK_BROKEN_ADMA_ZEROLEN_DESC |
+ SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN,
.ops = &tegra_sdhci_ops,
};
static struct sdhci_tegra_soc_data soc_data_tegra114 = {
.pdata = &sdhci_tegra114_pdata,
+ .nvquirks = NVQUIRK_DISABLE_SDR50 |
+ NVQUIRK_DISABLE_DDR50 |
+ NVQUIRK_DISABLE_SDR104,
};
static const struct of_device_id sdhci_tegra_dt_match[] = {
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index 9a79fc4b60ca..47055f3f01b8 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -44,6 +44,8 @@
#define MAX_TUNING_LOOP 40
+#define ADMA_SIZE ((128 * 2 + 1) * 4)
+
static unsigned int debug_quirks = 0;
static unsigned int debug_quirks2;
@@ -131,43 +133,26 @@ static void sdhci_dumpregs(struct sdhci_host *host)
* *
\*****************************************************************************/
-static void sdhci_clear_set_irqs(struct sdhci_host *host, u32 clear, u32 set)
-{
- u32 ier;
-
- ier = sdhci_readl(host, SDHCI_INT_ENABLE);
- ier &= ~clear;
- ier |= set;
- sdhci_writel(host, ier, SDHCI_INT_ENABLE);
- sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
-}
-
-static void sdhci_unmask_irqs(struct sdhci_host *host, u32 irqs)
-{
- sdhci_clear_set_irqs(host, 0, irqs);
-}
-
-static void sdhci_mask_irqs(struct sdhci_host *host, u32 irqs)
-{
- sdhci_clear_set_irqs(host, irqs, 0);
-}
-
static void sdhci_set_card_detection(struct sdhci_host *host, bool enable)
{
- u32 present, irqs;
+ u32 present;
if ((host->quirks & SDHCI_QUIRK_BROKEN_CARD_DETECTION) ||
(host->mmc->caps & MMC_CAP_NONREMOVABLE))
return;
- present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
- SDHCI_CARD_PRESENT;
- irqs = present ? SDHCI_INT_CARD_REMOVE : SDHCI_INT_CARD_INSERT;
+ if (enable) {
+ present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+ SDHCI_CARD_PRESENT;
- if (enable)
- sdhci_unmask_irqs(host, irqs);
- else
- sdhci_mask_irqs(host, irqs);
+ host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+ SDHCI_INT_CARD_INSERT;
+ } else {
+ host->ier &= ~(SDHCI_INT_CARD_REMOVE | SDHCI_INT_CARD_INSERT);
+ }
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
}
static void sdhci_enable_card_detection(struct sdhci_host *host)
@@ -180,22 +165,9 @@ static void sdhci_disable_card_detection(struct sdhci_host *host)
sdhci_set_card_detection(host, false);
}
-static void sdhci_reset(struct sdhci_host *host, u8 mask)
+void sdhci_reset(struct sdhci_host *host, u8 mask)
{
unsigned long timeout;
- u32 uninitialized_var(ier);
-
- if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
- if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
- SDHCI_CARD_PRESENT))
- return;
- }
-
- if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
- ier = sdhci_readl(host, SDHCI_INT_ENABLE);
-
- if (host->ops->platform_reset_enter)
- host->ops->platform_reset_enter(host, mask);
sdhci_writeb(host, mask, SDHCI_SOFTWARE_RESET);
@@ -220,16 +192,27 @@ static void sdhci_reset(struct sdhci_host *host, u8 mask)
timeout--;
mdelay(1);
}
+}
+EXPORT_SYMBOL_GPL(sdhci_reset);
+
+static void sdhci_do_reset(struct sdhci_host *host, u8 mask)
+{
+ if (host->quirks & SDHCI_QUIRK_NO_CARD_NO_RESET) {
+ if (!(sdhci_readl(host, SDHCI_PRESENT_STATE) &
+ SDHCI_CARD_PRESENT))
+ return;
+ }
- if (host->ops->platform_reset_exit)
- host->ops->platform_reset_exit(host, mask);
+ host->ops->reset(host, mask);
- if (host->quirks & SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET)
- sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK, ier);
+ if (mask & SDHCI_RESET_ALL) {
+ if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
+ if (host->ops->enable_dma)
+ host->ops->enable_dma(host);
+ }
- if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
- if ((host->ops->enable_dma) && (mask & SDHCI_RESET_ALL))
- host->ops->enable_dma(host);
+ /* Resetting the controller clears many settings, including preset enable */
+ host->preset_enabled = false;
}
}
@@ -238,15 +221,18 @@ static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios);
static void sdhci_init(struct sdhci_host *host, int soft)
{
if (soft)
- sdhci_reset(host, SDHCI_RESET_CMD|SDHCI_RESET_DATA);
+ sdhci_do_reset(host, SDHCI_RESET_CMD|SDHCI_RESET_DATA);
else
- sdhci_reset(host, SDHCI_RESET_ALL);
+ sdhci_do_reset(host, SDHCI_RESET_ALL);
- sdhci_clear_set_irqs(host, SDHCI_INT_ALL_MASK,
- SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
- SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_INDEX |
- SDHCI_INT_END_BIT | SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
- SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE);
+ host->ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
+ SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
+ SDHCI_INT_INDEX | SDHCI_INT_END_BIT | SDHCI_INT_CRC |
+ SDHCI_INT_TIMEOUT | SDHCI_INT_DATA_END |
+ SDHCI_INT_RESPONSE;
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
if (soft) {
/* force clock reconfiguration */
@@ -502,11 +488,6 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
else
direction = DMA_TO_DEVICE;
- /*
- * The ADMA descriptor table is mapped further down as we
- * need to fill it with data first.
- */
-
host->align_addr = dma_map_single(mmc_dev(host->mmc),
host->align_buffer, 128 * 4, direction);
if (dma_mapping_error(mmc_dev(host->mmc), host->align_addr))
@@ -567,7 +548,7 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
* If this triggers then we have a calculation bug
* somewhere. :/
*/
- WARN_ON((desc - host->adma_desc) > (128 * 2 + 1) * 4);
+ WARN_ON((desc - host->adma_desc) > ADMA_SIZE);
}
if (host->quirks & SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC) {
@@ -595,17 +576,8 @@ static int sdhci_adma_table_pre(struct sdhci_host *host,
host->align_addr, 128 * 4, direction);
}
- host->adma_addr = dma_map_single(mmc_dev(host->mmc),
- host->adma_desc, (128 * 2 + 1) * 4, DMA_TO_DEVICE);
- if (dma_mapping_error(mmc_dev(host->mmc), host->adma_addr))
- goto unmap_entries;
- BUG_ON(host->adma_addr & 0x3);
-
return 0;
-unmap_entries:
- dma_unmap_sg(mmc_dev(host->mmc), data->sg,
- data->sg_len, direction);
unmap_align:
dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
128 * 4, direction);
@@ -623,19 +595,25 @@ static void sdhci_adma_table_post(struct sdhci_host *host,
u8 *align;
char *buffer;
unsigned long flags;
+ bool has_unaligned;
if (data->flags & MMC_DATA_READ)
direction = DMA_FROM_DEVICE;
else
direction = DMA_TO_DEVICE;
- dma_unmap_single(mmc_dev(host->mmc), host->adma_addr,
- (128 * 2 + 1) * 4, DMA_TO_DEVICE);
-
dma_unmap_single(mmc_dev(host->mmc), host->align_addr,
128 * 4, direction);
- if (data->flags & MMC_DATA_READ) {
+ /* Do a quick scan of the SG list for any unaligned mappings */
+ has_unaligned = false;
+ for_each_sg(data->sg, sg, host->sg_count, i)
+ if (sg_dma_address(sg) & 3) {
+ has_unaligned = true;
+ break;
+ }
+
+ if (has_unaligned && data->flags & MMC_DATA_READ) {
dma_sync_sg_for_cpu(mmc_dev(host->mmc), data->sg,
data->sg_len, direction);
@@ -721,9 +699,12 @@ static void sdhci_set_transfer_irqs(struct sdhci_host *host)
u32 dma_irqs = SDHCI_INT_DMA_END | SDHCI_INT_ADMA_ERROR;
if (host->flags & SDHCI_REQ_USE_DMA)
- sdhci_clear_set_irqs(host, pio_irqs, dma_irqs);
+ host->ier = (host->ier & ~pio_irqs) | dma_irqs;
else
- sdhci_clear_set_irqs(host, dma_irqs, pio_irqs);
+ host->ier = (host->ier & ~dma_irqs) | pio_irqs;
+
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
}
static void sdhci_prepare_data(struct sdhci_host *host, struct mmc_command *cmd)
@@ -976,8 +957,8 @@ static void sdhci_finish_data(struct sdhci_host *host)
* upon error conditions.
*/
if (data->error) {
- sdhci_reset(host, SDHCI_RESET_CMD);
- sdhci_reset(host, SDHCI_RESET_DATA);
+ sdhci_do_reset(host, SDHCI_RESET_CMD);
+ sdhci_do_reset(host, SDHCI_RESET_DATA);
}
sdhci_send_command(host, data->stop);
@@ -1107,24 +1088,23 @@ static void sdhci_finish_command(struct sdhci_host *host)
static u16 sdhci_get_preset_value(struct sdhci_host *host)
{
- u16 ctrl, preset = 0;
-
- ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ u16 preset = 0;
- switch (ctrl & SDHCI_CTRL_UHS_MASK) {
- case SDHCI_CTRL_UHS_SDR12:
+ switch (host->timing) {
+ case MMC_TIMING_UHS_SDR12:
preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR12);
break;
- case SDHCI_CTRL_UHS_SDR25:
+ case MMC_TIMING_UHS_SDR25:
preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR25);
break;
- case SDHCI_CTRL_UHS_SDR50:
+ case MMC_TIMING_UHS_SDR50:
preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR50);
break;
- case SDHCI_CTRL_UHS_SDR104:
+ case MMC_TIMING_UHS_SDR104:
+ case MMC_TIMING_MMC_HS200:
preset = sdhci_readw(host, SDHCI_PRESET_FOR_SDR104);
break;
- case SDHCI_CTRL_UHS_DDR50:
+ case MMC_TIMING_UHS_DDR50:
preset = sdhci_readw(host, SDHCI_PRESET_FOR_DDR50);
break;
default:
@@ -1136,32 +1116,22 @@ static u16 sdhci_get_preset_value(struct sdhci_host *host)
return preset;
}
-static void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock)
{
int div = 0; /* Initialized for compiler warning */
int real_div = div, clk_mul = 1;
u16 clk = 0;
unsigned long timeout;
- if (clock && clock == host->clock)
- return;
-
host->mmc->actual_clock = 0;
- if (host->ops->set_clock) {
- host->ops->set_clock(host, clock);
- if (host->quirks & SDHCI_QUIRK_NONSTANDARD_CLOCK)
- return;
- }
-
sdhci_writew(host, 0, SDHCI_CLOCK_CONTROL);
if (clock == 0)
- goto out;
+ return;
if (host->version >= SDHCI_SPEC_300) {
- if (sdhci_readw(host, SDHCI_HOST_CONTROL2) &
- SDHCI_CTRL_PRESET_VAL_ENABLE) {
+ if (host->preset_enabled) {
u16 pre_val;
clk = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
@@ -1247,26 +1217,16 @@ clock_set:
clk |= SDHCI_CLOCK_CARD_EN;
sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
-
-out:
- host->clock = clock;
}
+EXPORT_SYMBOL_GPL(sdhci_set_clock);
-static inline void sdhci_update_clock(struct sdhci_host *host)
-{
- unsigned int clock;
-
- clock = host->clock;
- host->clock = 0;
- sdhci_set_clock(host, clock);
-}
-
-static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
+static void sdhci_set_power(struct sdhci_host *host, unsigned char mode,
+ unsigned short vdd)
{
u8 pwr = 0;
- if (power != (unsigned short)-1) {
- switch (1 << power) {
+ if (mode != MMC_POWER_OFF) {
+ switch (1 << vdd) {
case MMC_VDD_165_195:
pwr = SDHCI_POWER_180;
break;
@@ -1284,7 +1244,7 @@ static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
}
if (host->pwr == pwr)
- return -1;
+ return;
host->pwr = pwr;
@@ -1292,38 +1252,43 @@ static int sdhci_set_power(struct sdhci_host *host, unsigned short power)
sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
sdhci_runtime_pm_bus_off(host);
- return 0;
- }
-
- /*
- * Spec says that we should clear the power reg before setting
- * a new value. Some controllers don't seem to like this though.
- */
- if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
- sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
+ vdd = 0;
+ } else {
+ /*
+ * Spec says that we should clear the power reg before setting
+ * a new value. Some controllers don't seem to like this though.
+ */
+ if (!(host->quirks & SDHCI_QUIRK_SINGLE_POWER_WRITE))
+ sdhci_writeb(host, 0, SDHCI_POWER_CONTROL);
- /*
- * At least the Marvell CaFe chip gets confused if we set the voltage
- * and set turn on power at the same time, so set the voltage first.
- */
- if (host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)
- sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+ /*
+ * At least the Marvell CaFe chip gets confused if we set the
+ * voltage and set turn on power at the same time, so set the
+ * voltage first.
+ */
+ if (host->quirks & SDHCI_QUIRK_NO_SIMULT_VDD_AND_POWER)
+ sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
- pwr |= SDHCI_POWER_ON;
+ pwr |= SDHCI_POWER_ON;
- sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
+ sdhci_writeb(host, pwr, SDHCI_POWER_CONTROL);
- if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
- sdhci_runtime_pm_bus_on(host);
+ if (host->quirks2 & SDHCI_QUIRK2_CARD_ON_NEEDS_BUS_ON)
+ sdhci_runtime_pm_bus_on(host);
- /*
- * Some controllers need an extra 10ms delay of 10ms before they
- * can apply clock after applying power
- */
- if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
- mdelay(10);
+ /*
+ * Some controllers need an extra 10ms delay before they can
+ * apply clock after applying power
+ */
+ if (host->quirks & SDHCI_QUIRK_DELAY_AFTER_POWER)
+ mdelay(10);
+ }
- return power;
+ if (host->vmmc) {
+ spin_unlock_irq(&host->lock);
+ mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd);
+ spin_lock_irq(&host->lock);
+ }
}
/*****************************************************************************\
@@ -1427,10 +1392,53 @@ static void sdhci_request(struct mmc_host *mmc, struct mmc_request *mrq)
spin_unlock_irqrestore(&host->lock, flags);
}
+void sdhci_set_bus_width(struct sdhci_host *host, int width)
+{
+ u8 ctrl;
+
+ ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+ if (width == MMC_BUS_WIDTH_8) {
+ ctrl &= ~SDHCI_CTRL_4BITBUS;
+ if (host->version >= SDHCI_SPEC_300)
+ ctrl |= SDHCI_CTRL_8BITBUS;
+ } else {
+ if (host->version >= SDHCI_SPEC_300)
+ ctrl &= ~SDHCI_CTRL_8BITBUS;
+ if (width == MMC_BUS_WIDTH_4)
+ ctrl |= SDHCI_CTRL_4BITBUS;
+ else
+ ctrl &= ~SDHCI_CTRL_4BITBUS;
+ }
+ sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+}
+EXPORT_SYMBOL_GPL(sdhci_set_bus_width);
+
+void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing)
+{
+ u16 ctrl_2;
+
+ ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ /* Select Bus Speed Mode for host */
+ ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
+ if ((timing == MMC_TIMING_MMC_HS200) ||
+ (timing == MMC_TIMING_UHS_SDR104))
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
+ else if (timing == MMC_TIMING_UHS_SDR12)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
+ else if (timing == MMC_TIMING_UHS_SDR25)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
+ else if (timing == MMC_TIMING_UHS_SDR50)
+ ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
+ else if ((timing == MMC_TIMING_UHS_DDR50) ||
+ (timing == MMC_TIMING_MMC_DDR52))
+ ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
+ sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
+}
+EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
+
static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
{
unsigned long flags;
- int vdd_bit = -1;
u8 ctrl;
spin_lock_irqsave(&host->lock, flags);
@@ -1456,45 +1464,17 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN))
sdhci_enable_preset_value(host, false);
- sdhci_set_clock(host, ios->clock);
-
- if (ios->power_mode == MMC_POWER_OFF)
- vdd_bit = sdhci_set_power(host, -1);
- else
- vdd_bit = sdhci_set_power(host, ios->vdd);
-
- if (host->vmmc && vdd_bit != -1) {
- spin_unlock_irqrestore(&host->lock, flags);
- mmc_regulator_set_ocr(host->mmc, host->vmmc, vdd_bit);
- spin_lock_irqsave(&host->lock, flags);
+ if (!ios->clock || ios->clock != host->clock) {
+ host->ops->set_clock(host, ios->clock);
+ host->clock = ios->clock;
}
+ sdhci_set_power(host, ios->power_mode, ios->vdd);
+
if (host->ops->platform_send_init_74_clocks)
host->ops->platform_send_init_74_clocks(host, ios->power_mode);
- /*
- * If your platform has 8-bit width support but is not a v3 controller,
- * or if it requires special setup code, you should implement that in
- * platform_bus_width().
- */
- if (host->ops->platform_bus_width) {
- host->ops->platform_bus_width(host, ios->bus_width);
- } else {
- ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
- if (ios->bus_width == MMC_BUS_WIDTH_8) {
- ctrl &= ~SDHCI_CTRL_4BITBUS;
- if (host->version >= SDHCI_SPEC_300)
- ctrl |= SDHCI_CTRL_8BITBUS;
- } else {
- if (host->version >= SDHCI_SPEC_300)
- ctrl &= ~SDHCI_CTRL_8BITBUS;
- if (ios->bus_width == MMC_BUS_WIDTH_4)
- ctrl |= SDHCI_CTRL_4BITBUS;
- else
- ctrl &= ~SDHCI_CTRL_4BITBUS;
- }
- sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
- }
+ host->ops->set_bus_width(host, ios->bus_width);
ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
@@ -1510,19 +1490,20 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
/* In case of UHS-I modes, set High Speed Enable */
if ((ios->timing == MMC_TIMING_MMC_HS200) ||
+ (ios->timing == MMC_TIMING_MMC_DDR52) ||
(ios->timing == MMC_TIMING_UHS_SDR50) ||
(ios->timing == MMC_TIMING_UHS_SDR104) ||
(ios->timing == MMC_TIMING_UHS_DDR50) ||
(ios->timing == MMC_TIMING_UHS_SDR25))
ctrl |= SDHCI_CTRL_HISPD;
- ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
- if (!(ctrl_2 & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
+ if (!host->preset_enabled) {
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
/*
* We only need to set Driver Strength if the
* preset value enable is not set.
*/
+ ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
ctrl_2 &= ~SDHCI_CTRL_DRV_TYPE_MASK;
if (ios->drv_type == MMC_SET_DRIVER_TYPE_A)
ctrl_2 |= SDHCI_CTRL_DRV_TYPE_A;
@@ -1546,7 +1527,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
/* Re-enable SD Clock */
- sdhci_update_clock(host);
+ host->ops->set_clock(host, host->clock);
}
@@ -1555,25 +1536,8 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
clk &= ~SDHCI_CLOCK_CARD_EN;
sdhci_writew(host, clk, SDHCI_CLOCK_CONTROL);
- if (host->ops->set_uhs_signaling)
- host->ops->set_uhs_signaling(host, ios->timing);
- else {
- ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
- /* Select Bus Speed Mode for host */
- ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
- if ((ios->timing == MMC_TIMING_MMC_HS200) ||
- (ios->timing == MMC_TIMING_UHS_SDR104))
- ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
- else if (ios->timing == MMC_TIMING_UHS_SDR12)
- ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
- else if (ios->timing == MMC_TIMING_UHS_SDR25)
- ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
- else if (ios->timing == MMC_TIMING_UHS_SDR50)
- ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
- else if (ios->timing == MMC_TIMING_UHS_DDR50)
- ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
- sdhci_writew(host, ctrl_2, SDHCI_HOST_CONTROL2);
- }
+ host->ops->set_uhs_signaling(host, ios->timing);
+ host->timing = ios->timing;
if (!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN) &&
((ios->timing == MMC_TIMING_UHS_SDR12) ||
@@ -1590,7 +1554,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
}
/* Re-enable SD Clock */
- sdhci_update_clock(host);
+ host->ops->set_clock(host, host->clock);
} else
sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
@@ -1600,7 +1564,7 @@ static void sdhci_do_set_ios(struct sdhci_host *host, struct mmc_ios *ios)
* it on each ios seems to solve the problem.
*/
if(host->quirks & SDHCI_QUIRK_RESET_CMD_DATA_ON_IOS)
- sdhci_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
+ sdhci_do_reset(host, SDHCI_RESET_CMD | SDHCI_RESET_DATA);
mmiowb();
spin_unlock_irqrestore(&host->lock, flags);
@@ -1709,24 +1673,16 @@ static int sdhci_get_ro(struct mmc_host *mmc)
static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
{
- if (host->flags & SDHCI_DEVICE_DEAD)
- goto out;
-
- if (enable)
- host->flags |= SDHCI_SDIO_IRQ_ENABLED;
- else
- host->flags &= ~SDHCI_SDIO_IRQ_ENABLED;
-
- /* SDIO IRQ will be enabled as appropriate in runtime resume */
- if (host->runtime_suspended)
- goto out;
+ if (!(host->flags & SDHCI_DEVICE_DEAD)) {
+ if (enable)
+ host->ier |= SDHCI_INT_CARD_INT;
+ else
+ host->ier &= ~SDHCI_INT_CARD_INT;
- if (enable)
- sdhci_unmask_irqs(host, SDHCI_INT_CARD_INT);
- else
- sdhci_mask_irqs(host, SDHCI_INT_CARD_INT);
-out:
- mmiowb();
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+ mmiowb();
+ }
}
static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
@@ -1734,9 +1690,18 @@ static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
struct sdhci_host *host = mmc_priv(mmc);
unsigned long flags;
+ sdhci_runtime_pm_get(host);
+
spin_lock_irqsave(&host->lock, flags);
+ if (enable)
+ host->flags |= SDHCI_SDIO_IRQ_ENABLED;
+ else
+ host->flags &= ~SDHCI_SDIO_IRQ_ENABLED;
+
sdhci_enable_sdio_irq_nolock(host, enable);
spin_unlock_irqrestore(&host->lock, flags);
+
+ sdhci_runtime_pm_put(host);
}
static int sdhci_do_start_signal_voltage_switch(struct sdhci_host *host,
@@ -1855,22 +1820,15 @@ static int sdhci_card_busy(struct mmc_host *mmc)
static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
{
- struct sdhci_host *host;
+ struct sdhci_host *host = mmc_priv(mmc);
u16 ctrl;
- u32 ier;
int tuning_loop_counter = MAX_TUNING_LOOP;
- unsigned long timeout;
int err = 0;
- bool requires_tuning_nonuhs = false;
unsigned long flags;
- host = mmc_priv(mmc);
-
sdhci_runtime_pm_get(host);
spin_lock_irqsave(&host->lock, flags);
- ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
/*
* The Host Controller needs tuning only in case of SDR104 mode
* and for SDR50 mode when Use Tuning for SDR50 is set in the
@@ -1878,15 +1836,18 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
* If the Host Controller supports the HS200 mode then the
* tuning function has to be executed.
*/
- if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR50) &&
- (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
- host->flags & SDHCI_SDR104_NEEDS_TUNING))
- requires_tuning_nonuhs = true;
-
- if (((ctrl & SDHCI_CTRL_UHS_MASK) == SDHCI_CTRL_UHS_SDR104) ||
- requires_tuning_nonuhs)
- ctrl |= SDHCI_CTRL_EXEC_TUNING;
- else {
+ switch (host->timing) {
+ case MMC_TIMING_MMC_HS200:
+ case MMC_TIMING_UHS_SDR104:
+ break;
+
+ case MMC_TIMING_UHS_SDR50:
+ if (host->flags & SDHCI_SDR50_NEEDS_TUNING ||
+ host->flags & SDHCI_SDR104_NEEDS_TUNING)
+ break;
+ /* FALLTHROUGH */
+
+ default:
spin_unlock_irqrestore(&host->lock, flags);
sdhci_runtime_pm_put(host);
return 0;
@@ -1899,6 +1860,8 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
return err;
}
+ ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+ ctrl |= SDHCI_CTRL_EXEC_TUNING;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
/*
@@ -1911,21 +1874,17 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
* to make sure we don't hit a controller bug, we _only_
* enable Buffer Read Ready interrupt here.
*/
- ier = sdhci_readl(host, SDHCI_INT_ENABLE);
- sdhci_clear_set_irqs(host, ier, SDHCI_INT_DATA_AVAIL);
+ sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_INT_ENABLE);
+ sdhci_writel(host, SDHCI_INT_DATA_AVAIL, SDHCI_SIGNAL_ENABLE);
/*
* Issue CMD19 repeatedly till Execute Tuning is set to 0 or the number
* of loops reaches 40 times or a timeout of 150ms occurs.
*/
- timeout = 150;
do {
struct mmc_command cmd = {0};
struct mmc_request mrq = {NULL};
- if (!tuning_loop_counter && !timeout)
- break;
-
cmd.opcode = opcode;
cmd.arg = 0;
cmd.flags = MMC_RSP_R1 | MMC_CMD_ADTC;
@@ -1933,6 +1892,9 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
cmd.data = NULL;
cmd.error = 0;
+ if (tuning_loop_counter-- == 0)
+ break;
+
mrq.cmd = &cmd;
host->mrq = &mrq;
@@ -1990,26 +1952,25 @@ static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
host->tuning_done = 0;
ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
- tuning_loop_counter--;
- timeout--;
- mdelay(1);
+
+ /* eMMC spec does not require a delay between tuning cycles */
+ if (opcode == MMC_SEND_TUNING_BLOCK)
+ mdelay(1);
} while (ctrl & SDHCI_CTRL_EXEC_TUNING);
/*
* The Host Driver has exhausted the maximum number of loops allowed,
* so use fixed sampling frequency.
*/
- if (!tuning_loop_counter || !timeout) {
+ if (tuning_loop_counter < 0) {
ctrl &= ~SDHCI_CTRL_TUNED_CLK;
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
+ }
+ if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
+ pr_info(DRIVER_NAME ": Tuning procedure"
+ " failed, falling back to fixed sampling"
+ " clock\n");
err = -EIO;
- } else {
- if (!(ctrl & SDHCI_CTRL_TUNED_CLK)) {
- pr_info(DRIVER_NAME ": Tuning procedure"
- " failed, falling back to fixed sampling"
- " clock\n");
- err = -EIO;
- }
}
out:
@@ -2044,7 +2005,8 @@ out:
if (err && (host->flags & SDHCI_USING_RETUNING_TIMER))
err = 0;
- sdhci_clear_set_irqs(host, SDHCI_INT_DATA_AVAIL, ier);
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
spin_unlock_irqrestore(&host->lock, flags);
sdhci_runtime_pm_put(host);
@@ -2054,26 +2016,30 @@ out:
static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable)
{
- u16 ctrl;
-
/* Host Controller v3.00 defines preset value registers */
if (host->version < SDHCI_SPEC_300)
return;
- ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
-
/*
* We only enable or disable Preset Value if they are not already
* enabled or disabled respectively. Otherwise, we bail out.
*/
- if (enable && !(ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
- ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
- sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- host->flags |= SDHCI_PV_ENABLED;
- } else if (!enable && (ctrl & SDHCI_CTRL_PRESET_VAL_ENABLE)) {
- ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+ if (host->preset_enabled != enable) {
+ u16 ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+
+ if (enable)
+ ctrl |= SDHCI_CTRL_PRESET_VAL_ENABLE;
+ else
+ ctrl &= ~SDHCI_CTRL_PRESET_VAL_ENABLE;
+
sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
- host->flags &= ~SDHCI_PV_ENABLED;
+
+ if (enable)
+ host->flags |= SDHCI_PV_ENABLED;
+ else
+ host->flags &= ~SDHCI_PV_ENABLED;
+
+ host->preset_enabled = enable;
}
}
@@ -2095,8 +2061,8 @@ static void sdhci_card_event(struct mmc_host *mmc)
pr_err("%s: Resetting controller.\n",
mmc_hostname(host->mmc));
- sdhci_reset(host, SDHCI_RESET_CMD);
- sdhci_reset(host, SDHCI_RESET_DATA);
+ sdhci_do_reset(host, SDHCI_RESET_CMD);
+ sdhci_do_reset(host, SDHCI_RESET_DATA);
host->mrq->cmd->error = -ENOMEDIUM;
tasklet_schedule(&host->finish_tasklet);
@@ -2124,15 +2090,6 @@ static const struct mmc_host_ops sdhci_ops = {
* *
\*****************************************************************************/
-static void sdhci_tasklet_card(unsigned long param)
-{
- struct sdhci_host *host = (struct sdhci_host*)param;
-
- sdhci_card_event(host->mmc);
-
- mmc_detect_change(host->mmc, msecs_to_jiffies(200));
-}
-
static void sdhci_tasklet_finish(unsigned long param)
{
struct sdhci_host *host;
@@ -2169,12 +2126,12 @@ static void sdhci_tasklet_finish(unsigned long param)
/* Some controllers need this kick or reset won't work here */
if (host->quirks & SDHCI_QUIRK_CLOCK_BEFORE_RESET)
/* This is to force an update */
- sdhci_update_clock(host);
+ host->ops->set_clock(host, host->clock);
/* Spec says we should do both at the same time, but Ricoh
controllers do not like that. */
- sdhci_reset(host, SDHCI_RESET_CMD);
- sdhci_reset(host, SDHCI_RESET_DATA);
+ sdhci_do_reset(host, SDHCI_RESET_CMD);
+ sdhci_do_reset(host, SDHCI_RESET_DATA);
}
host->mrq = NULL;
@@ -2424,101 +2381,94 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask)
static irqreturn_t sdhci_irq(int irq, void *dev_id)
{
- irqreturn_t result;
+ irqreturn_t result = IRQ_NONE;
struct sdhci_host *host = dev_id;
- u32 intmask, unexpected = 0;
- int cardint = 0, max_loops = 16;
+ u32 intmask, mask, unexpected = 0;
+ int max_loops = 16;
spin_lock(&host->lock);
- if (host->runtime_suspended) {
+ if (host->runtime_suspended && !sdhci_sdio_irq_enabled(host)) {
spin_unlock(&host->lock);
return IRQ_NONE;
}
intmask = sdhci_readl(host, SDHCI_INT_STATUS);
-
if (!intmask || intmask == 0xffffffff) {
result = IRQ_NONE;
goto out;
}
-again:
- DBG("*** %s got interrupt: 0x%08x\n",
- mmc_hostname(host->mmc), intmask);
-
- if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
- u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
- SDHCI_CARD_PRESENT;
-
- /*
- * There is a observation on i.mx esdhc. INSERT bit will be
- * immediately set again when it gets cleared, if a card is
- * inserted. We have to mask the irq to prevent interrupt
- * storm which will freeze the system. And the REMOVE gets
- * the same situation.
- *
- * More testing are needed here to ensure it works for other
- * platforms though.
- */
- sdhci_mask_irqs(host, present ? SDHCI_INT_CARD_INSERT :
- SDHCI_INT_CARD_REMOVE);
- sdhci_unmask_irqs(host, present ? SDHCI_INT_CARD_REMOVE :
- SDHCI_INT_CARD_INSERT);
-
- sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
- SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
- intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE);
- tasklet_schedule(&host->card_tasklet);
- }
-
- if (intmask & SDHCI_INT_CMD_MASK) {
- sdhci_writel(host, intmask & SDHCI_INT_CMD_MASK,
- SDHCI_INT_STATUS);
- sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
- }
+ do {
+ /* Clear selected interrupts. */
+ mask = intmask & (SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
+ SDHCI_INT_BUS_POWER);
+ sdhci_writel(host, mask, SDHCI_INT_STATUS);
- if (intmask & SDHCI_INT_DATA_MASK) {
- sdhci_writel(host, intmask & SDHCI_INT_DATA_MASK,
- SDHCI_INT_STATUS);
- sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
- }
+ DBG("*** %s got interrupt: 0x%08x\n",
+ mmc_hostname(host->mmc), intmask);
- intmask &= ~(SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK);
+ if (intmask & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+ u32 present = sdhci_readl(host, SDHCI_PRESENT_STATE) &
+ SDHCI_CARD_PRESENT;
- intmask &= ~SDHCI_INT_ERROR;
+ /*
+ * There is a observation on i.mx esdhc. INSERT
+ * bit will be immediately set again when it gets
+ * cleared, if a card is inserted. We have to mask
+ * the irq to prevent interrupt storm which will
+ * freeze the system. And the REMOVE gets the
+ * same situation.
+ *
+ * More testing are needed here to ensure it works
+ * for other platforms though.
+ */
+ host->ier &= ~(SDHCI_INT_CARD_INSERT |
+ SDHCI_INT_CARD_REMOVE);
+ host->ier |= present ? SDHCI_INT_CARD_REMOVE :
+ SDHCI_INT_CARD_INSERT;
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+
+ sdhci_writel(host, intmask & (SDHCI_INT_CARD_INSERT |
+ SDHCI_INT_CARD_REMOVE), SDHCI_INT_STATUS);
+
+ host->thread_isr |= intmask & (SDHCI_INT_CARD_INSERT |
+ SDHCI_INT_CARD_REMOVE);
+ result = IRQ_WAKE_THREAD;
+ }
- if (intmask & SDHCI_INT_BUS_POWER) {
- pr_err("%s: Card is consuming too much power!\n",
- mmc_hostname(host->mmc));
- sdhci_writel(host, SDHCI_INT_BUS_POWER, SDHCI_INT_STATUS);
- }
+ if (intmask & SDHCI_INT_CMD_MASK)
+ sdhci_cmd_irq(host, intmask & SDHCI_INT_CMD_MASK);
- intmask &= ~SDHCI_INT_BUS_POWER;
+ if (intmask & SDHCI_INT_DATA_MASK)
+ sdhci_data_irq(host, intmask & SDHCI_INT_DATA_MASK);
- if (intmask & SDHCI_INT_CARD_INT)
- cardint = 1;
+ if (intmask & SDHCI_INT_BUS_POWER)
+ pr_err("%s: Card is consuming too much power!\n",
+ mmc_hostname(host->mmc));
- intmask &= ~SDHCI_INT_CARD_INT;
+ if (intmask & SDHCI_INT_CARD_INT) {
+ sdhci_enable_sdio_irq_nolock(host, false);
+ host->thread_isr |= SDHCI_INT_CARD_INT;
+ result = IRQ_WAKE_THREAD;
+ }
- if (intmask) {
- unexpected |= intmask;
- sdhci_writel(host, intmask, SDHCI_INT_STATUS);
- }
+ intmask &= ~(SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE |
+ SDHCI_INT_CMD_MASK | SDHCI_INT_DATA_MASK |
+ SDHCI_INT_ERROR | SDHCI_INT_BUS_POWER |
+ SDHCI_INT_CARD_INT);
- result = IRQ_HANDLED;
+ if (intmask) {
+ unexpected |= intmask;
+ sdhci_writel(host, intmask, SDHCI_INT_STATUS);
+ }
- intmask = sdhci_readl(host, SDHCI_INT_STATUS);
+ if (result == IRQ_NONE)
+ result = IRQ_HANDLED;
- /*
- * If we know we'll call the driver to signal SDIO IRQ, disregard
- * further indications of Card Interrupt in the status to avoid a
- * needless loop.
- */
- if (cardint)
- intmask &= ~SDHCI_INT_CARD_INT;
- if (intmask && --max_loops)
- goto again;
+ intmask = sdhci_readl(host, SDHCI_INT_STATUS);
+ } while (intmask && --max_loops);
out:
spin_unlock(&host->lock);
@@ -2527,15 +2477,38 @@ out:
mmc_hostname(host->mmc), unexpected);
sdhci_dumpregs(host);
}
- /*
- * We have to delay this as it calls back into the driver.
- */
- if (cardint)
- mmc_signal_sdio_irq(host->mmc);
return result;
}
+static irqreturn_t sdhci_thread_irq(int irq, void *dev_id)
+{
+ struct sdhci_host *host = dev_id;
+ unsigned long flags;
+ u32 isr;
+
+ spin_lock_irqsave(&host->lock, flags);
+ isr = host->thread_isr;
+ host->thread_isr = 0;
+ spin_unlock_irqrestore(&host->lock, flags);
+
+ if (isr & (SDHCI_INT_CARD_INSERT | SDHCI_INT_CARD_REMOVE)) {
+ sdhci_card_event(host->mmc);
+ mmc_detect_change(host->mmc, msecs_to_jiffies(200));
+ }
+
+ if (isr & SDHCI_INT_CARD_INT) {
+ sdio_run_irqs(host->mmc);
+
+ spin_lock_irqsave(&host->lock, flags);
+ if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
+ sdhci_enable_sdio_irq_nolock(host, true);
+ spin_unlock_irqrestore(&host->lock, flags);
+ }
+
+ return isr ? IRQ_HANDLED : IRQ_NONE;
+}
+
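
The card tasklet and the mmc_signal_sdio_irq() call from hard-IRQ context are gone: sdhci_irq() now only latches the relevant bits in host->thread_isr and returns IRQ_WAKE_THREAD, leaving the sleepable work (card detection, sdio_run_irqs()) to sdhci_thread_irq(). A rough, driver-agnostic sketch of that pattern; the foo_* names and FOO_INT_STATUS register are purely illustrative:

#include <linux/interrupt.h>
#include <linux/io.h>

#define FOO_INT_STATUS	0x30		/* hypothetical status register */

struct foo_host {			/* hypothetical host structure */
	void __iomem *regs;
	u32 thread_isr;			/* bits deferred to the thread */
};

static irqreturn_t foo_hardirq(int irq, void *dev_id)
{
	struct foo_host *host = dev_id;
	u32 status = readl(host->regs + FOO_INT_STATUS);

	if (!status)
		return IRQ_NONE;

	writel(status, host->regs + FOO_INT_STATUS);	/* ack in hard IRQ */
	host->thread_isr |= status;	/* sdhci holds host->lock here */
	return IRQ_WAKE_THREAD;		/* run foo_thread_irq() next */
}

static irqreturn_t foo_thread_irq(int irq, void *dev_id)
{
	struct foo_host *host = dev_id;
	u32 isr = host->thread_isr;	/* sdhci re-takes host->lock here */

	host->thread_isr = 0;
	/* process context: may sleep (card detection, sdio_run_irqs(), ...) */
	return isr ? IRQ_HANDLED : IRQ_NONE;
}

/* registered with:
 *	request_threaded_irq(irq, foo_hardirq, foo_thread_irq,
 *			     IRQF_SHARED, "foo", host);
 */
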
/*****************************************************************************\
* *
* Suspend/resume *
@@ -2572,9 +2545,6 @@ EXPORT_SYMBOL_GPL(sdhci_disable_irq_wakeups);
int sdhci_suspend_host(struct sdhci_host *host)
{
- if (host->ops->platform_suspend)
- host->ops->platform_suspend(host);
-
sdhci_disable_card_detection(host);
/* Disable tuning since we are suspending */
@@ -2584,7 +2554,9 @@ int sdhci_suspend_host(struct sdhci_host *host)
}
if (!device_may_wakeup(mmc_dev(host->mmc))) {
- sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+ host->ier = 0;
+ sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+ sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
free_irq(host->irq, host);
} else {
sdhci_enable_irq_wakeups(host);
@@ -2605,8 +2577,9 @@ int sdhci_resume_host(struct sdhci_host *host)
}
if (!device_may_wakeup(mmc_dev(host->mmc))) {
- ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
- mmc_hostname(host->mmc), host);
+ ret = request_threaded_irq(host->irq, sdhci_irq,
+ sdhci_thread_irq, IRQF_SHARED,
+ mmc_hostname(host->mmc), host);
if (ret)
return ret;
} else {
@@ -2628,9 +2601,6 @@ int sdhci_resume_host(struct sdhci_host *host)
sdhci_enable_card_detection(host);
- if (host->ops->platform_resume)
- host->ops->platform_resume(host);
-
/* Set the re-tuning expiration flag */
if (host->flags & SDHCI_USING_RETUNING_TIMER)
host->flags |= SDHCI_NEEDS_RETUNING;
@@ -2682,10 +2652,12 @@ int sdhci_runtime_suspend_host(struct sdhci_host *host)
}
spin_lock_irqsave(&host->lock, flags);
- sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+ host->ier &= SDHCI_INT_CARD_INT;
+ sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+ sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
spin_unlock_irqrestore(&host->lock, flags);
- synchronize_irq(host->irq);
+ synchronize_hardirq(host->irq);
spin_lock_irqsave(&host->lock, flags);
host->runtime_suspended = true;
@@ -2729,7 +2701,7 @@ int sdhci_runtime_resume_host(struct sdhci_host *host)
host->runtime_suspended = false;
/* Enable SDIO IRQ */
- if ((host->flags & SDHCI_SDIO_IRQ_ENABLED))
+ if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
sdhci_enable_sdio_irq_nolock(host, true);
/* Enable Card Detection */
@@ -2788,7 +2760,7 @@ int sdhci_add_host(struct sdhci_host *host)
if (debug_quirks2)
host->quirks2 = debug_quirks2;
- sdhci_reset(host, SDHCI_RESET_ALL);
+ sdhci_do_reset(host, SDHCI_RESET_ALL);
host->version = sdhci_readw(host, SDHCI_HOST_VERSION);
host->version = (host->version & SDHCI_SPEC_VER_MASK)
@@ -2848,15 +2820,29 @@ int sdhci_add_host(struct sdhci_host *host)
* (128) and potentially one alignment transfer for
* each of those entries.
*/
- host->adma_desc = kmalloc((128 * 2 + 1) * 4, GFP_KERNEL);
+ host->adma_desc = dma_alloc_coherent(mmc_dev(host->mmc),
+ ADMA_SIZE, &host->adma_addr,
+ GFP_KERNEL);
host->align_buffer = kmalloc(128 * 4, GFP_KERNEL);
if (!host->adma_desc || !host->align_buffer) {
- kfree(host->adma_desc);
+ dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
pr_warning("%s: Unable to allocate ADMA "
"buffers. Falling back to standard DMA.\n",
mmc_hostname(mmc));
host->flags &= ~SDHCI_USE_ADMA;
+ host->adma_desc = NULL;
+ host->align_buffer = NULL;
+ } else if (host->adma_addr & 3) {
+ pr_warning("%s: unable to allocate aligned ADMA descriptor\n",
+ mmc_hostname(mmc));
+ host->flags &= ~SDHCI_USE_ADMA;
+ dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ host->adma_desc, host->adma_addr);
+ kfree(host->align_buffer);
+ host->adma_desc = NULL;
+ host->align_buffer = NULL;
}
}
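
The ADMA descriptor table now comes from dma_alloc_coherent(), so it no longer needs streaming mapping around every request, and a table whose bus address has the low two bits set is rejected with a fall-back to standard DMA. A condensed sketch of that allocate-or-bail shape (helper name hypothetical):

static void *foo_alloc_adma_table(struct device *dev, size_t size,
				  dma_addr_t *bus)
{
	void *desc = dma_alloc_coherent(dev, size, bus, GFP_KERNEL);

	if (desc && (*bus & 3)) {	/* 32-bit descriptor words need alignment */
		dma_free_coherent(dev, size, desc, *bus);
		desc = NULL;		/* caller clears SDHCI_USE_ADMA */
	}
	return desc;			/* NULL => fall back to SDMA */
}
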
@@ -2941,6 +2927,7 @@ int sdhci_add_host(struct sdhci_host *host)
mmc->max_busy_timeout = (1 << 27) / host->timeout_clk;
mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
+ mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
host->flags |= SDHCI_AUTO_CMD12;
@@ -3212,8 +3199,6 @@ int sdhci_add_host(struct sdhci_host *host)
/*
* Init tasklets.
*/
- tasklet_init(&host->card_tasklet,
- sdhci_tasklet_card, (unsigned long)host);
tasklet_init(&host->finish_tasklet,
sdhci_tasklet_finish, (unsigned long)host);
@@ -3230,8 +3215,8 @@ int sdhci_add_host(struct sdhci_host *host)
sdhci_init(host, 0);
- ret = request_irq(host->irq, sdhci_irq, IRQF_SHARED,
- mmc_hostname(mmc), host);
+ ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
+ IRQF_SHARED, mmc_hostname(mmc), host);
if (ret) {
pr_err("%s: Failed to request IRQ %d: %d\n",
mmc_hostname(mmc), host->irq, ret);
@@ -3273,12 +3258,12 @@ int sdhci_add_host(struct sdhci_host *host)
#ifdef SDHCI_USE_LEDS_CLASS
reset:
- sdhci_reset(host, SDHCI_RESET_ALL);
- sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+ sdhci_do_reset(host, SDHCI_RESET_ALL);
+ sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+ sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
free_irq(host->irq, host);
#endif
untasklet:
- tasklet_kill(&host->card_tasklet);
tasklet_kill(&host->finish_tasklet);
return ret;
@@ -3315,14 +3300,14 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
#endif
if (!dead)
- sdhci_reset(host, SDHCI_RESET_ALL);
+ sdhci_do_reset(host, SDHCI_RESET_ALL);
- sdhci_mask_irqs(host, SDHCI_INT_ALL_MASK);
+ sdhci_writel(host, 0, SDHCI_INT_ENABLE);
+ sdhci_writel(host, 0, SDHCI_SIGNAL_ENABLE);
free_irq(host->irq, host);
del_timer_sync(&host->timer);
- tasklet_kill(&host->card_tasklet);
tasklet_kill(&host->finish_tasklet);
if (host->vmmc) {
@@ -3335,7 +3320,9 @@ void sdhci_remove_host(struct sdhci_host *host, int dead)
regulator_put(host->vqmmc);
}
- kfree(host->adma_desc);
+ if (host->adma_desc)
+ dma_free_coherent(mmc_dev(host->mmc), ADMA_SIZE,
+ host->adma_desc, host->adma_addr);
kfree(host->align_buffer);
host->adma_desc = NULL;
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 0a3ed01887db..4a5cd5e3fa3e 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -281,18 +281,14 @@ struct sdhci_ops {
unsigned int (*get_max_clock)(struct sdhci_host *host);
unsigned int (*get_min_clock)(struct sdhci_host *host);
unsigned int (*get_timeout_clock)(struct sdhci_host *host);
- int (*platform_bus_width)(struct sdhci_host *host,
- int width);
+ void (*set_bus_width)(struct sdhci_host *host, int width);
void (*platform_send_init_74_clocks)(struct sdhci_host *host,
u8 power_mode);
unsigned int (*get_ro)(struct sdhci_host *host);
- void (*platform_reset_enter)(struct sdhci_host *host, u8 mask);
- void (*platform_reset_exit)(struct sdhci_host *host, u8 mask);
+ void (*reset)(struct sdhci_host *host, u8 mask);
int (*platform_execute_tuning)(struct sdhci_host *host, u32 opcode);
- int (*set_uhs_signaling)(struct sdhci_host *host, unsigned int uhs);
+ void (*set_uhs_signaling)(struct sdhci_host *host, unsigned int uhs);
void (*hw_reset)(struct sdhci_host *host);
- void (*platform_suspend)(struct sdhci_host *host);
- void (*platform_resume)(struct sdhci_host *host);
void (*adma_workaround)(struct sdhci_host *host, u32 intmask);
void (*platform_init)(struct sdhci_host *host);
void (*card_event)(struct sdhci_host *host);
@@ -397,6 +393,16 @@ extern void sdhci_remove_host(struct sdhci_host *host, int dead);
extern void sdhci_send_command(struct sdhci_host *host,
struct mmc_command *cmd);
+static inline bool sdhci_sdio_irq_enabled(struct sdhci_host *host)
+{
+ return !!(host->flags & SDHCI_SDIO_IRQ_ENABLED);
+}
+
+void sdhci_set_clock(struct sdhci_host *host, unsigned int clock);
+void sdhci_set_bus_width(struct sdhci_host *host, int width);
+void sdhci_reset(struct sdhci_host *host, u8 mask);
+void sdhci_set_uhs_signaling(struct sdhci_host *host, unsigned timing);
+
#ifdef CONFIG_PM
extern int sdhci_suspend_host(struct sdhci_host *host);
extern int sdhci_resume_host(struct sdhci_host *host);
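
The ops table loses its platform_* indirections: bus width, reset and UHS signalling become plain callbacks, and the matching default implementations are now exported from sdhci.c (declared just above). A converted platform driver with no special needs would, roughly, point the new hooks straight at those defaults; a sketch only, not part of this patch:

static const struct sdhci_ops foo_sdhci_ops = {	/* hypothetical driver */
	.set_clock		= sdhci_set_clock,
	.set_bus_width		= sdhci_set_bus_width,
	.reset			= sdhci_reset,
	.set_uhs_signaling	= sdhci_set_uhs_signaling,
};
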
diff --git a/drivers/mmc/host/sh_mmcif.c b/drivers/mmc/host/sh_mmcif.c
index 54730f4aac87..656fbba4c422 100644
--- a/drivers/mmc/host/sh_mmcif.c
+++ b/drivers/mmc/host/sh_mmcif.c
@@ -803,12 +803,13 @@ static u32 sh_mmcif_set_cmd(struct sh_mmcif_host *host,
break;
}
switch (host->timing) {
- case MMC_TIMING_UHS_DDR50:
+ case MMC_TIMING_MMC_DDR52:
/*
* MMC core will only set this timing, if the host
- * advertises the MMC_CAP_UHS_DDR50 capability. MMCIF
- * implementations with this capability, e.g. sh73a0,
- * will have to set it in their platform data.
+ * advertises the MMC_CAP_1_8V_DDR/MMC_CAP_1_2V_DDR
+ * capability. MMCIF implementations with this
+ * capability, e.g. sh73a0, will have to set it
+ * in their platform data.
*/
tmp |= CMD_SET_DARS;
break;
diff --git a/drivers/mmc/host/usdhi6rol0.c b/drivers/mmc/host/usdhi6rol0.c
new file mode 100644
index 000000000000..eb2bbbef19c6
--- /dev/null
+++ b/drivers/mmc/host/usdhi6rol0.c
@@ -0,0 +1,1847 @@
+/*
+ * Copyright (C) 2013-2014 Renesas Electronics Europe Ltd.
+ * Author: Guennadi Liakhovetski <g.liakhovetski@gmx.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmaengine.h>
+#include <linux/highmem.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/log2.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/sd.h>
+#include <linux/mmc/sdio.h>
+#include <linux/module.h>
+#include <linux/pagemap.h>
+#include <linux/platform_device.h>
+#include <linux/scatterlist.h>
+#include <linux/string.h>
+#include <linux/time.h>
+#include <linux/virtio.h>
+#include <linux/workqueue.h>
+
+#define USDHI6_SD_CMD 0x0000
+#define USDHI6_SD_PORT_SEL 0x0004
+#define USDHI6_SD_ARG 0x0008
+#define USDHI6_SD_STOP 0x0010
+#define USDHI6_SD_SECCNT 0x0014
+#define USDHI6_SD_RSP10 0x0018
+#define USDHI6_SD_RSP32 0x0020
+#define USDHI6_SD_RSP54 0x0028
+#define USDHI6_SD_RSP76 0x0030
+#define USDHI6_SD_INFO1 0x0038
+#define USDHI6_SD_INFO2 0x003c
+#define USDHI6_SD_INFO1_MASK 0x0040
+#define USDHI6_SD_INFO2_MASK 0x0044
+#define USDHI6_SD_CLK_CTRL 0x0048
+#define USDHI6_SD_SIZE 0x004c
+#define USDHI6_SD_OPTION 0x0050
+#define USDHI6_SD_ERR_STS1 0x0058
+#define USDHI6_SD_ERR_STS2 0x005c
+#define USDHI6_SD_BUF0 0x0060
+#define USDHI6_SDIO_MODE 0x0068
+#define USDHI6_SDIO_INFO1 0x006c
+#define USDHI6_SDIO_INFO1_MASK 0x0070
+#define USDHI6_CC_EXT_MODE 0x01b0
+#define USDHI6_SOFT_RST 0x01c0
+#define USDHI6_VERSION 0x01c4
+#define USDHI6_HOST_MODE 0x01c8
+#define USDHI6_SDIF_MODE 0x01cc
+
+#define USDHI6_SD_CMD_APP 0x0040
+#define USDHI6_SD_CMD_MODE_RSP_AUTO 0x0000
+#define USDHI6_SD_CMD_MODE_RSP_NONE 0x0300
+#define USDHI6_SD_CMD_MODE_RSP_R1 0x0400 /* Also R5, R6, R7 */
+#define USDHI6_SD_CMD_MODE_RSP_R1B 0x0500 /* R1b */
+#define USDHI6_SD_CMD_MODE_RSP_R2 0x0600
+#define USDHI6_SD_CMD_MODE_RSP_R3 0x0700 /* Also R4 */
+#define USDHI6_SD_CMD_DATA 0x0800
+#define USDHI6_SD_CMD_READ 0x1000
+#define USDHI6_SD_CMD_MULTI 0x2000
+#define USDHI6_SD_CMD_CMD12_AUTO_OFF 0x4000
+
+#define USDHI6_CC_EXT_MODE_SDRW BIT(1)
+
+#define USDHI6_SD_INFO1_RSP_END BIT(0)
+#define USDHI6_SD_INFO1_ACCESS_END BIT(2)
+#define USDHI6_SD_INFO1_CARD_OUT BIT(3)
+#define USDHI6_SD_INFO1_CARD_IN BIT(4)
+#define USDHI6_SD_INFO1_CD BIT(5)
+#define USDHI6_SD_INFO1_WP BIT(7)
+#define USDHI6_SD_INFO1_D3_CARD_OUT BIT(8)
+#define USDHI6_SD_INFO1_D3_CARD_IN BIT(9)
+
+#define USDHI6_SD_INFO2_CMD_ERR BIT(0)
+#define USDHI6_SD_INFO2_CRC_ERR BIT(1)
+#define USDHI6_SD_INFO2_END_ERR BIT(2)
+#define USDHI6_SD_INFO2_TOUT BIT(3)
+#define USDHI6_SD_INFO2_IWA_ERR BIT(4)
+#define USDHI6_SD_INFO2_IRA_ERR BIT(5)
+#define USDHI6_SD_INFO2_RSP_TOUT BIT(6)
+#define USDHI6_SD_INFO2_SDDAT0 BIT(7)
+#define USDHI6_SD_INFO2_BRE BIT(8)
+#define USDHI6_SD_INFO2_BWE BIT(9)
+#define USDHI6_SD_INFO2_SCLKDIVEN BIT(13)
+#define USDHI6_SD_INFO2_CBSY BIT(14)
+#define USDHI6_SD_INFO2_ILA BIT(15)
+
+#define USDHI6_SD_INFO1_CARD_INSERT (USDHI6_SD_INFO1_CARD_IN | USDHI6_SD_INFO1_D3_CARD_IN)
+#define USDHI6_SD_INFO1_CARD_EJECT (USDHI6_SD_INFO1_CARD_OUT | USDHI6_SD_INFO1_D3_CARD_OUT)
+#define USDHI6_SD_INFO1_CARD (USDHI6_SD_INFO1_CARD_INSERT | USDHI6_SD_INFO1_CARD_EJECT)
+#define USDHI6_SD_INFO1_CARD_CD (USDHI6_SD_INFO1_CARD_IN | USDHI6_SD_INFO1_CARD_OUT)
+
+#define USDHI6_SD_INFO2_ERR (USDHI6_SD_INFO2_CMD_ERR | \
+ USDHI6_SD_INFO2_CRC_ERR | USDHI6_SD_INFO2_END_ERR | \
+ USDHI6_SD_INFO2_TOUT | USDHI6_SD_INFO2_IWA_ERR | \
+ USDHI6_SD_INFO2_IRA_ERR | USDHI6_SD_INFO2_RSP_TOUT | \
+ USDHI6_SD_INFO2_ILA)
+
+#define USDHI6_SD_INFO1_IRQ (USDHI6_SD_INFO1_RSP_END | USDHI6_SD_INFO1_ACCESS_END | \
+ USDHI6_SD_INFO1_CARD)
+
+#define USDHI6_SD_INFO2_IRQ (USDHI6_SD_INFO2_ERR | USDHI6_SD_INFO2_BRE | \
+ USDHI6_SD_INFO2_BWE | 0x0800 | USDHI6_SD_INFO2_ILA)
+
+#define USDHI6_SD_CLK_CTRL_SCLKEN BIT(8)
+
+#define USDHI6_SD_STOP_STP BIT(0)
+#define USDHI6_SD_STOP_SEC BIT(8)
+
+#define USDHI6_SDIO_INFO1_IOIRQ BIT(0)
+#define USDHI6_SDIO_INFO1_EXPUB52 BIT(14)
+#define USDHI6_SDIO_INFO1_EXWT BIT(15)
+
+#define USDHI6_SD_ERR_STS1_CRC_NO_ERROR BIT(13)
+
+#define USDHI6_SOFT_RST_RESERVED (BIT(1) | BIT(2))
+#define USDHI6_SOFT_RST_RESET BIT(0)
+
+#define USDHI6_SD_OPTION_TIMEOUT_SHIFT 4
+#define USDHI6_SD_OPTION_TIMEOUT_MASK (0xf << USDHI6_SD_OPTION_TIMEOUT_SHIFT)
+#define USDHI6_SD_OPTION_WIDTH_1 BIT(15)
+
+#define USDHI6_SD_PORT_SEL_PORTS_SHIFT 8
+
+#define USDHI6_SD_CLK_CTRL_DIV_MASK 0xff
+
+#define USDHI6_SDIO_INFO1_IRQ (USDHI6_SDIO_INFO1_IOIRQ | 3 | \
+ USDHI6_SDIO_INFO1_EXPUB52 | USDHI6_SDIO_INFO1_EXWT)
+
+#define USDHI6_MIN_DMA 64
+
+enum usdhi6_wait_for {
+ USDHI6_WAIT_FOR_REQUEST,
+ USDHI6_WAIT_FOR_CMD,
+ USDHI6_WAIT_FOR_MREAD,
+ USDHI6_WAIT_FOR_MWRITE,
+ USDHI6_WAIT_FOR_READ,
+ USDHI6_WAIT_FOR_WRITE,
+ USDHI6_WAIT_FOR_DATA_END,
+ USDHI6_WAIT_FOR_STOP,
+ USDHI6_WAIT_FOR_DMA,
+};
+
+struct usdhi6_page {
+ struct page *page;
+ void *mapped; /* mapped page */
+};
+
+struct usdhi6_host {
+ struct mmc_host *mmc;
+ struct mmc_request *mrq;
+ void __iomem *base;
+ struct clk *clk;
+
+ /* SG memory handling */
+
+ /* Common for multiple and single block requests */
+ struct usdhi6_page pg; /* current page from an SG */
+ void *blk_page; /* either a mapped page, or the bounce buffer */
+ size_t offset; /* offset within a page, including sg->offset */
+
+ /* Blocks, crossing a page boundary */
+ size_t head_len;
+ struct usdhi6_page head_pg;
+
+ /* A bounce buffer for unaligned blocks or blocks, crossing a page boundary */
+ struct scatterlist bounce_sg;
+ u8 bounce_buf[512];
+
+ /* Multiple block requests only */
+ struct scatterlist *sg; /* current SG segment */
+ int page_idx; /* page index within an SG segment */
+
+ enum usdhi6_wait_for wait;
+ u32 status_mask;
+ u32 status2_mask;
+ u32 sdio_mask;
+ u32 io_error;
+ u32 irq_status;
+ unsigned long imclk;
+ unsigned long rate;
+ bool app_cmd;
+
+ /* Timeout handling */
+ struct delayed_work timeout_work;
+ unsigned long timeout;
+
+ /* DMA support */
+ struct dma_chan *chan_rx;
+ struct dma_chan *chan_tx;
+ bool dma_active;
+};
+
+/* I/O primitives */
+
+static void usdhi6_write(struct usdhi6_host *host, u32 reg, u32 data)
+{
+ iowrite32(data, host->base + reg);
+ dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+ host->base, reg, data);
+}
+
+static void usdhi6_write16(struct usdhi6_host *host, u32 reg, u16 data)
+{
+ iowrite16(data, host->base + reg);
+ dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+ host->base, reg, data);
+}
+
+static u32 usdhi6_read(struct usdhi6_host *host, u32 reg)
+{
+ u32 data = ioread32(host->base + reg);
+ dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+ host->base, reg, data);
+ return data;
+}
+
+static u16 usdhi6_read16(struct usdhi6_host *host, u32 reg)
+{
+ u16 data = ioread16(host->base + reg);
+ dev_vdbg(mmc_dev(host->mmc), "%s(0x%p + 0x%x) = 0x%x\n", __func__,
+ host->base, reg, data);
+ return data;
+}
+
+static void usdhi6_irq_enable(struct usdhi6_host *host, u32 info1, u32 info2)
+{
+ host->status_mask = USDHI6_SD_INFO1_IRQ & ~info1;
+ host->status2_mask = USDHI6_SD_INFO2_IRQ & ~info2;
+ usdhi6_write(host, USDHI6_SD_INFO1_MASK, host->status_mask);
+ usdhi6_write(host, USDHI6_SD_INFO2_MASK, host->status2_mask);
+}
+
+static void usdhi6_wait_for_resp(struct usdhi6_host *host)
+{
+ usdhi6_irq_enable(host, USDHI6_SD_INFO1_RSP_END |
+ USDHI6_SD_INFO1_ACCESS_END | USDHI6_SD_INFO1_CARD_CD,
+ USDHI6_SD_INFO2_ERR);
+}
+
+static void usdhi6_wait_for_brwe(struct usdhi6_host *host, bool read)
+{
+ usdhi6_irq_enable(host, USDHI6_SD_INFO1_ACCESS_END |
+ USDHI6_SD_INFO1_CARD_CD, USDHI6_SD_INFO2_ERR |
+ (read ? USDHI6_SD_INFO2_BRE : USDHI6_SD_INFO2_BWE));
+}
+
+static void usdhi6_only_cd(struct usdhi6_host *host)
+{
+ /* Mask all except card hotplug */
+ usdhi6_irq_enable(host, USDHI6_SD_INFO1_CARD_CD, 0);
+}
+
+static void usdhi6_mask_all(struct usdhi6_host *host)
+{
+ usdhi6_irq_enable(host, 0, 0);
+}
+
+static int usdhi6_error_code(struct usdhi6_host *host)
+{
+ u32 err;
+
+ usdhi6_write(host, USDHI6_SD_STOP, USDHI6_SD_STOP_STP);
+
+ if (host->io_error &
+ (USDHI6_SD_INFO2_RSP_TOUT | USDHI6_SD_INFO2_TOUT)) {
+ u32 rsp54 = usdhi6_read(host, USDHI6_SD_RSP54);
+ int opc = host->mrq ? host->mrq->cmd->opcode : -1;
+
+ err = usdhi6_read(host, USDHI6_SD_ERR_STS2);
+ /* Response timeout is often normal, don't spam the log */
+ if (host->wait == USDHI6_WAIT_FOR_CMD)
+ dev_dbg(mmc_dev(host->mmc),
+ "T-out sts 0x%x, resp 0x%x, state %u, CMD%d\n",
+ err, rsp54, host->wait, opc);
+ else
+ dev_warn(mmc_dev(host->mmc),
+ "T-out sts 0x%x, resp 0x%x, state %u, CMD%d\n",
+ err, rsp54, host->wait, opc);
+ return -ETIMEDOUT;
+ }
+
+ err = usdhi6_read(host, USDHI6_SD_ERR_STS1);
+ if (err != USDHI6_SD_ERR_STS1_CRC_NO_ERROR)
+ dev_warn(mmc_dev(host->mmc), "Err sts 0x%x, state %u, CMD%d\n",
+ err, host->wait, host->mrq ? host->mrq->cmd->opcode : -1);
+ if (host->io_error & USDHI6_SD_INFO2_ILA)
+ return -EILSEQ;
+
+ return -EIO;
+}
+
+/* Scatter-Gather management */
+
+/*
+ * In PIO mode we have to map each page separately, using kmap(). That way
+ * adjacent pages are mapped to non-adjacent virtual addresses. That's why we
+ * have to use a bounce buffer for blocks, crossing page boundaries. Such blocks
+ * have been observed with an SDIO WiFi card (b43 driver).
+ */
+static void usdhi6_blk_bounce(struct usdhi6_host *host,
+ struct scatterlist *sg)
+{
+ struct mmc_data *data = host->mrq->data;
+ size_t blk_head = host->head_len;
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): CMD%u of %u SG: %ux%u @ 0x%x\n",
+ __func__, host->mrq->cmd->opcode, data->sg_len,
+ data->blksz, data->blocks, sg->offset);
+
+ host->head_pg.page = host->pg.page;
+ host->head_pg.mapped = host->pg.mapped;
+ host->pg.page = nth_page(host->pg.page, 1);
+ host->pg.mapped = kmap(host->pg.page);
+
+ host->blk_page = host->bounce_buf;
+ host->offset = 0;
+
+ if (data->flags & MMC_DATA_READ)
+ return;
+
+ memcpy(host->bounce_buf, host->head_pg.mapped + PAGE_SIZE - blk_head,
+ blk_head);
+ memcpy(host->bounce_buf + blk_head, host->pg.mapped,
+ data->blksz - blk_head);
+}
+
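
Worked numbers for the bounce path described above (values illustrative, not from the patch):

/*
 * PAGE_SIZE = 4096, blksz = 512, sg->offset = 3900:
 *
 *   head     = PAGE_SIZE - sg->offset = 196   bytes left in the first page
 *   blk_head = head % blksz           = 196   (head < blksz: block crosses)
 *
 * For a write the 512-byte block is assembled in bounce_buf from the two
 * separately kmap()ed pages, matching the memcpy() pair above:
 *
 *   memcpy(bounce_buf,       end_of_first_page - 196, 196);
 *   memcpy(bounce_buf + 196, start_of_second_page,    512 - 196);
 *
 * For a read the same copies run in the opposite direction from
 * usdhi6_sg_unmap().
 */
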
+/* Only called for multiple block IO */
+static void usdhi6_sg_prep(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_data *data = mrq->data;
+
+ usdhi6_write(host, USDHI6_SD_SECCNT, data->blocks);
+
+ host->sg = data->sg;
+ /* TODO: if we always map, this is redundant */
+ host->offset = host->sg->offset;
+}
+
+/* Map the first page in an SG segment: common for multiple and single block IO */
+static void *usdhi6_sg_map(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+ struct scatterlist *sg = data->sg_len > 1 ? host->sg : data->sg;
+ size_t head = PAGE_SIZE - sg->offset;
+ size_t blk_head = head % data->blksz;
+
+ WARN(host->pg.page, "%p not properly unmapped!\n", host->pg.page);
+ if (WARN(sg_dma_len(sg) % data->blksz,
+ "SG size %zd isn't a multiple of block size %zd\n",
+ sg_dma_len(sg), data->blksz))
+ return NULL;
+
+ host->pg.page = sg_page(sg);
+ host->pg.mapped = kmap(host->pg.page);
+ host->offset = sg->offset;
+
+ /*
+ * Block size must be a power of 2 for multi-block transfers,
+ * therefore blk_head is equal for all pages in this SG
+ */
+ host->head_len = blk_head;
+
+ if (head < data->blksz)
+ /*
+ * The first block in the SG crosses a page boundary.
+ * Max blksz = 512, so blocks can only span 2 pages
+ */
+ usdhi6_blk_bounce(host, sg);
+ else
+ host->blk_page = host->pg.mapped;
+
+ dev_dbg(mmc_dev(host->mmc), "Mapped %p (%lx) at %p + %u for CMD%u @ 0x%p\n",
+ host->pg.page, page_to_pfn(host->pg.page), host->pg.mapped,
+ sg->offset, host->mrq->cmd->opcode, host->mrq);
+
+ return host->blk_page + host->offset;
+}
+
+/* Unmap the current page: common for multiple and single block IO */
+static void usdhi6_sg_unmap(struct usdhi6_host *host, bool force)
+{
+ struct mmc_data *data = host->mrq->data;
+ struct page *page = host->head_pg.page;
+
+ if (page) {
+ /* Previous block was cross-page boundary */
+ struct scatterlist *sg = data->sg_len > 1 ?
+ host->sg : data->sg;
+ size_t blk_head = host->head_len;
+
+ if (!data->error && data->flags & MMC_DATA_READ) {
+ memcpy(host->head_pg.mapped + PAGE_SIZE - blk_head,
+ host->bounce_buf, blk_head);
+ memcpy(host->pg.mapped, host->bounce_buf + blk_head,
+ data->blksz - blk_head);
+ }
+
+ flush_dcache_page(page);
+ kunmap(page);
+
+ host->head_pg.page = NULL;
+
+ if (!force && sg_dma_len(sg) + sg->offset >
+ (host->page_idx << PAGE_SHIFT) + data->blksz - blk_head)
+ /* More blocks in this SG, don't unmap the next page */
+ return;
+ }
+
+ page = host->pg.page;
+ if (!page)
+ return;
+
+ flush_dcache_page(page);
+ kunmap(page);
+
+ host->pg.page = NULL;
+}
+
+/* Called from MMC_WRITE_MULTIPLE_BLOCK or MMC_READ_MULTIPLE_BLOCK */
+static void usdhi6_sg_advance(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+ size_t done, total;
+
+ /* New offset: set at the end of the previous block */
+ if (host->head_pg.page) {
+ /* Finished a cross-page block, jump to the new page */
+ host->page_idx++;
+ host->offset = data->blksz - host->head_len;
+ host->blk_page = host->pg.mapped;
+ usdhi6_sg_unmap(host, false);
+ } else {
+ host->offset += data->blksz;
+ /* The completed block didn't cross a page boundary */
+ if (host->offset == PAGE_SIZE) {
+ /* If required, we'll map the page below */
+ host->offset = 0;
+ host->page_idx++;
+ }
+ }
+
+ /*
+ * Now host->blk_page + host->offset point at the end of our last block
+ * and host->page_idx is the index of the page, in which our new block
+ * is located, if any
+ */
+
+ done = (host->page_idx << PAGE_SHIFT) + host->offset;
+ total = host->sg->offset + sg_dma_len(host->sg);
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): %zu of %zu @ %u\n", __func__,
+ done, total, host->offset);
+
+ if (done < total && host->offset) {
+ /* More blocks in this page */
+ if (host->offset + data->blksz > PAGE_SIZE)
+ /* We approached at a block, that spans 2 pages */
+ usdhi6_blk_bounce(host, host->sg);
+
+ return;
+ }
+
+ /* Finished current page or an SG segment */
+ usdhi6_sg_unmap(host, false);
+
+ if (done == total) {
+ /*
+ * End of an SG segment or the complete SG: jump to the next
+ * segment, we'll map it later in usdhi6_blk_read() or
+ * usdhi6_blk_write()
+ */
+ struct scatterlist *next = sg_next(host->sg);
+
+ host->page_idx = 0;
+
+ if (!next)
+ host->wait = USDHI6_WAIT_FOR_DATA_END;
+ host->sg = next;
+
+ if (WARN(next && sg_dma_len(next) % data->blksz,
+ "SG size %zd isn't a multiple of block size %zd\n",
+ sg_dma_len(next), data->blksz))
+ data->error = -EINVAL;
+
+ return;
+ }
+
+ /* We cannot get here after crossing a page border */
+
+ /* Next page in the same SG */
+ host->pg.page = nth_page(sg_page(host->sg), host->page_idx);
+ host->pg.mapped = kmap(host->pg.page);
+ host->blk_page = host->pg.mapped;
+
+ dev_dbg(mmc_dev(host->mmc), "Mapped %p (%lx) at %p for CMD%u @ 0x%p\n",
+ host->pg.page, page_to_pfn(host->pg.page), host->pg.mapped,
+ host->mrq->cmd->opcode, host->mrq);
+}
+
+/* DMA handling */
+
+static void usdhi6_dma_release(struct usdhi6_host *host)
+{
+ host->dma_active = false;
+ if (host->chan_tx) {
+ struct dma_chan *chan = host->chan_tx;
+ host->chan_tx = NULL;
+ dma_release_channel(chan);
+ }
+ if (host->chan_rx) {
+ struct dma_chan *chan = host->chan_rx;
+ host->chan_rx = NULL;
+ dma_release_channel(chan);
+ }
+}
+
+static void usdhi6_dma_stop_unmap(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+
+ if (!host->dma_active)
+ return;
+
+ usdhi6_write(host, USDHI6_CC_EXT_MODE, 0);
+ host->dma_active = false;
+
+ if (data->flags & MMC_DATA_READ)
+ dma_unmap_sg(host->chan_rx->device->dev, data->sg,
+ data->sg_len, DMA_FROM_DEVICE);
+ else
+ dma_unmap_sg(host->chan_tx->device->dev, data->sg,
+ data->sg_len, DMA_TO_DEVICE);
+}
+
+static void usdhi6_dma_complete(void *arg)
+{
+ struct usdhi6_host *host = arg;
+ struct mmc_request *mrq = host->mrq;
+
+ if (WARN(!mrq || !mrq->data, "%s: NULL data in DMA completion for %p!\n",
+ dev_name(mmc_dev(host->mmc)), mrq))
+ return;
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): CMD%u DMA completed\n", __func__,
+ mrq->cmd->opcode);
+
+ usdhi6_dma_stop_unmap(host);
+ usdhi6_wait_for_brwe(host, mrq->data->flags & MMC_DATA_READ);
+}
+
+static int usdhi6_dma_setup(struct usdhi6_host *host, struct dma_chan *chan,
+ enum dma_transfer_direction dir)
+{
+ struct mmc_data *data = host->mrq->data;
+ struct scatterlist *sg = data->sg;
+ struct dma_async_tx_descriptor *desc = NULL;
+ dma_cookie_t cookie = -EINVAL;
+ enum dma_data_direction data_dir;
+ int ret;
+
+ switch (dir) {
+ case DMA_MEM_TO_DEV:
+ data_dir = DMA_TO_DEVICE;
+ break;
+ case DMA_DEV_TO_MEM:
+ data_dir = DMA_FROM_DEVICE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ ret = dma_map_sg(chan->device->dev, sg, data->sg_len, data_dir);
+ if (ret > 0) {
+ host->dma_active = true;
+ desc = dmaengine_prep_slave_sg(chan, sg, ret, dir,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+ }
+
+ if (desc) {
+ desc->callback = usdhi6_dma_complete;
+ desc->callback_param = host;
+ cookie = dmaengine_submit(desc);
+ }
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): mapped %d -> %d, cookie %d @ %p\n",
+ __func__, data->sg_len, ret, cookie, desc);
+
+ if (cookie < 0) {
+ /* DMA failed, fall back to PIO */
+ if (ret >= 0)
+ ret = cookie;
+ usdhi6_dma_release(host);
+ dev_warn(mmc_dev(host->mmc),
+ "DMA failed: %d, falling back to PIO\n", ret);
+ }
+
+ return cookie;
+}
+
+static int usdhi6_dma_start(struct usdhi6_host *host)
+{
+ if (!host->chan_rx || !host->chan_tx)
+ return -ENODEV;
+
+ if (host->mrq->data->flags & MMC_DATA_READ)
+ return usdhi6_dma_setup(host, host->chan_rx, DMA_DEV_TO_MEM);
+
+ return usdhi6_dma_setup(host, host->chan_tx, DMA_MEM_TO_DEV);
+}
+
+static void usdhi6_dma_kill(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): SG of %u: %ux%u\n",
+ __func__, data->sg_len, data->blocks, data->blksz);
+ /* Abort DMA */
+ if (data->flags & MMC_DATA_READ)
+ dmaengine_terminate_all(host->chan_rx);
+ else
+ dmaengine_terminate_all(host->chan_tx);
+}
+
+static void usdhi6_dma_check_error(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): IO error %d, status 0x%x\n",
+ __func__, host->io_error, usdhi6_read(host, USDHI6_SD_INFO1));
+
+ if (host->io_error) {
+ data->error = usdhi6_error_code(host);
+ data->bytes_xfered = 0;
+ usdhi6_dma_kill(host);
+ usdhi6_dma_release(host);
+ dev_warn(mmc_dev(host->mmc),
+ "DMA failed: %d, falling back to PIO\n", data->error);
+ return;
+ }
+
+ /*
+ * The datasheet tells us to check a response from the card, whereas
+ * responses only come after the command phase, not after the data
+ * phase. Let's check anyway.
+ */
+ if (host->irq_status & USDHI6_SD_INFO1_RSP_END)
+ dev_warn(mmc_dev(host->mmc), "Unexpected response received!\n");
+}
+
+static void usdhi6_dma_kick(struct usdhi6_host *host)
+{
+ if (host->mrq->data->flags & MMC_DATA_READ)
+ dma_async_issue_pending(host->chan_rx);
+ else
+ dma_async_issue_pending(host->chan_tx);
+}
+
+static void usdhi6_dma_request(struct usdhi6_host *host, phys_addr_t start)
+{
+ struct dma_slave_config cfg = {
+ .src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
+ };
+ int ret;
+
+ host->chan_tx = dma_request_slave_channel(mmc_dev(host->mmc), "tx");
+ dev_dbg(mmc_dev(host->mmc), "%s: TX: got channel %p\n", __func__,
+ host->chan_tx);
+
+ if (!host->chan_tx)
+ return;
+
+ cfg.direction = DMA_MEM_TO_DEV;
+ cfg.dst_addr = start + USDHI6_SD_BUF0;
+ cfg.dst_maxburst = 128; /* 128 words * 4 bytes = 512 bytes */
+ cfg.src_addr = 0;
+ ret = dmaengine_slave_config(host->chan_tx, &cfg);
+ if (ret < 0)
+ goto e_release_tx;
+
+ host->chan_rx = dma_request_slave_channel(mmc_dev(host->mmc), "rx");
+ dev_dbg(mmc_dev(host->mmc), "%s: RX: got channel %p\n", __func__,
+ host->chan_rx);
+
+ if (!host->chan_rx)
+ goto e_release_tx;
+
+ cfg.direction = DMA_DEV_TO_MEM;
+ cfg.src_addr = cfg.dst_addr;
+ cfg.src_maxburst = 128; /* 128 words * 4 bytes = 512 bytes */
+ cfg.dst_addr = 0;
+ ret = dmaengine_slave_config(host->chan_rx, &cfg);
+ if (ret < 0)
+ goto e_release_rx;
+
+ return;
+
+e_release_rx:
+ dma_release_channel(host->chan_rx);
+ host->chan_rx = NULL;
+e_release_tx:
+ dma_release_channel(host->chan_tx);
+ host->chan_tx = NULL;
+}
+
+/* API helpers */
+
+static void usdhi6_clk_set(struct usdhi6_host *host, struct mmc_ios *ios)
+{
+ unsigned long rate = ios->clock;
+ u32 val;
+ unsigned int i;
+
+ for (i = 1000; i; i--) {
+ if (usdhi6_read(host, USDHI6_SD_INFO2) & USDHI6_SD_INFO2_SCLKDIVEN)
+ break;
+ usleep_range(10, 100);
+ }
+
+ if (!i) {
+ dev_err(mmc_dev(host->mmc), "SD bus busy, clock set aborted\n");
+ return;
+ }
+
+ val = usdhi6_read(host, USDHI6_SD_CLK_CTRL) & ~USDHI6_SD_CLK_CTRL_DIV_MASK;
+
+ if (rate) {
+ unsigned long new_rate;
+
+ if (host->imclk <= rate) {
+ if (ios->timing != MMC_TIMING_UHS_DDR50) {
+ /* Cannot have 1-to-1 clock in DDR mode */
+ new_rate = host->imclk;
+ val |= 0xff;
+ } else {
+ new_rate = host->imclk / 2;
+ }
+ } else {
+ unsigned long div =
+ roundup_pow_of_two(DIV_ROUND_UP(host->imclk, rate));
+ val |= div >> 2;
+ new_rate = host->imclk / div;
+ }
+
+ if (host->rate == new_rate)
+ return;
+
+ host->rate = new_rate;
+
+ dev_dbg(mmc_dev(host->mmc), "target %lu, div %u, set %lu\n",
+ rate, (val & 0xff) << 2, new_rate);
+ }
+
+ /*
+ * if old or new rate is equal to input rate, have to switch the clock
+ * off before changing and on after
+ */
+ if (host->imclk == rate || host->imclk == host->rate || !rate)
+ usdhi6_write(host, USDHI6_SD_CLK_CTRL,
+ val & ~USDHI6_SD_CLK_CTRL_SCLKEN);
+
+ if (!rate) {
+ host->rate = 0;
+ return;
+ }
+
+ usdhi6_write(host, USDHI6_SD_CLK_CTRL, val);
+
+ if (host->imclk == rate || host->imclk == host->rate ||
+ !(val & USDHI6_SD_CLK_CTRL_SCLKEN))
+ usdhi6_write(host, USDHI6_SD_CLK_CTRL,
+ val | USDHI6_SD_CLK_CTRL_SCLKEN);
+}
+
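
A worked example of the divider arithmetic above (numbers purely illustrative):

/*
 * imclk = 156 MHz, requested ios->clock = 25 MHz:
 *
 *   DIV_ROUND_UP(156000000, 25000000) = 7  ->  roundup_pow_of_two(7) = 8
 *   divider field: val |= 8 >> 2 = 0x02
 *   new_rate = 156000000 / 8 = 19.5 MHz   (always at or below the request)
 *
 * When rate >= imclk the field is instead set to 0xff for a 1:1 clock,
 * except in DDR50 mode, where the code settles for imclk / 2.
 */
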
+static void usdhi6_set_power(struct usdhi6_host *host, struct mmc_ios *ios)
+{
+ struct mmc_host *mmc = host->mmc;
+
+ if (!IS_ERR(mmc->supply.vmmc))
+ /* Errors ignored... */
+ mmc_regulator_set_ocr(mmc, mmc->supply.vmmc,
+ ios->power_mode ? ios->vdd : 0);
+}
+
+static int usdhi6_reset(struct usdhi6_host *host)
+{
+ int i;
+
+ usdhi6_write(host, USDHI6_SOFT_RST, USDHI6_SOFT_RST_RESERVED);
+ cpu_relax();
+ usdhi6_write(host, USDHI6_SOFT_RST, USDHI6_SOFT_RST_RESERVED | USDHI6_SOFT_RST_RESET);
+ for (i = 1000; i; i--)
+ if (usdhi6_read(host, USDHI6_SOFT_RST) & USDHI6_SOFT_RST_RESET)
+ break;
+
+ return i ? 0 : -ETIMEDOUT;
+}
+
+static void usdhi6_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+ u32 option, mode;
+ int ret;
+
+ dev_dbg(mmc_dev(mmc), "%uHz, OCR: %u, power %u, bus-width %u, timing %u\n",
+ ios->clock, ios->vdd, ios->power_mode, ios->bus_width, ios->timing);
+
+ switch (ios->power_mode) {
+ case MMC_POWER_OFF:
+ usdhi6_set_power(host, ios);
+ usdhi6_only_cd(host);
+ break;
+ case MMC_POWER_UP:
+ /*
+ * We only also touch USDHI6_SD_OPTION from .request(), which
+ * cannot race with MMC_POWER_UP
+ */
+ ret = usdhi6_reset(host);
+ if (ret < 0) {
+ dev_err(mmc_dev(mmc), "Cannot reset the interface!\n");
+ } else {
+ usdhi6_set_power(host, ios);
+ usdhi6_only_cd(host);
+ }
+ break;
+ case MMC_POWER_ON:
+ option = usdhi6_read(host, USDHI6_SD_OPTION);
+ /*
+ * The eMMC standard only allows 4 or 8 bits in the DDR mode,
+ * the same probably holds for SD cards. We check here anyway,
+ * since the datasheet explicitly requires 4 bits for DDR.
+ */
+ if (ios->bus_width == MMC_BUS_WIDTH_1) {
+ if (ios->timing == MMC_TIMING_UHS_DDR50)
+ dev_err(mmc_dev(mmc),
+ "4 bits are required for DDR\n");
+ option |= USDHI6_SD_OPTION_WIDTH_1;
+ mode = 0;
+ } else {
+ option &= ~USDHI6_SD_OPTION_WIDTH_1;
+ mode = ios->timing == MMC_TIMING_UHS_DDR50;
+ }
+ usdhi6_write(host, USDHI6_SD_OPTION, option);
+ usdhi6_write(host, USDHI6_SDIF_MODE, mode);
+ break;
+ }
+
+ if (host->rate != ios->clock)
+ usdhi6_clk_set(host, ios);
+}
+
+/* This is data timeout. Response timeout is fixed to 640 clock cycles */
+static void usdhi6_timeout_set(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ u32 val;
+ unsigned long ticks;
+
+ if (!mrq->data)
+ ticks = host->rate / 1000 * mrq->cmd->busy_timeout;
+ else
+ ticks = host->rate / 1000000 * (mrq->data->timeout_ns / 1000) +
+ mrq->data->timeout_clks;
+
+ if (!ticks || ticks > 1 << 27)
+ /* Max timeout */
+ val = 14;
+ else if (ticks < 1 << 13)
+ /* Min timeout */
+ val = 0;
+ else
+ val = order_base_2(ticks) - 13;
+
+ dev_dbg(mmc_dev(host->mmc), "Set %s timeout %lu ticks @ %lu Hz\n",
+ mrq->data ? "data" : "cmd", ticks, host->rate);
+
+ /* Timeout Counter mask: 0xf0 */
+ usdhi6_write(host, USDHI6_SD_OPTION, (val << USDHI6_SD_OPTION_TIMEOUT_SHIFT) |
+ (usdhi6_read(host, USDHI6_SD_OPTION) & ~USDHI6_SD_OPTION_TIMEOUT_MASK));
+}
+
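
A worked example of the timeout exponent (illustrative numbers):

/*
 * host->rate = 25 MHz, data->timeout_ns = 100 ms, timeout_clks = 0:
 *
 *   ticks = 25000000 / 1000000 * (100000000 / 1000) = 25 * 100000 = 2500000
 *   order_base_2(2500000) = 22          (2^21 < 2500000 <= 2^22)
 *   val   = 22 - 13 = 9
 *
 * Together with the clamps above (val = 0 below 2^13 ticks, val = 14 above
 * 2^27) this implies a hardware timeout of 2^(13 + val) clock cycles, here
 * 2^22 cycles, i.e. roughly 168 ms at 25 MHz - the first power-of-two value
 * not shorter than the requested 100 ms.
 */
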
+static void usdhi6_request_done(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_data *data = mrq->data;
+
+ if (WARN(host->pg.page || host->head_pg.page,
+ "Page %p or %p not unmapped: wait %u, CMD%d(%c) @ +0x%x %ux%u in SG%u!\n",
+ host->pg.page, host->head_pg.page, host->wait, mrq->cmd->opcode,
+ data ? (data->flags & MMC_DATA_READ ? 'R' : 'W') : '-',
+ data ? host->offset : 0, data ? data->blocks : 0,
+ data ? data->blksz : 0, data ? data->sg_len : 0))
+ usdhi6_sg_unmap(host, true);
+
+ if (mrq->cmd->error ||
+ (data && data->error) ||
+ (mrq->stop && mrq->stop->error))
+ dev_dbg(mmc_dev(host->mmc), "%s(CMD%d: %ux%u): err %d %d %d\n",
+ __func__, mrq->cmd->opcode, data ? data->blocks : 0,
+ data ? data->blksz : 0,
+ mrq->cmd->error,
+ data ? data->error : 1,
+ mrq->stop ? mrq->stop->error : 1);
+
+ /* Disable DMA */
+ usdhi6_write(host, USDHI6_CC_EXT_MODE, 0);
+ host->wait = USDHI6_WAIT_FOR_REQUEST;
+ host->mrq = NULL;
+
+ mmc_request_done(host->mmc, mrq);
+}
+
+static int usdhi6_cmd_flags(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_command *cmd = mrq->cmd;
+ u16 opc = cmd->opcode;
+
+ if (host->app_cmd) {
+ host->app_cmd = false;
+ opc |= USDHI6_SD_CMD_APP;
+ }
+
+ if (mrq->data) {
+ opc |= USDHI6_SD_CMD_DATA;
+
+ if (mrq->data->flags & MMC_DATA_READ)
+ opc |= USDHI6_SD_CMD_READ;
+
+ if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+ cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+ (cmd->opcode == SD_IO_RW_EXTENDED &&
+ mrq->data->blocks > 1)) {
+ opc |= USDHI6_SD_CMD_MULTI;
+ if (!mrq->stop)
+ opc |= USDHI6_SD_CMD_CMD12_AUTO_OFF;
+ }
+
+ switch (mmc_resp_type(cmd)) {
+ case MMC_RSP_NONE:
+ opc |= USDHI6_SD_CMD_MODE_RSP_NONE;
+ break;
+ case MMC_RSP_R1:
+ opc |= USDHI6_SD_CMD_MODE_RSP_R1;
+ break;
+ case MMC_RSP_R1B:
+ opc |= USDHI6_SD_CMD_MODE_RSP_R1B;
+ break;
+ case MMC_RSP_R2:
+ opc |= USDHI6_SD_CMD_MODE_RSP_R2;
+ break;
+ case MMC_RSP_R3:
+ opc |= USDHI6_SD_CMD_MODE_RSP_R3;
+ break;
+ default:
+ dev_warn(mmc_dev(host->mmc),
+ "Unknown response type %d\n",
+ mmc_resp_type(cmd));
+ return -EINVAL;
+ }
+ }
+
+ return opc;
+}
+
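
A couple of encodings produced by the flag packing above, using the USDHI6_SD_CMD_* values defined at the top of the file (worked examples only):

/*
 *   CMD17, single block read, R1 response:
 *     0x11 | DATA(0x0800) | READ(0x1000) | MODE_RSP_R1(0x0400)     = 0x1c11
 *
 *   CMD18, multi-block read with an explicit CMD12 (mrq->stop set):
 *     0x12 | DATA | READ | MULTI(0x2000) | MODE_RSP_R1             = 0x3c12
 *     (CMD12_AUTO_OFF, 0x4000, is only OR-ed in when there is no stop cmd)
 *
 * Non-data commands leave the response-mode field at zero, i.e. the
 * controller's automatic response handling (USDHI6_SD_CMD_MODE_RSP_AUTO).
 */
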
+static int usdhi6_rq_start(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_command *cmd = mrq->cmd;
+ struct mmc_data *data = mrq->data;
+ int opc = usdhi6_cmd_flags(host);
+ int i;
+
+ if (opc < 0)
+ return opc;
+
+ for (i = 1000; i; i--) {
+ if (!(usdhi6_read(host, USDHI6_SD_INFO2) & USDHI6_SD_INFO2_CBSY))
+ break;
+ usleep_range(10, 100);
+ }
+
+ if (!i) {
+ dev_dbg(mmc_dev(host->mmc), "Command active, request aborted\n");
+ return -EAGAIN;
+ }
+
+ if (data) {
+ bool use_dma;
+ int ret = 0;
+
+ host->page_idx = 0;
+
+ if (cmd->opcode == SD_IO_RW_EXTENDED && data->blocks > 1) {
+ switch (data->blksz) {
+ case 512:
+ break;
+ case 32:
+ case 64:
+ case 128:
+ case 256:
+ if (mrq->stop)
+ ret = -EINVAL;
+ break;
+ default:
+ ret = -EINVAL;
+ }
+ } else if ((cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+ cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK) &&
+ data->blksz != 512) {
+ ret = -EINVAL;
+ }
+
+ if (ret < 0) {
+ dev_warn(mmc_dev(host->mmc), "%s(): %u blocks of %u bytes\n",
+ __func__, data->blocks, data->blksz);
+ return -EINVAL;
+ }
+
+ if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+ cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+ (cmd->opcode == SD_IO_RW_EXTENDED &&
+ data->blocks > 1))
+ usdhi6_sg_prep(host);
+
+ usdhi6_write(host, USDHI6_SD_SIZE, data->blksz);
+
+ if ((data->blksz >= USDHI6_MIN_DMA ||
+ data->blocks > 1) &&
+ (data->blksz % 4 ||
+ data->sg->offset % 4))
+ dev_dbg(mmc_dev(host->mmc),
+ "Bad SG of %u: %ux%u @ %u\n", data->sg_len,
+ data->blksz, data->blocks, data->sg->offset);
+
+ /* Enable DMA for USDHI6_MIN_DMA bytes or more */
+ use_dma = data->blksz >= USDHI6_MIN_DMA &&
+ !(data->blksz % 4) &&
+ usdhi6_dma_start(host) >= DMA_MIN_COOKIE;
+
+ if (use_dma)
+ usdhi6_write(host, USDHI6_CC_EXT_MODE, USDHI6_CC_EXT_MODE_SDRW);
+
+ dev_dbg(mmc_dev(host->mmc),
+ "%s(): request opcode %u, %u blocks of %u bytes in %u segments, %s %s @+0x%x%s\n",
+ __func__, cmd->opcode, data->blocks, data->blksz,
+ data->sg_len, use_dma ? "DMA" : "PIO",
+ data->flags & MMC_DATA_READ ? "read" : "write",
+ data->sg->offset, mrq->stop ? " + stop" : "");
+ } else {
+ dev_dbg(mmc_dev(host->mmc), "%s(): request opcode %u\n",
+ __func__, cmd->opcode);
+ }
+
+ /* We have to get a command completion interrupt with DMA too */
+ usdhi6_wait_for_resp(host);
+
+ host->wait = USDHI6_WAIT_FOR_CMD;
+ schedule_delayed_work(&host->timeout_work, host->timeout);
+
+ /* SEC bit is required to enable block counting by the core */
+ usdhi6_write(host, USDHI6_SD_STOP,
+ data && data->blocks > 1 ? USDHI6_SD_STOP_SEC : 0);
+ usdhi6_write(host, USDHI6_SD_ARG, cmd->arg);
+
+ /* Kick command execution */
+ usdhi6_write(host, USDHI6_SD_CMD, opc);
+
+ return 0;
+}
+
+static void usdhi6_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+ int ret;
+
+ cancel_delayed_work_sync(&host->timeout_work);
+
+ host->mrq = mrq;
+ host->sg = NULL;
+
+ usdhi6_timeout_set(host);
+ ret = usdhi6_rq_start(host);
+ if (ret < 0) {
+ mrq->cmd->error = ret;
+ usdhi6_request_done(host);
+ }
+}
+
+static int usdhi6_get_cd(struct mmc_host *mmc)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+ /* Read is atomic, no need to lock */
+ u32 status = usdhi6_read(host, USDHI6_SD_INFO1) & USDHI6_SD_INFO1_CD;
+
+/*
+ * level status.CD CD_ACTIVE_HIGH card present
+ * 1 0 0 0
+ * 1 0 1 1
+ * 0 1 0 1
+ * 0 1 1 0
+ */
+ return !status ^ !(mmc->caps2 & MMC_CAP2_CD_ACTIVE_HIGH);
+}
+
+static int usdhi6_get_ro(struct mmc_host *mmc)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+ /* No locking as above */
+ u32 status = usdhi6_read(host, USDHI6_SD_INFO1) & USDHI6_SD_INFO1_WP;
+
+/*
+ * level status.WP RO_ACTIVE_HIGH card read-only
+ * 1 0 0 0
+ * 1 0 1 1
+ * 0 1 0 1
+ * 0 1 1 0
+ */
+ return !status ^ !(mmc->caps2 & MMC_CAP2_RO_ACTIVE_HIGH);
+}
+
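
The !status ^ !active_high idiom above (and in usdhi6_get_cd()) reduces a masked register bit and a capability flag to booleans before comparing polarities. A standalone illustration with a hypothetical helper:

static int line_asserted(unsigned int status, unsigned int active_high)
{
	return !status ^ !active_high;	/* '!' folds e.g. BIT(5) down to 0/1 */
}

/*
 *   line_asserted(0x00, 0) == 0        line_asserted(BIT(5), 0) == 1
 *   line_asserted(0x00, 1) == 1        line_asserted(BIT(5), 1) == 0
 *
 * which reproduces the truth tables in the two comments above.
 */
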
+static void usdhi6_enable_sdio_irq(struct mmc_host *mmc, int enable)
+{
+ struct usdhi6_host *host = mmc_priv(mmc);
+
+ dev_dbg(mmc_dev(mmc), "%s(): %sable\n", __func__, enable ? "en" : "dis");
+
+ if (enable) {
+ host->sdio_mask = USDHI6_SDIO_INFO1_IRQ & ~USDHI6_SDIO_INFO1_IOIRQ;
+ usdhi6_write(host, USDHI6_SDIO_INFO1_MASK, host->sdio_mask);
+ usdhi6_write(host, USDHI6_SDIO_MODE, 1);
+ } else {
+ usdhi6_write(host, USDHI6_SDIO_MODE, 0);
+ usdhi6_write(host, USDHI6_SDIO_INFO1_MASK, USDHI6_SDIO_INFO1_IRQ);
+ host->sdio_mask = USDHI6_SDIO_INFO1_IRQ;
+ }
+}
+
+static struct mmc_host_ops usdhi6_ops = {
+ .request = usdhi6_request,
+ .set_ios = usdhi6_set_ios,
+ .get_cd = usdhi6_get_cd,
+ .get_ro = usdhi6_get_ro,
+ .enable_sdio_irq = usdhi6_enable_sdio_irq,
+};
+
+/* State machine handlers */
+
+static void usdhi6_resp_cmd12(struct usdhi6_host *host)
+{
+ struct mmc_command *cmd = host->mrq->stop;
+ cmd->resp[0] = usdhi6_read(host, USDHI6_SD_RSP10);
+}
+
+static void usdhi6_resp_read(struct usdhi6_host *host)
+{
+ struct mmc_command *cmd = host->mrq->cmd;
+ u32 *rsp = cmd->resp, tmp = 0;
+ int i;
+
+/*
+ * RSP10 39-8
+ * RSP32 71-40
+ * RSP54 103-72
+ * RSP76 127-104
+ * R2-type response:
+ * resp[0] = r[127..96]
+ * resp[1] = r[95..64]
+ * resp[2] = r[63..32]
+ * resp[3] = r[31..0]
+ * Other responses:
+ * resp[0] = r[39..8]
+ */
+
+ if (mmc_resp_type(cmd) == MMC_RSP_NONE)
+ return;
+
+ if (!(host->irq_status & USDHI6_SD_INFO1_RSP_END)) {
+ dev_err(mmc_dev(host->mmc),
+ "CMD%d: response expected but is missing!\n", cmd->opcode);
+ return;
+ }
+
+ if (mmc_resp_type(cmd) & MMC_RSP_136)
+ for (i = 0; i < 4; i++) {
+ if (i)
+ rsp[3 - i] = tmp >> 24;
+ tmp = usdhi6_read(host, USDHI6_SD_RSP10 + i * 8);
+ rsp[3 - i] |= tmp << 8;
+ }
+ else if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+ cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK)
+ /* Read RSP54 to avoid conflict with auto CMD12 */
+ rsp[0] = usdhi6_read(host, USDHI6_SD_RSP54);
+ else
+ rsp[0] = usdhi6_read(host, USDHI6_SD_RSP10);
+
+ dev_dbg(mmc_dev(host->mmc), "Response 0x%x\n", rsp[0]);
+}
+
+static int usdhi6_blk_read(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+ u32 *p;
+ int i, rest;
+
+ if (host->io_error) {
+ data->error = usdhi6_error_code(host);
+ goto error;
+ }
+
+ if (host->pg.page) {
+ p = host->blk_page + host->offset;
+ } else {
+ p = usdhi6_sg_map(host);
+ if (!p) {
+ data->error = -ENOMEM;
+ goto error;
+ }
+ }
+
+ for (i = 0; i < data->blksz / 4; i++, p++)
+ *p = usdhi6_read(host, USDHI6_SD_BUF0);
+
+ rest = data->blksz % 4;
+ for (i = 0; i < (rest + 1) / 2; i++) {
+ u16 d = usdhi6_read16(host, USDHI6_SD_BUF0);
+ ((u8 *)p)[2 * i] = ((u8 *)&d)[0];
+ if (rest > 1 && !i)
+ ((u8 *)p)[2 * i + 1] = ((u8 *)&d)[1];
+ }
+
+ return 0;
+
+error:
+ dev_dbg(mmc_dev(host->mmc), "%s(): %d\n", __func__, data->error);
+ host->wait = USDHI6_WAIT_FOR_REQUEST;
+ return data->error;
+}
+
+static int usdhi6_blk_write(struct usdhi6_host *host)
+{
+ struct mmc_data *data = host->mrq->data;
+ u32 *p;
+ int i, rest;
+
+ if (host->io_error) {
+ data->error = usdhi6_error_code(host);
+ goto error;
+ }
+
+ if (host->pg.page) {
+ p = host->blk_page + host->offset;
+ } else {
+ p = usdhi6_sg_map(host);
+ if (!p) {
+ data->error = -ENOMEM;
+ goto error;
+ }
+ }
+
+ for (i = 0; i < data->blksz / 4; i++, p++)
+ usdhi6_write(host, USDHI6_SD_BUF0, *p);
+
+ rest = data->blksz % 4;
+ for (i = 0; i < (rest + 1) / 2; i++) {
+ u16 d;
+ ((u8 *)&d)[0] = ((u8 *)p)[2 * i];
+ if (rest > 1 && !i)
+ ((u8 *)&d)[1] = ((u8 *)p)[2 * i + 1];
+ else
+ ((u8 *)&d)[1] = 0;
+ usdhi6_write16(host, USDHI6_SD_BUF0, d);
+ }
+
+ return 0;
+
+error:
+ dev_dbg(mmc_dev(host->mmc), "%s(): %d\n", __func__, data->error);
+ host->wait = USDHI6_WAIT_FOR_REQUEST;
+ return data->error;
+}
+
+static int usdhi6_stop_cmd(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+
+ switch (mrq->cmd->opcode) {
+ case MMC_READ_MULTIPLE_BLOCK:
+ case MMC_WRITE_MULTIPLE_BLOCK:
+ if (mrq->stop->opcode == MMC_STOP_TRANSMISSION) {
+ host->wait = USDHI6_WAIT_FOR_STOP;
+ return 0;
+ }
+ /* Unsupported STOP command */
+ default:
+ dev_err(mmc_dev(host->mmc),
+ "unsupported stop CMD%d for CMD%d\n",
+ mrq->stop->opcode, mrq->cmd->opcode);
+ mrq->stop->error = -EOPNOTSUPP;
+ }
+
+ return -EOPNOTSUPP;
+}
+
+static bool usdhi6_end_cmd(struct usdhi6_host *host)
+{
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_command *cmd = mrq->cmd;
+
+ if (host->io_error) {
+ cmd->error = usdhi6_error_code(host);
+ return false;
+ }
+
+ usdhi6_resp_read(host);
+
+ if (!mrq->data)
+ return false;
+
+ if (host->dma_active) {
+ usdhi6_dma_kick(host);
+ if (!mrq->stop)
+ host->wait = USDHI6_WAIT_FOR_DMA;
+ else if (usdhi6_stop_cmd(host) < 0)
+ return false;
+ } else if (mrq->data->flags & MMC_DATA_READ) {
+ if (cmd->opcode == MMC_READ_MULTIPLE_BLOCK ||
+ (cmd->opcode == SD_IO_RW_EXTENDED &&
+ mrq->data->blocks > 1))
+ host->wait = USDHI6_WAIT_FOR_MREAD;
+ else
+ host->wait = USDHI6_WAIT_FOR_READ;
+ } else {
+ if (cmd->opcode == MMC_WRITE_MULTIPLE_BLOCK ||
+ (cmd->opcode == SD_IO_RW_EXTENDED &&
+ mrq->data->blocks > 1))
+ host->wait = USDHI6_WAIT_FOR_MWRITE;
+ else
+ host->wait = USDHI6_WAIT_FOR_WRITE;
+ }
+
+ return true;
+}
+
+static bool usdhi6_read_block(struct usdhi6_host *host)
+{
+ /* ACCESS_END IRQ is already unmasked */
+ int ret = usdhi6_blk_read(host);
+
+ /*
+ * Have to force unmapping both pages: the single block could have been
+ * cross-page, in which case for single-block IO host->page_idx == 0.
+ * So, if we don't force, the second page won't be unmapped.
+ */
+ usdhi6_sg_unmap(host, true);
+
+ if (ret < 0)
+ return false;
+
+ host->wait = USDHI6_WAIT_FOR_DATA_END;
+ return true;
+}
+
+static bool usdhi6_mread_block(struct usdhi6_host *host)
+{
+ int ret = usdhi6_blk_read(host);
+
+ if (ret < 0)
+ return false;
+
+ usdhi6_sg_advance(host);
+
+ return !host->mrq->data->error &&
+ (host->wait != USDHI6_WAIT_FOR_DATA_END || !host->mrq->stop);
+}
+
+static bool usdhi6_write_block(struct usdhi6_host *host)
+{
+ int ret = usdhi6_blk_write(host);
+
+ /* See comment in usdhi6_read_block() */
+ usdhi6_sg_unmap(host, true);
+
+ if (ret < 0)
+ return false;
+
+ host->wait = USDHI6_WAIT_FOR_DATA_END;
+ return true;
+}
+
+static bool usdhi6_mwrite_block(struct usdhi6_host *host)
+{
+ int ret = usdhi6_blk_write(host);
+
+ if (ret < 0)
+ return false;
+
+ usdhi6_sg_advance(host);
+
+ return !host->mrq->data->error &&
+ (host->wait != USDHI6_WAIT_FOR_DATA_END || !host->mrq->stop);
+}
+
+/* Interrupt & timeout handlers */
+
+static irqreturn_t usdhi6_sd_bh(int irq, void *dev_id)
+{
+ struct usdhi6_host *host = dev_id;
+ struct mmc_request *mrq;
+ struct mmc_command *cmd;
+ struct mmc_data *data;
+ bool io_wait = false;
+
+ cancel_delayed_work_sync(&host->timeout_work);
+
+ mrq = host->mrq;
+ if (!mrq)
+ return IRQ_HANDLED;
+
+ cmd = mrq->cmd;
+ data = mrq->data;
+
+ switch (host->wait) {
+ case USDHI6_WAIT_FOR_REQUEST:
+ /* We're too late, the timeout has already kicked in */
+ return IRQ_HANDLED;
+ case USDHI6_WAIT_FOR_CMD:
+ /* Wait for data? */
+ io_wait = usdhi6_end_cmd(host);
+ break;
+ case USDHI6_WAIT_FOR_MREAD:
+ /* Wait for more data? */
+ io_wait = usdhi6_mread_block(host);
+ break;
+ case USDHI6_WAIT_FOR_READ:
+ /* Wait for data end? */
+ io_wait = usdhi6_read_block(host);
+ break;
+ case USDHI6_WAIT_FOR_MWRITE:
+ /* Wait data to write? */
+ io_wait = usdhi6_mwrite_block(host);
+ break;
+ case USDHI6_WAIT_FOR_WRITE:
+ /* Wait for data end? */
+ io_wait = usdhi6_write_block(host);
+ break;
+ case USDHI6_WAIT_FOR_DMA:
+ usdhi6_dma_check_error(host);
+ break;
+ case USDHI6_WAIT_FOR_STOP:
+ usdhi6_write(host, USDHI6_SD_STOP, 0);
+ if (host->io_error) {
+ int ret = usdhi6_error_code(host);
+ if (mrq->stop)
+ mrq->stop->error = ret;
+ else
+ mrq->data->error = ret;
+ dev_warn(mmc_dev(host->mmc), "%s(): %d\n", __func__, ret);
+ break;
+ }
+ usdhi6_resp_cmd12(host);
+ mrq->stop->error = 0;
+ break;
+ case USDHI6_WAIT_FOR_DATA_END:
+ if (host->io_error) {
+ mrq->data->error = usdhi6_error_code(host);
+ dev_warn(mmc_dev(host->mmc), "%s(): %d\n", __func__,
+ mrq->data->error);
+ }
+ break;
+ default:
+ cmd->error = -EFAULT;
+ dev_err(mmc_dev(host->mmc), "Invalid state %u\n", host->wait);
+ usdhi6_request_done(host);
+ return IRQ_HANDLED;
+ }
+
+ if (io_wait) {
+ schedule_delayed_work(&host->timeout_work, host->timeout);
+ /* Wait for more data or ACCESS_END */
+ if (!host->dma_active)
+ usdhi6_wait_for_brwe(host, mrq->data->flags & MMC_DATA_READ);
+ return IRQ_HANDLED;
+ }
+
+ if (!cmd->error) {
+ if (data) {
+ if (!data->error) {
+ if (host->wait != USDHI6_WAIT_FOR_STOP &&
+ host->mrq->stop &&
+ !host->mrq->stop->error &&
+ !usdhi6_stop_cmd(host)) {
+ /* Sending STOP */
+ usdhi6_wait_for_resp(host);
+
+ schedule_delayed_work(&host->timeout_work,
+ host->timeout);
+
+ return IRQ_HANDLED;
+ }
+
+ data->bytes_xfered = data->blocks * data->blksz;
+ } else {
+ /* Data error: might need to unmap the last page */
+ dev_warn(mmc_dev(host->mmc), "%s(): data error %d\n",
+ __func__, data->error);
+ usdhi6_sg_unmap(host, true);
+ }
+ } else if (cmd->opcode == MMC_APP_CMD) {
+ host->app_cmd = true;
+ }
+ }
+
+ usdhi6_request_done(host);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t usdhi6_sd(int irq, void *dev_id)
+{
+ struct usdhi6_host *host = dev_id;
+ u16 status, status2, error;
+
+ status = usdhi6_read(host, USDHI6_SD_INFO1) & ~host->status_mask &
+ ~USDHI6_SD_INFO1_CARD;
+ status2 = usdhi6_read(host, USDHI6_SD_INFO2) & ~host->status2_mask;
+
+ usdhi6_only_cd(host);
+
+ dev_dbg(mmc_dev(host->mmc),
+ "IRQ status = 0x%08x, status2 = 0x%08x\n", status, status2);
+
+ if (!status && !status2)
+ return IRQ_NONE;
+
+ error = status2 & USDHI6_SD_INFO2_ERR;
+
+ /* Ack / clear interrupts */
+ if (USDHI6_SD_INFO1_IRQ & status)
+ usdhi6_write(host, USDHI6_SD_INFO1,
+ 0xffff & ~(USDHI6_SD_INFO1_IRQ & status));
+
+ if (USDHI6_SD_INFO2_IRQ & status2) {
+ if (error)
+ /* In error cases BWE and BRE aren't cleared automatically */
+ status2 |= USDHI6_SD_INFO2_BWE | USDHI6_SD_INFO2_BRE;
+
+ usdhi6_write(host, USDHI6_SD_INFO2,
+ 0xffff & ~(USDHI6_SD_INFO2_IRQ & status2));
+ }
+
+ host->io_error = error;
+ host->irq_status = status;
+
+ if (error) {
+ /* Don't pollute the log with unsupported command timeouts */
+ if (host->wait != USDHI6_WAIT_FOR_CMD ||
+ error != USDHI6_SD_INFO2_RSP_TOUT)
+ dev_warn(mmc_dev(host->mmc),
+ "%s(): INFO2 error bits 0x%08x\n",
+ __func__, error);
+ else
+ dev_dbg(mmc_dev(host->mmc),
+ "%s(): INFO2 error bits 0x%08x\n",
+ __func__, error);
+ }
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t usdhi6_sdio(int irq, void *dev_id)
+{
+ struct usdhi6_host *host = dev_id;
+ u32 status = usdhi6_read(host, USDHI6_SDIO_INFO1) & ~host->sdio_mask;
+
+ dev_dbg(mmc_dev(host->mmc), "%s(): status 0x%x\n", __func__, status);
+
+ if (!status)
+ return IRQ_NONE;
+
+ usdhi6_write(host, USDHI6_SDIO_INFO1, ~status);
+
+ mmc_signal_sdio_irq(host->mmc);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t usdhi6_cd(int irq, void *dev_id)
+{
+ struct usdhi6_host *host = dev_id;
+ struct mmc_host *mmc = host->mmc;
+ u16 status;
+
+ /* We're only interested in hotplug events here */
+ status = usdhi6_read(host, USDHI6_SD_INFO1) & ~host->status_mask &
+ USDHI6_SD_INFO1_CARD;
+
+ if (!status)
+ return IRQ_NONE;
+
+ /* Ack */
+ usdhi6_write(host, USDHI6_SD_INFO1, !status);
+
+ if (!work_pending(&mmc->detect.work) &&
+ (((status & USDHI6_SD_INFO1_CARD_INSERT) &&
+ !mmc->card) ||
+ ((status & USDHI6_SD_INFO1_CARD_EJECT) &&
+ mmc->card)))
+ mmc_detect_change(mmc, msecs_to_jiffies(100));
+
+ return IRQ_HANDLED;
+}
+
+/*
+ * This should not actually be needed, if the built-in timeout works reliably in
+ * both PIO cases and DMA never fails. But if DMA does fail, a timeout
+ * handler might be the only way to catch the error.
+ */
+static void usdhi6_timeout_work(struct work_struct *work)
+{
+ struct delayed_work *d = container_of(work, struct delayed_work, work);
+ struct usdhi6_host *host = container_of(d, struct usdhi6_host, timeout_work);
+ struct mmc_request *mrq = host->mrq;
+ struct mmc_data *data = mrq ? mrq->data : NULL;
+
+ dev_warn(mmc_dev(host->mmc),
+ "%s timeout wait %u CMD%d: IRQ 0x%08x:0x%08x, last IRQ 0x%08x\n",
+ host->dma_active ? "DMA" : "PIO",
+ host->wait, mrq ? mrq->cmd->opcode : -1,
+ usdhi6_read(host, USDHI6_SD_INFO1),
+ usdhi6_read(host, USDHI6_SD_INFO2), host->irq_status);
+
+ if (host->dma_active) {
+ usdhi6_dma_kill(host);
+ usdhi6_dma_stop_unmap(host);
+ }
+
+ switch (host->wait) {
+ default:
+ dev_err(mmc_dev(host->mmc), "Invalid state %u\n", host->wait);
+ /* mrq can be NULL in this (should-be-impossible) case */
+ case USDHI6_WAIT_FOR_CMD:
+ usdhi6_error_code(host);
+ if (mrq)
+ mrq->cmd->error = -ETIMEDOUT;
+ break;
+ case USDHI6_WAIT_FOR_STOP:
+ usdhi6_error_code(host);
+ mrq->stop->error = -ETIMEDOUT;
+ break;
+ case USDHI6_WAIT_FOR_DMA:
+ case USDHI6_WAIT_FOR_MREAD:
+ case USDHI6_WAIT_FOR_MWRITE:
+ case USDHI6_WAIT_FOR_READ:
+ case USDHI6_WAIT_FOR_WRITE:
+ dev_dbg(mmc_dev(host->mmc),
+ "%c: page #%u @ +0x%x %ux%u in SG%u. Current SG %u bytes @ %u\n",
+ data->flags & MMC_DATA_READ ? 'R' : 'W', host->page_idx,
+ host->offset, data->blocks, data->blksz, data->sg_len,
+ sg_dma_len(host->sg), host->sg->offset);
+ usdhi6_sg_unmap(host, true);
+ /*
+ * If USDHI6_WAIT_FOR_DATA_END times out, we have already unmapped
+ * the page
+ */
+ case USDHI6_WAIT_FOR_DATA_END:
+ usdhi6_error_code(host);
+ data->error = -ETIMEDOUT;
+ }
+
+ if (mrq)
+ usdhi6_request_done(host);
+}
+
+/* Probe / release */
+
+static const struct of_device_id usdhi6_of_match[] = {
+ {.compatible = "renesas,usdhi6rol0"},
+ {}
+};
+MODULE_DEVICE_TABLE(of, usdhi6_of_match);
+
+static int usdhi6_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct mmc_host *mmc;
+ struct usdhi6_host *host;
+ struct resource *res;
+ int irq_cd, irq_sd, irq_sdio;
+ u32 version;
+ int ret;
+
+ if (!dev->of_node)
+ return -ENODEV;
+
+ irq_cd = platform_get_irq_byname(pdev, "card detect");
+ irq_sd = platform_get_irq_byname(pdev, "data");
+ irq_sdio = platform_get_irq_byname(pdev, "SDIO");
+ if (irq_sd < 0 || irq_sdio < 0)
+ return -ENODEV;
+
+ mmc = mmc_alloc_host(sizeof(struct usdhi6_host), dev);
+ if (!mmc)
+ return -ENOMEM;
+
+ ret = mmc_of_parse(mmc);
+ if (ret < 0)
+ goto e_free_mmc;
+
+ mmc_regulator_get_supply(mmc);
+
+ host = mmc_priv(mmc);
+ host->mmc = mmc;
+ host->wait = USDHI6_WAIT_FOR_REQUEST;
+ host->timeout = msecs_to_jiffies(4000);
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ host->base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(host->base)) {
+ ret = PTR_ERR(host->base);
+ goto e_free_mmc;
+ }
+
+ host->clk = devm_clk_get(dev, NULL);
+ if (IS_ERR(host->clk)) {
+ ret = PTR_ERR(host->clk);
+ goto e_free_mmc;
+ }
+
+ host->imclk = clk_get_rate(host->clk);
+
+ ret = clk_prepare_enable(host->clk);
+ if (ret < 0)
+ goto e_free_mmc;
+
+ version = usdhi6_read(host, USDHI6_VERSION);
+ if ((version & 0xfff) != 0xa0d) {
+ ret = -ENODEV;
+ dev_err(dev, "Version not recognized %x\n", version);
+ goto e_clk_off;
+ }
+
+ dev_info(dev, "A USDHI6ROL0 SD host detected with %d ports\n",
+ usdhi6_read(host, USDHI6_SD_PORT_SEL) >> USDHI6_SD_PORT_SEL_PORTS_SHIFT);
+
+ usdhi6_mask_all(host);
+
+ if (irq_cd >= 0) {
+ ret = devm_request_irq(dev, irq_cd, usdhi6_cd, 0,
+ dev_name(dev), host);
+ if (ret < 0)
+ goto e_clk_off;
+ } else {
+ mmc->caps |= MMC_CAP_NEEDS_POLL;
+ }
+
+ ret = devm_request_threaded_irq(dev, irq_sd, usdhi6_sd, usdhi6_sd_bh, 0,
+ dev_name(dev), host);
+ if (ret < 0)
+ goto e_clk_off;
+
+ ret = devm_request_irq(dev, irq_sdio, usdhi6_sdio, 0,
+ dev_name(dev), host);
+ if (ret < 0)
+ goto e_clk_off;
+
+ INIT_DELAYED_WORK(&host->timeout_work, usdhi6_timeout_work);
+
+ usdhi6_dma_request(host, res->start);
+
+ mmc->ops = &usdhi6_ops;
+ mmc->caps |= MMC_CAP_SD_HIGHSPEED | MMC_CAP_MMC_HIGHSPEED |
+ MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_DDR50 | MMC_CAP_SDIO_IRQ;
+ /* Set .max_segs to an arbitrary value; adjust as needed. */
+ mmc->max_segs = 32;
+ mmc->max_blk_size = 512;
+ mmc->max_req_size = PAGE_CACHE_SIZE * mmc->max_segs;
+ mmc->max_blk_count = mmc->max_req_size / mmc->max_blk_size;
+ /*
+ * Setting .max_seg_size to 1 page would simplify our page-mapping code,
+ * but on the other hand large segments make DMA more efficient. We could
+ * check whether we managed to get DMA and fall back to 1-page segments,
+ * but if we do obtain DMA and it then fails at run time, we fall back to
+ * PIO while keeping the large segments, so we could not drop the
+ * page-mapping code anyway.
+ */
+ mmc->max_seg_size = mmc->max_req_size;
+ if (!mmc->f_max)
+ mmc->f_max = host->imclk;
+ mmc->f_min = host->imclk / 512;
+
+ platform_set_drvdata(pdev, host);
+
+ ret = mmc_add_host(mmc);
+ if (ret < 0)
+ goto e_clk_off;
+
+ return 0;
+
+e_clk_off:
+ clk_disable_unprepare(host->clk);
+e_free_mmc:
+ mmc_free_host(mmc);
+
+ return ret;
+}
+
+static int usdhi6_remove(struct platform_device *pdev)
+{
+ struct usdhi6_host *host = platform_get_drvdata(pdev);
+
+ mmc_remove_host(host->mmc);
+
+ usdhi6_mask_all(host);
+ cancel_delayed_work_sync(&host->timeout_work);
+ usdhi6_dma_release(host);
+ clk_disable_unprepare(host->clk);
+ mmc_free_host(host->mmc);
+
+ return 0;
+}
+
+static struct platform_driver usdhi6_driver = {
+ .probe = usdhi6_probe,
+ .remove = usdhi6_remove,
+ .driver = {
+ .name = "usdhi6rol0",
+ .owner = THIS_MODULE,
+ .of_match_table = usdhi6_of_match,
+ },
+};
+
+module_platform_driver(usdhi6_driver);
+
+MODULE_DESCRIPTION("Renesas usdhi6rol0 SD/SDIO host driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:usdhi6rol0");
+MODULE_AUTHOR("Guennadi Liakhovetski <g.liakhovetski@gmx.de>");
diff --git a/drivers/mmc/host/wmt-sdmmc.c b/drivers/mmc/host/wmt-sdmmc.c
index 498d1f799085..282891a8e451 100644
--- a/drivers/mmc/host/wmt-sdmmc.c
+++ b/drivers/mmc/host/wmt-sdmmc.c
@@ -840,7 +840,7 @@ static int wmt_mci_probe(struct platform_device *pdev)
priv->dma_desc_buffer = dma_alloc_coherent(&pdev->dev,
mmc->max_blk_count * 16,
&priv->dma_desc_device_addr,
- 208);
+ GFP_KERNEL);
if (!priv->dma_desc_buffer) {
dev_err(&pdev->dev, "DMA alloc fail\n");
ret = -EPERM;
diff --git a/drivers/net/wireless/rsi/rsi_91x_sdio.c b/drivers/net/wireless/rsi/rsi_91x_sdio.c
index 2e39d38d6a9e..46e7af446f01 100644
--- a/drivers/net/wireless/rsi/rsi_91x_sdio.c
+++ b/drivers/net/wireless/rsi/rsi_91x_sdio.c
@@ -285,7 +285,6 @@ static void rsi_reset_card(struct sdio_func *pfunction)
if (err) {
rsi_dbg(ERR_ZONE, "%s: CCCR speed reg read failed: %d\n",
__func__, err);
- card->state &= ~MMC_STATE_HIGHSPEED;
} else {
err = rsi_cmd52writebyte(card,
SDIO_CCCR_SPEED,
@@ -296,14 +295,13 @@ static void rsi_reset_card(struct sdio_func *pfunction)
__func__, err);
return;
}
- mmc_card_set_highspeed(card);
host->ios.timing = MMC_TIMING_SD_HS;
host->ops->set_ios(host, &host->ios);
}
}
/* Set clock */
- if (mmc_card_highspeed(card))
+ if (mmc_card_hs(card))
clock = 50000000;
else
clock = card->cis.max_dtr;
diff --git a/fs/attr.c b/fs/attr.c
index 5d4e59d56e85..6530ced19697 100644
--- a/fs/attr.c
+++ b/fs/attr.c
@@ -50,14 +50,14 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
if ((ia_valid & ATTR_UID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
!uid_eq(attr->ia_uid, inode->i_uid)) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure caller can chgrp. */
if ((ia_valid & ATTR_GID) &&
(!uid_eq(current_fsuid(), inode->i_uid) ||
(!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) &&
- !inode_capable(inode, CAP_CHOWN))
+ !capable_wrt_inode_uidgid(inode, CAP_CHOWN))
return -EPERM;
/* Make sure a caller can chmod. */
@@ -67,7 +67,7 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr)
/* Also check the setgid bit! */
if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
@@ -160,7 +160,7 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
umode_t mode = attr->ia_mode;
if (!in_group_p(inode->i_gid) &&
- !inode_capable(inode, CAP_FSETID))
+ !capable_wrt_inode_uidgid(inode, CAP_FSETID))
mode &= ~S_ISGID;
inode->i_mode = mode;
}
diff --git a/fs/inode.c b/fs/inode.c
index 2feb9b69f1be..6eecb7ff0b9a 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1839,14 +1839,18 @@ EXPORT_SYMBOL(inode_init_owner);
* inode_owner_or_capable - check current task permissions to inode
* @inode: inode being checked
*
- * Return true if current either has CAP_FOWNER to the inode, or
- * owns the file.
+ * Return true if current either has CAP_FOWNER in a namespace with the
+ * inode owner uid mapped, or owns the file.
*/
bool inode_owner_or_capable(const struct inode *inode)
{
+ struct user_namespace *ns;
+
if (uid_eq(current_fsuid(), inode->i_uid))
return true;
- if (inode_capable(inode, CAP_FOWNER))
+
+ ns = current_user_ns();
+ if (ns_capable(ns, CAP_FOWNER) && kuid_has_mapping(ns, inode->i_uid))
return true;
return false;
}
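
The inode_capable() -> capable_wrt_inode_uidgid() conversions in attr.c above and namei.c below rely on the same idea that inode_owner_or_capable() now open-codes: a capability only counts if the inode's owner is mapped into the caller's user namespace. A rough sketch of the helper's semantics, consistent with the open-coded check above but written here purely for illustration (not a verbatim copy of the kernel implementation):

#include <linux/capability.h>
#include <linux/cred.h>
#include <linux/fs.h>
#include <linux/user_namespace.h>

/*
 * Sketch: the caller must hold the capability in its own user namespace
 * AND that namespace must map the inode's owner (and group), so a
 * namespace without such mappings cannot wield CAP_CHOWN, CAP_FOWNER,
 * CAP_FSETID or CAP_DAC_* over the file.
 */
static bool capable_wrt_inode_uidgid_sketch(const struct inode *inode, int cap)
{
	struct user_namespace *ns = current_user_ns();

	return ns_capable(ns, cap) &&
	       kuid_has_mapping(ns, inode->i_uid) &&
	       kgid_has_mapping(ns, inode->i_gid);
}
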
diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c
index 00ec0b9c94d1..d3e40db28930 100644
--- a/fs/lockd/clnt4xdr.c
+++ b/fs/lockd/clnt4xdr.c
@@ -14,6 +14,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
+#include <uapi/linux/nfs3.h>
+
#define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c
index 9a55797a1cd4..3e9f7874b975 100644
--- a/fs/lockd/clntxdr.c
+++ b/fs/lockd/clntxdr.c
@@ -15,6 +15,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
+#include <uapi/linux/nfs2.h>
+
#define NLMDBG_FACILITY NLMDBG_XDR
#if (NLMCLNT_OHSIZE > XDR_MAX_NETOBJ)
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index de051cb1f553..8f27c93f8d2e 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -622,8 +622,8 @@ static int __init init_nlm(void)
err_pernet:
#ifdef CONFIG_SYSCTL
unregister_sysctl_table(nlm_sysctl_table);
-#endif
err_sysctl:
+#endif
return err;
}
diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c
index dc5c75930f0f..b6f3b84b6e99 100644
--- a/fs/lockd/svcsubs.c
+++ b/fs/lockd/svcsubs.c
@@ -14,12 +14,11 @@
#include <linux/mutex.h>
#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/addr.h>
-#include <linux/nfsd/nfsfh.h>
-#include <linux/nfsd/export.h>
#include <linux/lockd/lockd.h>
#include <linux/lockd/share.h>
#include <linux/module.h>
#include <linux/mount.h>
+#include <uapi/linux/nfs2.h>
#define NLMDBG_FACILITY NLMDBG_SVCSUBS
diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c
index 964666c68a86..9340e7e10ef6 100644
--- a/fs/lockd/xdr.c
+++ b/fs/lockd/xdr.c
@@ -16,6 +16,8 @@
#include <linux/sunrpc/stats.h>
#include <linux/lockd/lockd.h>
+#include <uapi/linux/nfs2.h>
+
#define NLMDBG_FACILITY NLMDBG_XDR
diff --git a/fs/namei.c b/fs/namei.c
index 80168273396b..985c6f368485 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -332,10 +332,11 @@ int generic_permission(struct inode *inode, int mask)
if (S_ISDIR(inode->i_mode)) {
/* DACs are overridable for directories */
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
if (!(mask & MAY_WRITE))
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode,
+ CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
}
@@ -345,7 +346,7 @@ int generic_permission(struct inode *inode, int mask)
* at least one exec bit set.
*/
if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
- if (inode_capable(inode, CAP_DAC_OVERRIDE))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
return 0;
/*
@@ -353,7 +354,7 @@ int generic_permission(struct inode *inode, int mask)
*/
mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
if (mask == MAY_READ)
- if (inode_capable(inode, CAP_DAC_READ_SEARCH))
+ if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
return 0;
return -EACCES;
@@ -2379,7 +2380,7 @@ static inline int check_sticky(struct inode *dir, struct inode *inode)
return 0;
if (uid_eq(dir->i_uid, fsuid))
return 0;
- return !inode_capable(inode, CAP_FOWNER);
+ return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
}
/*
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 03192a66c143..4782e0840dcc 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -29,8 +29,6 @@ nfsv4-$(CONFIG_NFS_USE_LEGACY_DNS) += cache_lib.o
nfsv4-$(CONFIG_SYSCTL) += nfs4sysctl.o
nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o
-obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
-nfs_layout_nfsv41_files-y := nfs4filelayout.o nfs4filelayoutdev.o
-
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 65d849bdf77a..9b431f44fad9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -210,7 +210,7 @@ static void bl_end_io_read(struct bio *bio, int err)
SetPageUptodate(bvec->bv_page);
if (err) {
- struct nfs_read_data *rdata = par->data;
+ struct nfs_pgio_data *rdata = par->data;
struct nfs_pgio_header *header = rdata->header;
if (!header->pnfs_error)
@@ -224,17 +224,17 @@ static void bl_end_io_read(struct bio *bio, int err)
static void bl_read_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
static void
bl_end_par_io_read(void *data, int unused)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rdata->task.tk_status = rdata->header->pnfs_error;
INIT_WORK(&rdata->task.u.tk_work, bl_read_cleanup);
@@ -242,7 +242,7 @@ bl_end_par_io_read(void *data, int unused)
}
static enum pnfs_try_status
-bl_read_pagelist(struct nfs_read_data *rdata)
+bl_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *header = rdata->header;
int i, hole;
@@ -390,7 +390,7 @@ static void bl_end_io_write_zero(struct bio *bio, int err)
}
if (unlikely(err)) {
- struct nfs_write_data *data = par->data;
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!header->pnfs_error)
@@ -405,7 +405,7 @@ static void bl_end_io_write(struct bio *bio, int err)
{
struct parallel_io *par = bio->bi_private;
const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
- struct nfs_write_data *data = par->data;
+ struct nfs_pgio_data *data = par->data;
struct nfs_pgio_header *header = data->header;
if (!uptodate) {
@@ -423,10 +423,10 @@ static void bl_end_io_write(struct bio *bio, int err)
static void bl_write_cleanup(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
if (likely(!wdata->header->pnfs_error)) {
/* Marks for LAYOUTCOMMIT */
mark_extents_written(BLK_LSEG2EXT(wdata->header->lseg),
@@ -438,7 +438,7 @@ static void bl_write_cleanup(struct work_struct *work)
/* Called when last of bios associated with a bl_write_pagelist call finishes */
static void bl_end_par_io_write(void *data, int num_se)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(wdata->header->pnfs_error)) {
bl_free_short_extents(&BLK_LSEG2EXT(wdata->header->lseg)->bl_inval,
@@ -673,7 +673,7 @@ check_page:
}
static enum pnfs_try_status
-bl_write_pagelist(struct nfs_write_data *wdata, int sync)
+bl_write_pagelist(struct nfs_pgio_data *wdata, int sync)
{
struct nfs_pgio_header *header = wdata->header;
int i, ret, npg_zero, pg_index, last = 0;
@@ -1189,13 +1189,17 @@ bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
pnfs_generic_pg_init_read(pgio, req);
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, SECTOR_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -1241,13 +1245,17 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
}
-static bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
if (pgio->pg_dreq != NULL &&
!is_aligned_req(req, PAGE_CACHE_SIZE))
- return false;
+ return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
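
The bl_pg_test_read()/bl_pg_test_write() changes above (and the filelayout_pg_test() rewrite further down) switch ->pg_test() from a bool to a size_t: 0 means the request cannot be coalesced, a non-zero value is the number of bytes of @req (at most @req->wb_bytes) that may be coalesced. A hedged sketch of how a caller is expected to consume that value; this is illustrative only and not the actual nfs_can_coalesce_requests():

#include <linux/kernel.h>
#include <linux/nfs_page.h>

/*
 * Sketch of the new ->pg_test() contract: 0 means start a new RPC,
 * otherwise coalesce at most the returned number of bytes of 'req'.
 */
static size_t coalesce_limit(struct nfs_pageio_descriptor *pgio,
			     struct nfs_page *prev, struct nfs_page *req)
{
	size_t size = pgio->pg_ops->pg_test(pgio, prev, req);

	if (!size)
		return 0;	/* cannot coalesce at all */

	/* only the first 'size' bytes of req fit in this descriptor */
	return min_t(size_t, size, req->wb_bytes);
}
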
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index b8797ae6831f..4ad7bc388679 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -108,6 +108,97 @@ static inline int put_dreq(struct nfs_direct_req *dreq)
return atomic_dec_and_test(&dreq->io_count);
}
+/*
+ * nfs_direct_select_verf - select the right verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @ds_clp - nfs_client of data server or NULL if MDS / non-pnfs
+ * @ds_idx - index of data server in data server list, only valid if ds_clp set
+ *
+ * returns the correct verifier to use given the role of the server
+ */
+static struct nfs_writeverf *
+nfs_direct_select_verf(struct nfs_direct_req *dreq,
+ struct nfs_client *ds_clp,
+ int ds_idx)
+{
+ struct nfs_writeverf *verfp = &dreq->verf;
+
+#ifdef CONFIG_NFS_V4_1
+ if (ds_clp) {
+ /* pNFS is in use, use the DS verf */
+ if (ds_idx >= 0 && ds_idx < dreq->ds_cinfo.nbuckets)
+ verfp = &dreq->ds_cinfo.buckets[ds_idx].direct_verf;
+ else
+ WARN_ON_ONCE(1);
+ }
+#endif
+ return verfp;
+}
+
+/*
+ * nfs_direct_set_hdr_verf - set the write/commit verifier
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verfs
+ *
+ * Set the server's (MDS or DS) "seen" verifier
+ */
+static void nfs_direct_set_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ WARN_ON_ONCE(verfp->committed >= 0);
+ memcpy(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+ WARN_ON_ONCE(verfp->committed < 0);
+}
+
+/*
+ * nfs_direct_set_or_cmp_hdr_verf - set or compare verifier for pgio header
+ * @dreq - direct request possibly spanning multiple servers
+ * @hdr - pageio header to validate against previously seen verf
+ *
+ * Set the server's "seen" verf if not initialized.
+ * Returns the result of comparing @hdr->verf with the "seen"
+ * verf of the server used by @hdr (DS or MDS).
+ */
+static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, hdr->data->ds_clp,
+ hdr->data->ds_idx);
+ if (verfp->committed < 0) {
+ nfs_direct_set_hdr_verf(dreq, hdr);
+ return 0;
+ }
+ return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf));
+}
+
+#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
+/*
+ * nfs_direct_cmp_commit_data_verf - compare verifier for commit data
+ * @dreq - direct request possibly spanning multiple servers
+ * @data - commit data to validate against previously seen verf
+ *
+ * returns result of comparison between @data->verf and the verf of
+ * the server used by @data (DS or MDS)
+ */
+static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq,
+ struct nfs_commit_data *data)
+{
+ struct nfs_writeverf *verfp;
+
+ verfp = nfs_direct_select_verf(dreq, data->ds_clp,
+ data->ds_commit_index);
+ WARN_ON_ONCE(verfp->committed < 0);
+ return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf));
+}
+#endif
+
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @rw: direction (read or write)
@@ -168,6 +259,7 @@ static inline struct nfs_direct_req *nfs_direct_req_alloc(void)
kref_get(&dreq->kref);
init_completion(&dreq->completion);
INIT_LIST_HEAD(&dreq->mds_cinfo.list);
+ dreq->verf.committed = NFS_INVALID_STABLE_HOW; /* not set yet */
INIT_WORK(&dreq->work, nfs_direct_write_schedule_work);
spin_lock_init(&dreq->lock);
@@ -380,8 +472,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -424,7 +515,7 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
size_t requested_bytes = 0;
unsigned long seg;
- NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+ nfs_pageio_init_read(&desc, dreq->inode, false,
&nfs_direct_read_completion_ops);
get_dreq(dreq);
desc.pg_dreq = dreq;
@@ -564,7 +655,7 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
dreq->count = 0;
get_dreq(dreq);
- NFS_PROTO(dreq->inode)->write_pageio_init(&desc, dreq->inode, FLUSH_STABLE,
+ nfs_pageio_init_write(&desc, dreq->inode, FLUSH_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
@@ -603,7 +694,7 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
dprintk("NFS: %5u commit failed with error %d.\n",
data->task.tk_pid, status);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
- } else if (memcmp(&dreq->verf, &data->verf, sizeof(data->verf))) {
+ } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
}
@@ -750,8 +841,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
- req = nfs_create_request(dreq->ctx, dreq->inode,
- pagevec[i],
+ req = nfs_create_request(dreq->ctx, pagevec[i], NULL,
pgbase, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
@@ -813,13 +903,13 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
if (dreq->flags == NFS_ODIRECT_RESCHED_WRITES)
bit = NFS_IOHDR_NEED_RESCHED;
else if (dreq->flags == 0) {
- memcpy(&dreq->verf, hdr->verf,
- sizeof(dreq->verf));
+ nfs_direct_set_hdr_verf(dreq, hdr);
bit = NFS_IOHDR_NEED_COMMIT;
dreq->flags = NFS_ODIRECT_DO_COMMIT;
} else if (dreq->flags == NFS_ODIRECT_DO_COMMIT) {
- if (memcmp(&dreq->verf, hdr->verf, sizeof(dreq->verf))) {
- dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
+ if (nfs_direct_set_or_cmp_hdr_verf(dreq, hdr)) {
+ dreq->flags =
+ NFS_ODIRECT_RESCHED_WRITES;
bit = NFS_IOHDR_NEED_RESCHED;
} else
bit = NFS_IOHDR_NEED_COMMIT;
@@ -829,6 +919,8 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
+ bool do_destroy = true;
+
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
switch (bit) {
@@ -836,6 +928,7 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
case NFS_IOHDR_NEED_COMMIT:
kref_get(&req->wb_kref);
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
+ do_destroy = false;
}
nfs_unlock_and_release_request(req);
}
@@ -874,7 +967,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
size_t requested_bytes = 0;
unsigned long seg;
- NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
+ nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
get_dreq(dreq);
diff --git a/fs/nfs/filelayout/Makefile b/fs/nfs/filelayout/Makefile
new file mode 100644
index 000000000000..8516cdffb9e9
--- /dev/null
+++ b/fs/nfs/filelayout/Makefile
@@ -0,0 +1,5 @@
+#
+# Makefile for the pNFS Files Layout Driver kernel module
+#
+obj-$(CONFIG_PNFS_FILE_LAYOUT) += nfs_layout_nfsv41_files.o
+nfs_layout_nfsv41_files-y := filelayout.o filelayoutdev.o
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/filelayout/filelayout.c
index b9a35c05b60f..d2eba1c13b7e 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -35,11 +35,11 @@
#include <linux/sunrpc/metrics.h>
-#include "nfs4session.h"
-#include "internal.h"
-#include "delegation.h"
-#include "nfs4filelayout.h"
-#include "nfs4trace.h"
+#include "../nfs4session.h"
+#include "../internal.h"
+#include "../delegation.h"
+#include "filelayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -84,7 +84,7 @@ filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
BUG();
}
-static void filelayout_reset_write(struct nfs_write_data *data)
+static void filelayout_reset_write(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
@@ -105,7 +105,7 @@ static void filelayout_reset_write(struct nfs_write_data *data)
}
}
-static void filelayout_reset_read(struct nfs_read_data *data)
+static void filelayout_reset_read(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct rpc_task *task = &data->task;
@@ -243,7 +243,7 @@ wait_on_recovery:
/* NFS_PROTO call done callback routines */
static int filelayout_read_done_cb(struct rpc_task *task,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
@@ -270,7 +270,7 @@ static int filelayout_read_done_cb(struct rpc_task *task,
* rfc5661 is not clear about which credential should be used.
*/
static void
-filelayout_set_layoutcommit(struct nfs_write_data *wdata)
+filelayout_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
@@ -279,7 +279,7 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
return;
pnfs_set_layoutcommit(wdata);
- dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
+ dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino,
(unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
}
@@ -305,7 +305,7 @@ filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
*/
static void filelayout_read_prepare(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &rdata->args.context->flags))) {
rpc_exit(task, -EIO);
@@ -317,7 +317,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
rpc_exit(task, 0);
return;
}
- rdata->read_done_cb = filelayout_read_done_cb;
+ rdata->pgio_done_cb = filelayout_read_done_cb;
if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
&rdata->args.seq_args,
@@ -331,7 +331,7 @@ static void filelayout_read_prepare(struct rpc_task *task, void *data)
static void filelayout_read_call_done(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
@@ -347,14 +347,14 @@ static void filelayout_read_call_done(struct rpc_task *task, void *data)
static void filelayout_read_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
rpc_count_iostats(task, NFS_SERVER(rdata->header->inode)->client->cl_metrics);
}
static void filelayout_read_release(void *data)
{
- struct nfs_read_data *rdata = data;
+ struct nfs_pgio_data *rdata = data;
struct pnfs_layout_hdr *lo = rdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -363,7 +363,7 @@ static void filelayout_read_release(void *data)
}
static int filelayout_write_done_cb(struct rpc_task *task,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
int err;
@@ -419,7 +419,7 @@ static int filelayout_commit_done_cb(struct rpc_task *task,
static void filelayout_write_prepare(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &wdata->args.context->flags))) {
rpc_exit(task, -EIO);
@@ -443,7 +443,7 @@ static void filelayout_write_prepare(struct rpc_task *task, void *data)
static void filelayout_write_call_done(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
if (test_bit(NFS_IOHDR_REDO, &wdata->header->flags) &&
task->tk_status == 0) {
@@ -457,14 +457,14 @@ static void filelayout_write_call_done(struct rpc_task *task, void *data)
static void filelayout_write_count_stats(struct rpc_task *task, void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
rpc_count_iostats(task, NFS_SERVER(wdata->header->inode)->client->cl_metrics);
}
static void filelayout_write_release(void *data)
{
- struct nfs_write_data *wdata = data;
+ struct nfs_pgio_data *wdata = data;
struct pnfs_layout_hdr *lo = wdata->header->lseg->pls_layout;
filelayout_fenceme(lo->plh_inode, lo);
@@ -529,7 +529,7 @@ static const struct rpc_call_ops filelayout_commit_call_ops = {
};
static enum pnfs_try_status
-filelayout_read_pagelist(struct nfs_read_data *data)
+filelayout_read_pagelist(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -560,6 +560,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
/* No multipath support. Use first DS */
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
@@ -568,14 +569,14 @@ filelayout_read_pagelist(struct nfs_read_data *data)
data->mds_offset = offset;
/* Perform an asynchronous read to ds */
- nfs_initiate_read(ds_clnt, data,
- &filelayout_read_call_ops, RPC_TASK_SOFTCONN);
+ nfs_initiate_pgio(ds_clnt, data,
+ &filelayout_read_call_ops, 0, RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
}
/* Perform async writes. */
static enum pnfs_try_status
-filelayout_write_pagelist(struct nfs_write_data *data, int sync)
+filelayout_write_pagelist(struct nfs_pgio_data *data, int sync)
{
struct nfs_pgio_header *hdr = data->header;
struct pnfs_layout_segment *lseg = hdr->lseg;
@@ -600,20 +601,18 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
__func__, hdr->inode->i_ino, sync, (size_t) data->args.count,
offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count));
- data->write_done_cb = filelayout_write_done_cb;
+ data->pgio_done_cb = filelayout_write_done_cb;
atomic_inc(&ds->ds_clp->cl_count);
data->ds_clp = ds->ds_clp;
+ data->ds_idx = idx;
fh = nfs4_fl_select_ds_fh(lseg, j);
if (fh)
data->args.fh = fh;
- /*
- * Get the file offset on the dserver. Set the write offset to
- * this offset and save the original offset.
- */
+
data->args.offset = filelayout_get_dserver_offset(lseg, offset);
/* Perform an asynchronous write */
- nfs_initiate_write(ds_clnt, data,
+ nfs_initiate_pgio(ds_clnt, data,
&filelayout_write_call_ops, sync,
RPC_TASK_SOFTCONN);
return PNFS_ATTEMPTED;
@@ -637,7 +636,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
struct nfs4_deviceid_node *d;
struct nfs4_file_layout_dsaddr *dsaddr;
int status = -EINVAL;
- struct nfs_server *nfss = NFS_SERVER(lo->plh_inode);
dprintk("--> %s\n", __func__);
@@ -655,7 +653,7 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out;
}
- if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
+ if (!fl->stripe_unit) {
dprintk("%s Invalid stripe unit (%u)\n",
__func__, fl->stripe_unit);
goto out;
@@ -692,12 +690,6 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
goto out_put;
}
- if (fl->stripe_unit % nfss->rsize || fl->stripe_unit % nfss->wsize) {
- dprintk("%s Stripe unit (%u) not aligned with rsize %u "
- "wsize %u\n", __func__, fl->stripe_unit, nfss->rsize,
- nfss->wsize);
- }
-
status = 0;
out:
dprintk("--> %s returns %d\n", __func__, status);
@@ -850,11 +842,15 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
{
struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
struct pnfs_commit_bucket *buckets;
- int size;
+ int size, i;
if (fl->commit_through_mds)
return 0;
- if (cinfo->ds->nbuckets != 0) {
+
+ size = (fl->stripe_type == STRIPE_SPARSE) ?
+ fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
+
+ if (cinfo->ds->nbuckets >= size) {
/* This assumes there is only one IOMODE_RW lseg. What
* we really want to do is have a layout_hdr level
* dictionary of <multipath_list4, fh> keys, each
@@ -864,30 +860,36 @@ filelayout_alloc_commit_info(struct pnfs_layout_segment *lseg,
return 0;
}
- size = (fl->stripe_type == STRIPE_SPARSE) ?
- fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
-
buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket),
gfp_flags);
if (!buckets)
return -ENOMEM;
- else {
- int i;
+ for (i = 0; i < size; i++) {
+ INIT_LIST_HEAD(&buckets[i].written);
+ INIT_LIST_HEAD(&buckets[i].committing);
+ /* mark direct verifier as unset */
+ buckets[i].direct_verf.committed = NFS_INVALID_STABLE_HOW;
+ }
- spin_lock(cinfo->lock);
- if (cinfo->ds->nbuckets != 0)
- kfree(buckets);
- else {
- cinfo->ds->buckets = buckets;
- cinfo->ds->nbuckets = size;
- for (i = 0; i < size; i++) {
- INIT_LIST_HEAD(&buckets[i].written);
- INIT_LIST_HEAD(&buckets[i].committing);
- }
- }
- spin_unlock(cinfo->lock);
- return 0;
+ spin_lock(cinfo->lock);
+ if (cinfo->ds->nbuckets >= size)
+ goto out;
+ for (i = 0; i < cinfo->ds->nbuckets; i++) {
+ list_splice(&cinfo->ds->buckets[i].written,
+ &buckets[i].written);
+ list_splice(&cinfo->ds->buckets[i].committing,
+ &buckets[i].committing);
+ buckets[i].direct_verf.committed =
+ cinfo->ds->buckets[i].direct_verf.committed;
+ buckets[i].wlseg = cinfo->ds->buckets[i].wlseg;
+ buckets[i].clseg = cinfo->ds->buckets[i].clseg;
}
+ swap(cinfo->ds->buckets, buckets);
+ cinfo->ds->nbuckets = size;
+out:
+ spin_unlock(cinfo->lock);
+ kfree(buckets);
+ return 0;
}
static struct pnfs_layout_segment *
@@ -915,47 +917,51 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
/*
* filelayout_pg_test(). Called by nfs_can_coalesce_requests()
*
- * return true : coalesce page
- * return false : don't coalesce page
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
*/
-static bool
+static size_t
filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
+ unsigned int size;
u64 p_stripe, r_stripe;
- u32 stripe_unit;
+ u32 stripe_offset;
+ u64 segment_offset = pgio->pg_lseg->pls_range.offset;
+ u32 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
- if (!pnfs_generic_pg_test(pgio, prev, req) ||
- !nfs_generic_pg_test(pgio, prev, req))
- return false;
+ /* calls nfs_generic_pg_test */
+ size = pnfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
- p_stripe = (u64)req_offset(prev);
- r_stripe = (u64)req_offset(req);
- stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
+ /* see if req and prev are in the same stripe */
+ if (prev) {
+ p_stripe = (u64)req_offset(prev) - segment_offset;
+ r_stripe = (u64)req_offset(req) - segment_offset;
+ do_div(p_stripe, stripe_unit);
+ do_div(r_stripe, stripe_unit);
- do_div(p_stripe, stripe_unit);
- do_div(r_stripe, stripe_unit);
+ if (p_stripe != r_stripe)
+ return 0;
+ }
- return (p_stripe == r_stripe);
+ /* calculate remaining bytes in the current stripe */
+ div_u64_rem((u64)req_offset(req) - segment_offset,
+ stripe_unit,
+ &stripe_offset);
+ WARN_ON_ONCE(stripe_offset > stripe_unit);
+ if (stripe_offset >= stripe_unit)
+ return 0;
+ return min(stripe_unit - (unsigned int)stripe_offset, size);
}
static void
filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req)
{
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase) {
- /*
- * Handling unaligned pages is difficult, because have to
- * somehow split a req in two in certain cases in the
- * pg.test code. Avoid this by just not using pnfs
- * in this case.
- */
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -973,11 +979,8 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_commit_info cinfo;
int status;
- WARN_ON_ONCE(pgio->pg_lseg != NULL);
-
- if (req->wb_offset != req->wb_pgbase)
- goto out_mds;
- pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
+ if (!pgio->pg_lseg)
+ pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
0,
NFS4_MAX_UINT64,
@@ -1067,6 +1070,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
*/
j = nfs4_fl_calc_j_index(lseg, req_offset(req));
i = select_bucket_index(fl, j);
+ spin_lock(cinfo->lock);
buckets = cinfo->ds->buckets;
list = &buckets[i].written;
if (list_empty(list)) {
@@ -1080,6 +1084,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
}
set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
cinfo->ds->nwritten++;
+ spin_unlock(cinfo->lock);
return list;
}
@@ -1176,6 +1181,7 @@ transfer_commit_list(struct list_head *src, struct list_head *dst,
return ret;
}
+/* Note called with cinfo->lock held. */
static int
filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
struct nfs_commit_info *cinfo,
@@ -1220,15 +1226,18 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
struct nfs_commit_info *cinfo)
{
struct pnfs_commit_bucket *b;
+ struct pnfs_layout_segment *freeme;
int i;
+restart:
spin_lock(cinfo->lock);
for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
- spin_unlock(cinfo->lock);
- pnfs_put_lseg(b->wlseg);
+ freeme = b->wlseg;
b->wlseg = NULL;
- spin_lock(cinfo->lock);
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
+ goto restart;
}
}
cinfo->ds->nwritten = 0;
@@ -1243,6 +1252,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
struct nfs_commit_data *data;
int i, j;
unsigned int nreq = 0;
+ struct pnfs_layout_segment *freeme;
fl_cinfo = cinfo->ds;
bucket = fl_cinfo->buckets;
@@ -1253,8 +1263,10 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (!data)
break;
data->ds_commit_index = i;
+ spin_lock(cinfo->lock);
data->lseg = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
list_add(&data->pages, list);
nreq++;
}
@@ -1264,8 +1276,11 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
if (list_empty(&bucket->committing))
continue;
nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
- pnfs_put_lseg(bucket->clseg);
+ spin_lock(cinfo->lock);
+ freeme = bucket->clseg;
bucket->clseg = NULL;
+ spin_unlock(cinfo->lock);
+ pnfs_put_lseg(freeme);
}
/* Caller will clean up entries put on list */
return nreq;
@@ -1330,7 +1345,7 @@ filelayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct nfs4_filelayout *flo;
flo = kzalloc(sizeof(*flo), gfp_flags);
- return &flo->generic_hdr;
+ return flo != NULL ? &flo->generic_hdr : NULL;
}
static void
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/filelayout/filelayout.h
index cebd20e7e923..ffbddf2219ea 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/filelayout/filelayout.h
@@ -30,7 +30,7 @@
#ifndef FS_NFS_NFS4FILELAYOUT_H
#define FS_NFS_NFS4FILELAYOUT_H
-#include "pnfs.h"
+#include "../pnfs.h"
/*
 * Default data server connection timeout and retrans values.
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index b9c61efe9660..44bf0140a4c7 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -33,9 +33,9 @@
#include <linux/module.h>
#include <linux/sunrpc/addr.h>
-#include "internal.h"
-#include "nfs4session.h"
-#include "nfs4filelayout.h"
+#include "../internal.h"
+#include "../nfs4session.h"
+#include "filelayout.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 66984a9aafaa..b94f80420a58 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -120,7 +120,8 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
security_d_instantiate(ret, inode);
spin_lock(&ret->d_lock);
- if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
+ if (IS_ROOT(ret) && !ret->d_fsdata &&
+ !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
ret->d_fsdata = name;
name = NULL;
}
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e6f7398d2b3c..c496f8a74639 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -1575,18 +1575,20 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_version = fattr->change_attr;
}
} else if (server->caps & NFS_CAP_CHANGE_ATTR)
- invalid |= save_cache_validity;
+ nfsi->cache_validity |= save_cache_validity;
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
} else if (server->caps & NFS_CAP_MTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
} else if (server->caps & NFS_CAP_CTIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
/* Check if our cached file size is stale */
@@ -1608,7 +1610,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
(long long)new_isize);
}
} else
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_PAGECACHE
| NFS_INO_REVAL_FORCED);
@@ -1616,7 +1619,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
else if (server->caps & NFS_CAP_ATIME)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATIME
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_MODE) {
@@ -1627,7 +1631,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ACCESS|NFS_INO_INVALID_ACL;
}
} else if (server->caps & NFS_CAP_MODE)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1638,7 +1643,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_uid = fattr->uid;
}
} else if (server->caps & NFS_CAP_OWNER)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1649,7 +1655,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
inode->i_gid = fattr->gid;
}
} else if (server->caps & NFS_CAP_OWNER_GROUP)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ACCESS
| NFS_INO_INVALID_ACL
| NFS_INO_REVAL_FORCED);
@@ -1662,7 +1669,8 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
set_nlink(inode, fattr->nlink);
}
} else if (server->caps & NFS_CAP_NLINK)
- invalid |= save_cache_validity & (NFS_INO_INVALID_ATTR
+ nfsi->cache_validity |= save_cache_validity &
+ (NFS_INO_INVALID_ATTR
| NFS_INO_REVAL_FORCED);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index dd8bfc2e2464..8b69cba1bb04 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -231,13 +231,20 @@ extern void nfs_destroy_writepagecache(void);
extern int __init nfs_init_directcache(void);
extern void nfs_destroy_directcache(void);
-extern bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount);
extern void nfs_pgheader_init(struct nfs_pageio_descriptor *desc,
struct nfs_pgio_header *hdr,
void (*release)(struct nfs_pgio_header *hdr));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos);
int nfs_iocounter_wait(struct nfs_io_counter *c);
+extern const struct nfs_pageio_ops nfs_pgio_rw_ops;
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *);
+void nfs_rw_header_free(struct nfs_pgio_header *);
+void nfs_pgio_data_release(struct nfs_pgio_data *);
+int nfs_generic_pgio(struct nfs_pageio_descriptor *, struct nfs_pgio_header *);
+int nfs_initiate_pgio(struct rpc_clnt *, struct nfs_pgio_data *,
+ const struct rpc_call_ops *, int, int);
+
static inline void nfs_iocounter_init(struct nfs_io_counter *c)
{
c->flags = 0;
@@ -395,19 +402,11 @@ extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool
struct nfs_pgio_completion_ops;
/* read.c */
-extern struct nfs_read_header *nfs_readhdr_alloc(void);
-extern void nfs_readhdr_free(struct nfs_pgio_header *hdr);
extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags);
extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
-extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_readdata_release(struct nfs_read_data *rdata);
/* super.c */
void nfs_clone_super(struct super_block *, struct nfs_mount_info *);
@@ -422,19 +421,10 @@ int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
/* write.c */
extern void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops);
-extern struct nfs_write_header *nfs_writehdr_alloc(void);
-extern void nfs_writehdr_free(struct nfs_pgio_header *hdr);
-extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr);
extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
-extern void nfs_writedata_release(struct nfs_write_data *wdata);
extern void nfs_commit_free(struct nfs_commit_data *p);
-extern int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags);
extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
extern void nfs_commit_prepare(struct rpc_task *task, void *calldata);
extern int nfs_initiate_commit(struct rpc_clnt *clnt,
@@ -447,6 +437,7 @@ extern void nfs_init_commit(struct nfs_commit_data *data,
struct nfs_commit_info *cinfo);
int nfs_scan_commit_list(struct list_head *src, struct list_head *dst,
struct nfs_commit_info *cinfo, int max);
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *);
int nfs_scan_commit(struct inode *inode, struct list_head *dst,
struct nfs_commit_info *cinfo);
void nfs_mark_request_commit(struct nfs_page *req,
@@ -492,7 +483,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
-extern void __nfs4_read_done_cb(struct nfs_read_data *);
+extern void __nfs4_read_done_cb(struct nfs_pgio_data *);
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
const struct rpc_timeout *timeparms,
const char *ip_addr);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index 62db136339ea..5f61b83f4a1c 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -103,7 +103,7 @@ static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
/*
* typedef opaque nfsdata<>;
*/
-static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_readres *result)
+static int decode_nfsdata(struct xdr_stream *xdr, struct nfs_pgio_res *result)
{
u32 recvd, count;
__be32 *p;
@@ -613,7 +613,7 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
* };
*/
static void encode_readargs(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -629,7 +629,7 @@ static void encode_readargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_readargs(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -649,7 +649,7 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
* };
*/
static void encode_writeargs(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
u32 offset = args->offset;
u32 count = args->count;
@@ -669,7 +669,7 @@ static void encode_writeargs(struct xdr_stream *xdr,
static void nfs2_xdr_enc_writeargs(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_writeargs(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -857,7 +857,7 @@ out_default:
* };
*/
static int nfs2_xdr_dec_readres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -878,7 +878,7 @@ out_default:
}
static int nfs2_xdr_dec_writeres(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
/* All NFSv2 writes are "file sync" writes */
result->verf->committed = NFS_FILE_SYNC;
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index db60149c4579..e7daa42bbc86 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -795,7 +795,7 @@ nfs3_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return status;
}
-static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -807,18 +807,18 @@ static int nfs3_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs3_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs3_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
}
-static int nfs3_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
return 0;
}
-static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs3_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -829,17 +829,11 @@ static int nfs3_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs3_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs3_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_WRITE];
}
-static int nfs3_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
- return 0;
-}
-
static void nfs3_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
rpc_call_start(task);
@@ -946,13 +940,10 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
.fsinfo = nfs3_proc_fsinfo,
.pathconf = nfs3_proc_pathconf,
.decode_dirent = nfs3_decode_dirent,
+ .pgio_rpc_prepare = nfs3_proc_pgio_rpc_prepare,
.read_setup = nfs3_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs3_proc_read_rpc_prepare,
.read_done = nfs3_read_done,
.write_setup = nfs3_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs3_proc_write_rpc_prepare,
.write_done = nfs3_write_done,
.commit_setup = nfs3_proc_commit_setup,
.commit_rpc_prepare = nfs3_proc_commit_rpc_prepare,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index fa6d72131c19..8f4cbe7f4aa8 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -953,7 +953,7 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
* };
*/
static void encode_read3args(struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -966,7 +966,7 @@ static void encode_read3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_readargs *args)
+ const struct nfs_pgio_args *args)
{
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
@@ -992,7 +992,7 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
* };
*/
static void encode_write3args(struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
__be32 *p;
@@ -1008,7 +1008,7 @@ static void encode_write3args(struct xdr_stream *xdr,
static void nfs3_xdr_enc_write3args(struct rpc_rqst *req,
struct xdr_stream *xdr,
- const struct nfs_writeargs *args)
+ const struct nfs_pgio_args *args)
{
encode_write3args(xdr, args);
xdr->buf->flags |= XDRBUF_WRITE;
@@ -1589,7 +1589,7 @@ out_default:
* };
*/
static int decode_read3resok(struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
u32 eof, count, ocount, recvd;
__be32 *p;
@@ -1625,7 +1625,7 @@ out_overflow:
}
static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
@@ -1673,7 +1673,7 @@ out_status:
* };
*/
static int decode_write3resok(struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
__be32 *p;
@@ -1697,7 +1697,7 @@ out_eio:
}
static int nfs3_xdr_dec_write3res(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeres *result)
+ struct nfs_pgio_res *result)
{
enum nfs_stat status;
int error;
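The nfs3xdr.c hunks swap the READ- and WRITE-specific argument/result types for the shared nfs_pgio_args/nfs_pgio_res pair. A rough standalone sketch of what such merged carriers look like, using only fields this diff references (offset, count, pgbase, pages, stable, eof); the layout and types here are purely illustrative, not the kernel's.

/*
 * One args type and one res type serve READ and WRITE; the
 * direction-specific members are simply unused by the other path.
 */
#include <stdint.h>
#include <stdio.h>

struct nfs_pgio_args {
	uint64_t offset;   /* byte offset in the file */
	uint32_t count;    /* bytes to read or write */
	uint32_t pgbase;   /* offset into the first page */
	void   **pages;    /* page vector backing the I/O */
	int      stable;   /* writes only: unstable vs. file sync */
};

struct nfs_pgio_res {
	uint32_t count;    /* bytes the server actually handled */
	int      eof;      /* reads only: end-of-file reached */
	int      verf_ok;  /* writes only: stands in for the verifier */
};

int main(void)
{
	struct nfs_pgio_args a = { .offset = 4096, .count = 8192, .stable = 0 };
	struct nfs_pgio_res  r = { .count = 8192, .eof = 0 };

	/* a READ and a WRITE now travel through the same two structs */
	printf("args: %llu+%u, res: %u\n",
	       (unsigned long long)a.offset, a.count, r.count);
	return 0;
}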
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index e1d1badbe53c..f63cb87cd730 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -337,7 +337,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_mode,
*/
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_write_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
{
if (_nfs4_state_protect(clp, NFS_SP4_MACH_CRED_WRITE, clntp, msg) &&
!test_bit(NFS_SP4_MACH_CRED_COMMIT, &clp->cl_sp4_flags))
@@ -369,7 +369,7 @@ nfs4_state_protect(struct nfs_client *clp, unsigned long sp4_flags,
static inline void
nfs4_state_protect_write(struct nfs_client *clp, struct rpc_clnt **clntp,
- struct rpc_message *msg, struct nfs_write_data *wdata)
+ struct rpc_message *msg, struct nfs_pgio_data *wdata)
{
}
#endif /* CONFIG_NFS_V4_1 */
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 8de3407e0360..464db9dd6318 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -100,8 +100,7 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
break;
mutex_lock(&inode->i_mutex);
ret = nfs_file_fsync_commit(file, start, end, datasync);
- if (!ret && !datasync)
- /* application has asked for meta-data sync */
+ if (!ret)
ret = pnfs_layoutcommit_inode(inode, true);
mutex_unlock(&inode->i_mutex);
/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 397be39c6dc8..285ad5334018 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2027,7 +2027,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
return status;
}
if (!(o_res->f_attr->valid & NFS_ATTR_FATTR))
- _nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
+ nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
return 0;
}
@@ -2750,7 +2750,7 @@ static void nfs4_close_context(struct nfs_open_context *ctx, int is_sync)
#define FATTR4_WORD1_NFS40_MASK (2*FATTR4_WORD1_MOUNTED_ON_FILEID - 1UL)
#define FATTR4_WORD2_NFS41_MASK (2*FATTR4_WORD2_SUPPATTR_EXCLCREAT - 1UL)
-#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_CHANGE_SECURITY_LABEL - 1UL)
+#define FATTR4_WORD2_NFS42_MASK (2*FATTR4_WORD2_SECURITY_LABEL - 1UL)
static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle)
{
@@ -4033,12 +4033,12 @@ static bool nfs4_error_stateid_expired(int err)
return false;
}
-void __nfs4_read_done_cb(struct nfs_read_data *data)
+void __nfs4_read_done_cb(struct nfs_pgio_data *data)
{
nfs_invalidate_atime(data->header->inode);
}
-static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
@@ -4055,7 +4055,7 @@ static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
}
static bool nfs4_read_stateid_changed(struct rpc_task *task,
- struct nfs_readargs *args)
+ struct nfs_pgio_args *args)
{
if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4068,7 +4068,7 @@ static bool nfs4_read_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
dprintk("--> %s\n", __func__);
@@ -4077,19 +4077,19 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
return -EAGAIN;
if (nfs4_read_stateid_changed(task, &data->args))
return -EAGAIN;
- return data->read_done_cb ? data->read_done_cb(task, data) :
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_read_done_cb(task, data);
}
-static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs4_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
data->timestamp = jiffies;
- data->read_done_cb = nfs4_read_done_cb;
+ data->pgio_done_cb = nfs4_read_done_cb;
msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 0);
}
-static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
&data->args.seq_args,
@@ -4097,14 +4097,14 @@ static int nfs4_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_dat
task))
return 0;
if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, FMODE_READ) == -EIO)
+ data->args.lock_context, data->header->rw_ops->rw_mode) == -EIO)
return -EIO;
if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
return -EIO;
return 0;
}
-static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -4121,7 +4121,7 @@ static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data
}
static bool nfs4_write_stateid_changed(struct rpc_task *task,
- struct nfs_writeargs *args)
+ struct nfs_pgio_args *args)
{
if (!nfs4_error_stateid_expired(task->tk_status) ||
@@ -4134,18 +4134,18 @@ static bool nfs4_write_stateid_changed(struct rpc_task *task,
return true;
}
-static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs4_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
if (!nfs4_sequence_done(task, &data->res.seq_res))
return -EAGAIN;
if (nfs4_write_stateid_changed(task, &data->args))
return -EAGAIN;
- return data->write_done_cb ? data->write_done_cb(task, data) :
+ return data->pgio_done_cb ? data->pgio_done_cb(task, data) :
nfs4_write_done_cb(task, data);
}
static
-bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
+bool nfs4_write_need_cache_consistency_data(const struct nfs_pgio_data *data)
{
const struct nfs_pgio_header *hdr = data->header;
@@ -4158,7 +4158,7 @@ bool nfs4_write_need_cache_consistency_data(const struct nfs_write_data *data)
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs4_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
struct nfs_server *server = NFS_SERVER(data->header->inode);
@@ -4168,8 +4168,8 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
} else
data->args.bitmask = server->cache_consistency_bitmask;
- if (!data->write_done_cb)
- data->write_done_cb = nfs4_write_done_cb;
+ if (!data->pgio_done_cb)
+ data->pgio_done_cb = nfs4_write_done_cb;
data->res.server = server;
data->timestamp = jiffies;
@@ -4177,21 +4177,6 @@ static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_messag
nfs4_init_sequence(&data->args.seq_args, &data->res.seq_res, 1);
}
-static int nfs4_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- if (nfs4_setup_sequence(NFS_SERVER(data->header->inode),
- &data->args.seq_args,
- &data->res.seq_res,
- task))
- return 0;
- if (nfs4_set_rw_stateid(&data->args.stateid, data->args.context,
- data->args.lock_context, FMODE_WRITE) == -EIO)
- return -EIO;
- if (unlikely(test_bit(NFS_CONTEXT_BAD, &data->args.context->flags)))
- return -EIO;
- return 0;
-}
-
static void nfs4_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
nfs4_setup_sequence(NFS_SERVER(data->inode),
@@ -8432,13 +8417,10 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
.pathconf = nfs4_proc_pathconf,
.set_capabilities = nfs4_server_capabilities,
.decode_dirent = nfs4_decode_dirent,
+ .pgio_rpc_prepare = nfs4_proc_pgio_rpc_prepare,
.read_setup = nfs4_proc_read_setup,
- .read_pageio_init = pnfs_pageio_init_read,
- .read_rpc_prepare = nfs4_proc_read_rpc_prepare,
.read_done = nfs4_read_done,
.write_setup = nfs4_proc_write_setup,
- .write_pageio_init = pnfs_pageio_init_write,
- .write_rpc_prepare = nfs4_proc_write_rpc_prepare,
.write_done = nfs4_write_done,
.commit_setup = nfs4_proc_commit_setup,
.commit_rpc_prepare = nfs4_proc_commit_rpc_prepare,
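One detail worth calling out in the nfs4proc.c hunks: the shared nfs4_proc_pgio_rpc_prepare() no longer hard-codes FMODE_READ or FMODE_WRITE but asks data->header->rw_ops->rw_mode for the direction. A minimal userspace model of that indirection follows; the constants and structs are stand-ins, not the kernel definitions.

/*
 * Model of the rw_mode indirection: one prepare path, direction
 * supplied by the per-direction rw_ops vector.
 */
#include <stdio.h>

#define FMODE_READ  0x1
#define FMODE_WRITE 0x2

struct nfs_rw_ops { int rw_mode; };

static const struct nfs_rw_ops read_ops  = { .rw_mode = FMODE_READ  };
static const struct nfs_rw_ops write_ops = { .rw_mode = FMODE_WRITE };

static int pgio_rpc_prepare(const struct nfs_rw_ops *ops)
{
	/* stateid selection keys off the mode reported by the ops vector */
	printf("select stateid for mode %#x\n", ops->rw_mode);
	return 0;
}

int main(void)
{
	pgio_rpc_prepare(&read_ops);
	pgio_rpc_prepare(&write_ops);
	return 0;
}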
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index c0583b9bef71..848f6853c59e 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1456,7 +1456,7 @@ static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs
* server that doesn't support a grace period.
*/
spin_lock(&sp->so_lock);
- write_seqcount_begin(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_begin(&sp->so_reclaim_seqcount);
restart:
list_for_each_entry(state, &sp->so_states, open_states) {
if (!test_and_clear_bit(ops->state_flag_bit, &state->flags))
@@ -1519,13 +1519,13 @@ restart:
spin_lock(&sp->so_lock);
goto restart;
}
- write_seqcount_end(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return 0;
out_err:
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
- write_seqcount_end(&sp->so_reclaim_seqcount);
+ raw_write_seqcount_end(&sp->so_reclaim_seqcount);
spin_unlock(&sp->so_lock);
return status;
}
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 849cf146db30..0a744f3a86f6 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -932,7 +932,7 @@ DEFINE_NFS4_IDMAP_EVENT(nfs4_map_gid_to_group);
DECLARE_EVENT_CLASS(nfs4_read_event,
TP_PROTO(
- const struct nfs_read_data *data,
+ const struct nfs_pgio_data *data,
int error
),
@@ -972,7 +972,7 @@ DECLARE_EVENT_CLASS(nfs4_read_event,
#define DEFINE_NFS4_READ_EVENT(name) \
DEFINE_EVENT(nfs4_read_event, name, \
TP_PROTO( \
- const struct nfs_read_data *data, \
+ const struct nfs_pgio_data *data, \
int error \
), \
TP_ARGS(data, error))
@@ -983,7 +983,7 @@ DEFINE_NFS4_READ_EVENT(nfs4_pnfs_read);
DECLARE_EVENT_CLASS(nfs4_write_event,
TP_PROTO(
- const struct nfs_write_data *data,
+ const struct nfs_pgio_data *data,
int error
),
@@ -1024,7 +1024,7 @@ DECLARE_EVENT_CLASS(nfs4_write_event,
#define DEFINE_NFS4_WRITE_EVENT(name) \
DEFINE_EVENT(nfs4_write_event, name, \
TP_PROTO( \
- const struct nfs_write_data *data, \
+ const struct nfs_pgio_data *data, \
int error \
), \
TP_ARGS(data, error))
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 73ce8d4fe2c8..939ae606cfa4 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1556,7 +1556,8 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
encode_op_hdr(xdr, OP_PUTROOTFH, decode_putrootfh_maxsz, hdr);
}
-static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, struct compound_hdr *hdr)
+static void encode_read(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
@@ -1701,7 +1702,8 @@ static void encode_setclientid_confirm(struct xdr_stream *xdr, const struct nfs4
encode_nfs4_verifier(xdr, &arg->confirm);
}
-static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *args, struct compound_hdr *hdr)
+static void encode_write(struct xdr_stream *xdr, const struct nfs_pgio_args *args,
+ struct compound_hdr *hdr)
{
__be32 *p;
@@ -2451,7 +2453,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a READ request
*/
static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_readargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -2513,7 +2515,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
* Encode a WRITE request
*/
static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
- struct nfs_writeargs *args)
+ struct nfs_pgio_args *args)
{
struct compound_hdr hdr = {
.minorversion = nfs4_xdr_minorversion(&args->seq_args),
@@ -5085,7 +5087,8 @@ static int decode_putrootfh(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_PUTROOTFH);
}
-static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs_readres *res)
+static int decode_read(struct xdr_stream *xdr, struct rpc_rqst *req,
+ struct nfs_pgio_res *res)
{
__be32 *p;
uint32_t count, eof, recvd;
@@ -5339,7 +5342,7 @@ static int decode_setclientid_confirm(struct xdr_stream *xdr)
return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
}
-static int decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
+static int decode_write(struct xdr_stream *xdr, struct nfs_pgio_res *res)
{
__be32 *p;
int status;
@@ -6636,7 +6639,7 @@ out:
* Decode Read response
*/
static int nfs4_xdr_dec_read(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_readres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
@@ -6661,7 +6664,7 @@ out:
* Decode WRITE response
*/
static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
- struct nfs_writeres *res)
+ struct nfs_pgio_res *res)
{
struct compound_hdr hdr;
int status;
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 5457745dd4f1..611320753db2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -439,7 +439,7 @@ static void _read_done(struct ore_io_state *ios, void *private)
objlayout_read_done(&objios->oir, status, objios->sync);
}
-int objio_read_pagelist(struct nfs_read_data *rdata)
+int objio_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct objio_state *objios;
@@ -487,7 +487,7 @@ static void _write_done(struct ore_io_state *ios, void *private)
static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
{
struct objio_state *objios = priv;
- struct nfs_write_data *wdata = objios->oir.rpcdata;
+ struct nfs_pgio_data *wdata = objios->oir.rpcdata;
struct address_space *mapping = wdata->header->inode->i_mapping;
pgoff_t index = offset / PAGE_SIZE;
struct page *page;
@@ -531,7 +531,7 @@ static const struct _ore_r4w_op _r4w_op = {
.put_page = &__r4w_put_page,
};
-int objio_write_pagelist(struct nfs_write_data *wdata, int how)
+int objio_write_pagelist(struct nfs_pgio_data *wdata, int how)
{
struct nfs_pgio_header *hdr = wdata->header;
struct objio_state *objios;
@@ -564,14 +564,22 @@ int objio_write_pagelist(struct nfs_write_data *wdata, int how)
return 0;
}
-static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
struct nfs_page *prev, struct nfs_page *req)
{
- if (!pnfs_generic_pg_test(pgio, prev, req))
- return false;
+ unsigned int size;
+
+ size = pnfs_generic_pg_test(pgio, prev, req);
+
+ if (!size || pgio->pg_count + req->wb_bytes >
+ (unsigned long)pgio->pg_layout_private)
+ return 0;
- return pgio->pg_count + req->wb_bytes <=
- (unsigned long)pgio->pg_layout_private;
+ return min(size, req->wb_bytes);
}
static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
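The objio_pg_test() change above switches the pg_test contract from a bool to a byte count: 0 means "cannot coalesce", otherwise the hook returns how many bytes of the request fit, with the objio variant additionally capped by a per-layout byte budget. A standalone model of that arithmetic, with made-up numbers:

/*
 * Model of the new pg_test return-value contract.  The structures are
 * flattened into plain size_t parameters for illustration only.
 */
#include <stdio.h>
#include <stddef.h>

static size_t min_sz(size_t a, size_t b) { return a < b ? a : b; }

/* generic test: bytes still free in the buffer, capped to the request */
static size_t generic_pg_test(size_t bsize, size_t coalesced, size_t req_bytes)
{
	if (coalesced > bsize)
		return 0;           /* should never happen */
	return min_sz(bsize - coalesced, req_bytes);
}

/* objio variant: also respect a layout-wide budget */
static size_t objio_pg_test(size_t bsize, size_t coalesced, size_t req_bytes,
			    size_t layout_budget)
{
	size_t size = generic_pg_test(bsize, coalesced, req_bytes);

	if (!size || coalesced + req_bytes > layout_budget)
		return 0;
	return min_sz(size, req_bytes);
}

int main(void)
{
	printf("%zu\n", generic_pg_test(65536, 61440, 8192));        /* 4096 */
	printf("%zu\n", objio_pg_test(65536, 61440, 8192, 63488));   /* 0 */
	return 0;
}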
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index e4f9cbfec67b..765d3f54e986 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -53,10 +53,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
struct objlayout *objlay;
objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
- if (objlay) {
- spin_lock_init(&objlay->lock);
- INIT_LIST_HEAD(&objlay->err_list);
- }
+ if (!objlay)
+ return NULL;
+ spin_lock_init(&objlay->lock);
+ INIT_LIST_HEAD(&objlay->err_list);
dprintk("%s: Return %p\n", __func__, objlay);
return &objlay->pnfs_layout;
}
@@ -229,11 +229,11 @@ objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
static void _rpc_read_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_read_data *rdata;
+ struct nfs_pgio_data *rdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- rdata = container_of(task, struct nfs_read_data, task);
+ rdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_read_done(rdata);
}
@@ -241,7 +241,7 @@ static void _rpc_read_complete(struct work_struct *work)
void
objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_read_data *rdata = oir->rpcdata;
+ struct nfs_pgio_data *rdata = oir->rpcdata;
oir->status = rdata->task.tk_status = status;
if (status >= 0)
@@ -266,7 +266,7 @@ objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async reads.
*/
enum pnfs_try_status
-objlayout_read_pagelist(struct nfs_read_data *rdata)
+objlayout_read_pagelist(struct nfs_pgio_data *rdata)
{
struct nfs_pgio_header *hdr = rdata->header;
struct inode *inode = hdr->inode;
@@ -312,11 +312,11 @@ objlayout_read_pagelist(struct nfs_read_data *rdata)
static void _rpc_write_complete(struct work_struct *work)
{
struct rpc_task *task;
- struct nfs_write_data *wdata;
+ struct nfs_pgio_data *wdata;
dprintk("%s enter\n", __func__);
task = container_of(work, struct rpc_task, u.tk_work);
- wdata = container_of(task, struct nfs_write_data, task);
+ wdata = container_of(task, struct nfs_pgio_data, task);
pnfs_ld_write_done(wdata);
}
@@ -324,7 +324,7 @@ static void _rpc_write_complete(struct work_struct *work)
void
objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
{
- struct nfs_write_data *wdata = oir->rpcdata;
+ struct nfs_pgio_data *wdata = oir->rpcdata;
oir->status = wdata->task.tk_status = status;
if (status >= 0) {
@@ -351,7 +351,7 @@ objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
* Perform sync or async writes.
*/
enum pnfs_try_status
-objlayout_write_pagelist(struct nfs_write_data *wdata,
+objlayout_write_pagelist(struct nfs_pgio_data *wdata,
int how)
{
struct nfs_pgio_header *hdr = wdata->header;
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 87aa1dec6120..01e041029a6c 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -119,8 +119,8 @@ extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
*/
extern void objio_free_result(struct objlayout_io_res *oir);
-extern int objio_read_pagelist(struct nfs_read_data *rdata);
-extern int objio_write_pagelist(struct nfs_write_data *wdata, int how);
+extern int objio_read_pagelist(struct nfs_pgio_data *rdata);
+extern int objio_write_pagelist(struct nfs_pgio_data *wdata, int how);
/*
* callback API
@@ -168,10 +168,10 @@ extern struct pnfs_layout_segment *objlayout_alloc_lseg(
extern void objlayout_free_lseg(struct pnfs_layout_segment *);
extern enum pnfs_try_status objlayout_read_pagelist(
- struct nfs_read_data *);
+ struct nfs_pgio_data *);
extern enum pnfs_try_status objlayout_write_pagelist(
- struct nfs_write_data *,
+ struct nfs_pgio_data *,
int how);
extern void objlayout_encode_layoutcommit(
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 03ed984ab4d8..b6ee3a6ee96d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -24,9 +24,14 @@
#include "internal.h"
#include "pnfs.h"
+#define NFSDBG_FACILITY NFSDBG_PAGECACHE
+
static struct kmem_cache *nfs_page_cachep;
+static const struct rpc_call_ops nfs_pgio_common_ops;
+
+static void nfs_free_request(struct nfs_page *);
-bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
+static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
{
p->npages = pagecount;
if (pagecount <= ARRAY_SIZE(p->page_array))
@@ -133,11 +138,156 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
return __nfs_iocounter_wait(c);
}
+static int nfs_wait_bit_uninterruptible(void *word)
+{
+ io_schedule();
+ return 0;
+}
+
+/*
+ * nfs_page_group_lock - lock the head of the page group
+ * @req - request in group that is to be locked
+ *
+ * this lock must be held if modifying the page group list
+ */
+void
+nfs_page_group_lock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK,
+ nfs_wait_bit_uninterruptible,
+ TASK_UNINTERRUPTIBLE);
+}
+
+/*
+ * nfs_page_group_unlock - unlock the head of the page group
+ * @req - request in group that is to be unlocked
+ */
+void
+nfs_page_group_unlock(struct nfs_page *req)
+{
+ struct nfs_page *head = req->wb_head;
+
+ WARN_ON_ONCE(head != head->wb_head);
+
+ smp_mb__before_atomic();
+ clear_bit(PG_HEADLOCK, &head->wb_flags);
+ smp_mb__after_atomic();
+ wake_up_bit(&head->wb_flags, PG_HEADLOCK);
+}
+
+/*
+ * nfs_page_group_sync_on_bit_locked
+ *
+ * must be called with page group lock held
+ */
+static bool
+nfs_page_group_sync_on_bit_locked(struct nfs_page *req, unsigned int bit)
+{
+ struct nfs_page *head = req->wb_head;
+ struct nfs_page *tmp;
+
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_flags));
+ WARN_ON_ONCE(test_and_set_bit(bit, &req->wb_flags));
+
+ tmp = req->wb_this_page;
+ while (tmp != req) {
+ if (!test_bit(bit, &tmp->wb_flags))
+ return false;
+ tmp = tmp->wb_this_page;
+ }
+
+ /* true! reset all bits */
+ tmp = req;
+ do {
+ clear_bit(bit, &tmp->wb_flags);
+ tmp = tmp->wb_this_page;
+ } while (tmp != req);
+
+ return true;
+}
+
+/*
+ * nfs_page_group_sync_on_bit - set bit on current request, but only
+ * return true if the bit is set for all requests in page group
+ * @req - request in page group
+ * @bit - PG_* bit that is used to sync page group
+ */
+bool nfs_page_group_sync_on_bit(struct nfs_page *req, unsigned int bit)
+{
+ bool ret;
+
+ nfs_page_group_lock(req);
+ ret = nfs_page_group_sync_on_bit_locked(req, bit);
+ nfs_page_group_unlock(req);
+
+ return ret;
+}
+
+/*
+ * nfs_page_group_init - Initialize the page group linkage for @req
+ * @req - a new nfs request
+ * @prev - the previous request in page group, or NULL if @req is the first
+ * or only request in the group (the head).
+ */
+static inline void
+nfs_page_group_init(struct nfs_page *req, struct nfs_page *prev)
+{
+ WARN_ON_ONCE(prev == req);
+
+ if (!prev) {
+ req->wb_head = req;
+ req->wb_this_page = req;
+ } else {
+ WARN_ON_ONCE(prev->wb_this_page != prev->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &prev->wb_head->wb_flags));
+ req->wb_head = prev->wb_head;
+ req->wb_this_page = prev->wb_this_page;
+ prev->wb_this_page = req;
+
+ /* grab extra ref if head request has extra ref from
+ * the write/commit path to handle handoff between write
+ * and commit lists */
+ if (test_bit(PG_INODE_REF, &prev->wb_head->wb_flags))
+ kref_get(&req->wb_kref);
+ }
+}
+
+/*
+ * nfs_page_group_destroy - sync the destruction of page groups
+ * @req - request that no longer needs the page group
+ *
+ * releases the page group reference from each member once all
+ * members have called this function.
+ */
+static void
+nfs_page_group_destroy(struct kref *kref)
+{
+ struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ struct nfs_page *tmp, *next;
+
+ if (!nfs_page_group_sync_on_bit(req, PG_TEARDOWN))
+ return;
+
+ tmp = req;
+ do {
+ next = tmp->wb_this_page;
+ /* unlink and free */
+ tmp->wb_this_page = tmp;
+ tmp->wb_head = tmp;
+ nfs_free_request(tmp);
+ tmp = next;
+ } while (tmp != req);
+}
+
/**
* nfs_create_request - Create an NFS read/write request.
* @ctx: open context to use
- * @inode: inode to which the request is attached
* @page: page to write
+ * @last: last nfs request created for this page group or NULL if head
* @offset: starting offset within the page for the write
* @count: number of bytes to read/write
*
@@ -146,9 +296,9 @@ nfs_iocounter_wait(struct nfs_io_counter *c)
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
-nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
- struct page *page,
- unsigned int offset, unsigned int count)
+nfs_create_request(struct nfs_open_context *ctx, struct page *page,
+ struct nfs_page *last, unsigned int offset,
+ unsigned int count)
{
struct nfs_page *req;
struct nfs_lock_context *l_ctx;
@@ -180,6 +330,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
req->wb_bytes = count;
req->wb_context = get_nfs_open_context(ctx);
kref_init(&req->wb_kref);
+ nfs_page_group_init(req, last);
return req;
}
@@ -237,16 +388,22 @@ static void nfs_clear_request(struct nfs_page *req)
}
}
-
/**
* nfs_release_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
*/
-static void nfs_free_request(struct kref *kref)
+static void nfs_free_request(struct nfs_page *req)
{
- struct nfs_page *req = container_of(kref, struct nfs_page, wb_kref);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
+ /* extra debug: make sure no sync bits are still set */
+ WARN_ON_ONCE(test_bit(PG_TEARDOWN, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UNLOCKPAGE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_UPTODATE, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_WB_END, &req->wb_flags));
+ WARN_ON_ONCE(test_bit(PG_REMOVE, &req->wb_flags));
/* Release struct file and open context */
nfs_clear_request(req);
@@ -255,13 +412,7 @@ static void nfs_free_request(struct kref *kref)
void nfs_release_request(struct nfs_page *req)
{
- kref_put(&req->wb_kref, nfs_free_request);
-}
-
-static int nfs_wait_bit_uninterruptible(void *word)
-{
- io_schedule();
- return 0;
+ kref_put(&req->wb_kref, nfs_page_group_destroy);
}
/**
@@ -279,22 +430,249 @@ nfs_wait_on_request(struct nfs_page *req)
TASK_UNINTERRUPTIBLE);
}
-bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+/*
+ * nfs_generic_pg_test - determine if requests can be coalesced
+ * @desc: pointer to descriptor
+ * @prev: previous request in desc, or NULL
+ * @req: this request
+ *
+ * Returns zero if @req cannot be coalesced into @desc, otherwise it returns
+ * the number of bytes (at most @req->wb_bytes) that can be coalesced.
+ */
+size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+ struct nfs_page *prev, struct nfs_page *req)
{
- /*
- * FIXME: ideally we should be able to coalesce all requests
- * that are not block boundary aligned, but currently this
- * is problematic for the case of bsize < PAGE_CACHE_SIZE,
- * since nfs_flush_multi and nfs_pagein_multi assume you
- * can have only one struct nfs_page.
- */
- if (desc->pg_bsize < PAGE_SIZE)
+ if (desc->pg_count > desc->pg_bsize) {
+ /* should never happen */
+ WARN_ON_ONCE(1);
return 0;
+ }
- return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
+ return min(desc->pg_bsize - desc->pg_count, (size_t)req->wb_bytes);
}
EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
+static inline struct nfs_rw_header *NFS_RW_HEADER(struct nfs_pgio_header *hdr)
+{
+ return container_of(hdr, struct nfs_rw_header, header);
+}
+
+/**
+ * nfs_rw_header_alloc - Allocate a header for a read or write
+ * @ops: Read or write function vector
+ */
+struct nfs_rw_header *nfs_rw_header_alloc(const struct nfs_rw_ops *ops)
+{
+ struct nfs_rw_header *header = ops->rw_alloc_header();
+
+ if (header) {
+ struct nfs_pgio_header *hdr = &header->header;
+
+ INIT_LIST_HEAD(&hdr->pages);
+ spin_lock_init(&hdr->lock);
+ atomic_set(&hdr->refcnt, 0);
+ hdr->rw_ops = ops;
+ }
+ return header;
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_alloc);
+
+/*
+ * nfs_rw_header_free - Free a read or write header
+ * @hdr: The header to free
+ */
+void nfs_rw_header_free(struct nfs_pgio_header *hdr)
+{
+ hdr->rw_ops->rw_free_header(NFS_RW_HEADER(hdr));
+}
+EXPORT_SYMBOL_GPL(nfs_rw_header_free);
+
+/**
+ * nfs_pgio_data_alloc - Allocate pageio data
+ * @hdr: The header making a request
+ * @pagecount: Number of pages to create
+ */
+static struct nfs_pgio_data *nfs_pgio_data_alloc(struct nfs_pgio_header *hdr,
+ unsigned int pagecount)
+{
+ struct nfs_pgio_data *data, *prealloc;
+
+ prealloc = &NFS_RW_HEADER(hdr)->rpc_data;
+ if (prealloc->header == NULL)
+ data = prealloc;
+ else
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto out;
+
+ if (nfs_pgarray_set(&data->pages, pagecount)) {
+ data->header = hdr;
+ atomic_inc(&hdr->refcnt);
+ } else {
+ if (data != prealloc)
+ kfree(data);
+ data = NULL;
+ }
+out:
+ return data;
+}
+
+/**
+ * nfs_pgio_data_release - Properly free pageio data
+ * @data: The data to release
+ */
+void nfs_pgio_data_release(struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_header *hdr = data->header;
+ struct nfs_rw_header *pageio_header = NFS_RW_HEADER(hdr);
+
+ put_nfs_open_context(data->args.context);
+ if (data->pages.pagevec != data->pages.page_array)
+ kfree(data->pages.pagevec);
+ if (data == &pageio_header->rpc_data) {
+ data->header = NULL;
+ data = NULL;
+ }
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ /* Note: we only free the rpc_task after callbacks are done.
+ * See the comment in rpc_free_task() for why
+ */
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(nfs_pgio_data_release);
+
+/**
+ * nfs_pgio_rpcsetup - Set up arguments for a pageio call
+ * @data: The pageio data
+ * @count: Number of bytes to read or write
+ * @offset: Initial offset
+ * @how: How to commit data (writes only)
+ * @cinfo: Commit information for the call (writes only)
+ */
+static void nfs_pgio_rpcsetup(struct nfs_pgio_data *data,
+ unsigned int count, unsigned int offset,
+ int how, struct nfs_commit_info *cinfo)
+{
+ struct nfs_page *req = data->header->req;
+
+ /* Set up the RPC argument and reply structs
+ * NB: take care not to mess about with data->commit et al. */
+
+ data->args.fh = NFS_FH(data->header->inode);
+ data->args.offset = req_offset(req) + offset;
+ /* pnfs_set_layoutcommit needs this */
+ data->mds_offset = data->args.offset;
+ data->args.pgbase = req->wb_pgbase + offset;
+ data->args.pages = data->pages.pagevec;
+ data->args.count = count;
+ data->args.context = get_nfs_open_context(req->wb_context);
+ data->args.lock_context = req->wb_lock_context;
+ data->args.stable = NFS_UNSTABLE;
+ switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
+ case 0:
+ break;
+ case FLUSH_COND_STABLE:
+ if (nfs_reqs_to_commit(cinfo))
+ break;
+ default:
+ data->args.stable = NFS_FILE_SYNC;
+ }
+
+ data->res.fattr = &data->fattr;
+ data->res.count = count;
+ data->res.eof = 0;
+ data->res.verf = &data->verf;
+ nfs_fattr_init(&data->fattr);
+}
+
+/**
+ * nfs_pgio_prepare - Prepare pageio data to go over the wire
+ * @task: The current task
+ * @calldata: pageio data to prepare
+ */
+static void nfs_pgio_prepare(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ int err;
+ err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);
+ if (err)
+ rpc_exit(task, err);
+}
+
+int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_data *data,
+ const struct rpc_call_ops *call_ops, int how, int flags)
+{
+ struct rpc_task *task;
+ struct rpc_message msg = {
+ .rpc_argp = &data->args,
+ .rpc_resp = &data->res,
+ .rpc_cred = data->header->cred,
+ };
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .task = &data->task,
+ .rpc_message = &msg,
+ .callback_ops = call_ops,
+ .callback_data = data,
+ .workqueue = nfsiod_workqueue,
+ .flags = RPC_TASK_ASYNC | flags,
+ };
+ int ret = 0;
+
+ data->header->rw_ops->rw_initiate(data, &msg, &task_setup_data, how);
+
+ dprintk("NFS: %5u initiated pgio call "
+ "(req %s/%llu, %u bytes @ offset %llu)\n",
+ data->task.tk_pid,
+ data->header->inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(data->header->inode),
+ data->args.count,
+ (unsigned long long)data->args.offset);
+
+ task = rpc_run_task(&task_setup_data);
+ if (IS_ERR(task)) {
+ ret = PTR_ERR(task);
+ goto out;
+ }
+ if (how & FLUSH_SYNC) {
+ ret = rpc_wait_for_completion_task(task);
+ if (ret == 0)
+ ret = task->tk_status;
+ }
+ rpc_put_task(task);
+out:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nfs_initiate_pgio);
+
+/**
+ * nfs_pgio_error - Clean up from a pageio error
+ * @desc: IO descriptor
+ * @hdr: pageio header
+ */
+static int nfs_pgio_error(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ set_bit(NFS_IOHDR_REDO, &hdr->flags);
+ nfs_pgio_data_release(hdr->data);
+ hdr->data = NULL;
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+}
+
+/**
+ * nfs_pgio_release - Release pageio data
+ * @calldata: The pageio data to release
+ */
+static void nfs_pgio_release(void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ if (data->header->rw_ops->rw_release)
+ data->header->rw_ops->rw_release(data);
+ nfs_pgio_data_release(data);
+}
+
/**
* nfs_pageio_init - initialise a page io descriptor
* @desc: pointer to descriptor
@@ -307,6 +685,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
const struct nfs_pgio_completion_ops *compl_ops,
+ const struct nfs_rw_ops *rw_ops,
size_t bsize,
int io_flags)
{
@@ -320,6 +699,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
desc->pg_inode = inode;
desc->pg_ops = pg_ops;
desc->pg_completion_ops = compl_ops;
+ desc->pg_rw_ops = rw_ops;
desc->pg_ioflags = io_flags;
desc->pg_error = 0;
desc->pg_lseg = NULL;
@@ -328,6 +708,94 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
}
EXPORT_SYMBOL_GPL(nfs_pageio_init);
+/**
+ * nfs_pgio_result - Basic pageio error handling
+ * @task: The task that ran
+ * @calldata: Pageio data to check
+ */
+static void nfs_pgio_result(struct rpc_task *task, void *calldata)
+{
+ struct nfs_pgio_data *data = calldata;
+ struct inode *inode = data->header->inode;
+
+ dprintk("NFS: %s: %5u, (status %d)\n", __func__,
+ task->tk_pid, task->tk_status);
+
+ if (data->header->rw_ops->rw_done(task, data, inode) != 0)
+ return;
+ if (task->tk_status < 0)
+ nfs_set_pgio_error(data->header, task->tk_status, data->args.offset);
+ else
+ data->header->rw_ops->rw_result(task, data);
+}
+
+/*
+ * Create an RPC task for the given read or write request and kick it.
+ * The page must have been locked by the caller.
+ *
+ * It may happen that the page we're passed is not marked dirty.
+ * This is the case if nfs_updatepage detects a conflicting request
+ * that has been written but not committed.
+ */
+int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr)
+{
+ struct nfs_page *req;
+ struct page **pages;
+ struct nfs_pgio_data *data;
+ struct list_head *head = &desc->pg_list;
+ struct nfs_commit_info cinfo;
+
+ data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
+ desc->pg_count));
+ if (!data)
+ return nfs_pgio_error(desc, hdr);
+
+ nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
+ pages = data->pages.pagevec;
+ while (!list_empty(head)) {
+ req = nfs_list_entry(head->next);
+ nfs_list_remove_request(req);
+ nfs_list_add_request(req, &hdr->pages);
+ *pages++ = req->wb_page;
+ }
+
+ if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
+ (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
+ desc->pg_ioflags &= ~FLUSH_COND_STABLE;
+
+ /* Set up the argument struct */
+ nfs_pgio_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
+ hdr->data = data;
+ desc->pg_rpc_callops = &nfs_pgio_common_ops;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nfs_generic_pgio);
+
+static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc)
+{
+ struct nfs_rw_header *rw_hdr;
+ struct nfs_pgio_header *hdr;
+ int ret;
+
+ rw_hdr = nfs_rw_header_alloc(desc->pg_rw_ops);
+ if (!rw_hdr) {
+ desc->pg_completion_ops->error_cleanup(&desc->pg_list);
+ return -ENOMEM;
+ }
+ hdr = &rw_hdr->header;
+ nfs_pgheader_init(desc, hdr, nfs_rw_header_free);
+ atomic_inc(&hdr->refcnt);
+ ret = nfs_generic_pgio(desc, hdr);
+ if (ret == 0)
+ ret = nfs_initiate_pgio(NFS_CLIENT(hdr->inode),
+ hdr->data, desc->pg_rpc_callops,
+ desc->pg_ioflags, 0);
+ if (atomic_dec_and_test(&hdr->refcnt))
+ hdr->completion_ops->completion(hdr);
+ return ret;
+}
+
static bool nfs_match_open_context(const struct nfs_open_context *ctx1,
const struct nfs_open_context *ctx2)
{
@@ -356,18 +824,23 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
struct nfs_page *req,
struct nfs_pageio_descriptor *pgio)
{
- if (!nfs_match_open_context(req->wb_context, prev->wb_context))
- return false;
- if (req->wb_context->dentry->d_inode->i_flock != NULL &&
- !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context))
- return false;
- if (req->wb_pgbase != 0)
- return false;
- if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
- return false;
- if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
- return false;
- return pgio->pg_ops->pg_test(pgio, prev, req);
+ size_t size;
+
+ if (prev) {
+ if (!nfs_match_open_context(req->wb_context, prev->wb_context))
+ return false;
+ if (req->wb_context->dentry->d_inode->i_flock != NULL &&
+ !nfs_match_lock_context(req->wb_lock_context,
+ prev->wb_lock_context))
+ return false;
+ if (req_offset(req) != req_offset(prev) + prev->wb_bytes)
+ return false;
+ }
+ size = pgio->pg_ops->pg_test(pgio, prev, req);
+ WARN_ON_ONCE(size > req->wb_bytes);
+ if (size && size < req->wb_bytes)
+ req->wb_bytes = size;
+ return size > 0;
}
/**
@@ -381,17 +854,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
+ struct nfs_page *prev = NULL;
if (desc->pg_count != 0) {
- struct nfs_page *prev;
-
prev = nfs_list_entry(desc->pg_list.prev);
- if (!nfs_can_coalesce_requests(prev, req, desc))
- return 0;
} else {
if (desc->pg_ops->pg_init)
desc->pg_ops->pg_init(desc, req);
desc->pg_base = req->wb_pgbase;
}
+ if (!nfs_can_coalesce_requests(prev, req, desc))
+ return 0;
nfs_list_remove_request(req);
nfs_list_add_request(req, &desc->pg_list);
desc->pg_count += req->wb_bytes;
@@ -421,22 +893,73 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
* @desc: destination io descriptor
* @req: request
*
+ * This may split a request into subrequests which are all part of the
+ * same page group.
+ *
* Returns true if the request 'req' was successfully coalesced into the
* existing list of pages 'desc'.
*/
static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
- while (!nfs_pageio_do_add_request(desc, req)) {
- desc->pg_moreio = 1;
- nfs_pageio_doio(desc);
- if (desc->pg_error < 0)
- return 0;
- desc->pg_moreio = 0;
- if (desc->pg_recoalesce)
- return 0;
- }
+ struct nfs_page *subreq;
+ unsigned int bytes_left = 0;
+ unsigned int offset, pgbase;
+
+ nfs_page_group_lock(req);
+
+ subreq = req;
+ bytes_left = subreq->wb_bytes;
+ offset = subreq->wb_offset;
+ pgbase = subreq->wb_pgbase;
+
+ do {
+ if (!nfs_pageio_do_add_request(desc, subreq)) {
+ /* make sure pg_test call(s) did nothing */
+ WARN_ON_ONCE(subreq->wb_bytes != bytes_left);
+ WARN_ON_ONCE(subreq->wb_offset != offset);
+ WARN_ON_ONCE(subreq->wb_pgbase != pgbase);
+
+ nfs_page_group_unlock(req);
+ desc->pg_moreio = 1;
+ nfs_pageio_doio(desc);
+ if (desc->pg_error < 0)
+ return 0;
+ desc->pg_moreio = 0;
+ if (desc->pg_recoalesce)
+ return 0;
+ /* retry add_request for this subreq */
+ nfs_page_group_lock(req);
+ continue;
+ }
+
+ /* check for buggy pg_test call(s) */
+ WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE);
+ WARN_ON_ONCE(subreq->wb_bytes > bytes_left);
+ WARN_ON_ONCE(subreq->wb_bytes == 0);
+
+ bytes_left -= subreq->wb_bytes;
+ offset += subreq->wb_bytes;
+ pgbase += subreq->wb_bytes;
+
+ if (bytes_left) {
+ subreq = nfs_create_request(req->wb_context,
+ req->wb_page,
+ subreq, pgbase, bytes_left);
+ if (IS_ERR(subreq))
+ goto err_ptr;
+ nfs_lock_request(subreq);
+ subreq->wb_offset = offset;
+ subreq->wb_index = req->wb_index;
+ }
+ } while (bytes_left > 0);
+
+ nfs_page_group_unlock(req);
return 1;
+err_ptr:
+ desc->pg_error = PTR_ERR(subreq);
+ nfs_page_group_unlock(req);
+ return 0;
}
static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
@@ -535,3 +1058,13 @@ void nfs_destroy_nfspagecache(void)
kmem_cache_destroy(nfs_page_cachep);
}
+static const struct rpc_call_ops nfs_pgio_common_ops = {
+ .rpc_call_prepare = nfs_pgio_prepare,
+ .rpc_call_done = nfs_pgio_result,
+ .rpc_release = nfs_pgio_release,
+};
+
+const struct nfs_pageio_ops nfs_pgio_rw_ops = {
+ .pg_test = nfs_generic_pg_test,
+ .pg_doio = nfs_generic_pg_pgios,
+};
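Among the new pagelist.c helpers, nfs_page_group_sync_on_bit() is the least obvious: each member of a page group sets a bit, and only the last member to do so sees it set on every other member, clears them all and returns true. Below is a userspace model of that ring walk; the circular wb_this_page list and the PG_* bit are replaced by plain C fields, so this is a sketch of the logic, not the kernel code.

/*
 * Model of nfs_page_group_sync_on_bit(): the caller that completes the
 * group resets the flag on all members and reports true.
 */
#include <stdbool.h>
#include <stdio.h>

struct req {
	struct req *next;   /* stands in for wb_this_page */
	bool flag;          /* stands in for one PG_* bit */
};

static bool sync_on_flag(struct req *req)
{
	struct req *tmp;

	req->flag = true;
	for (tmp = req->next; tmp != req; tmp = tmp->next)
		if (!tmp->flag)
			return false;    /* some member has not arrived yet */

	/* all members done: reset the flag for the next round */
	tmp = req;
	do {
		tmp->flag = false;
		tmp = tmp->next;
	} while (tmp != req);
	return true;
}

int main(void)
{
	struct req a, b, c;

	a.next = &b; b.next = &c; c.next = &a;
	a.flag = b.flag = c.flag = false;

	printf("%d %d %d\n", sync_on_flag(&a), sync_on_flag(&b),
	       sync_on_flag(&c));   /* prints: 0 0 1 */
	return 0;
}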
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index fd9536e494bc..6fdcd233d6f7 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1388,11 +1388,6 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_read_mds(pgio);
- return;
- }
-
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
@@ -1417,11 +1412,6 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
{
WARN_ON_ONCE(pgio->pg_lseg != NULL);
- if (req->wb_offset != req->wb_pgbase) {
- nfs_pageio_reset_write_mds(pgio);
- return;
- }
-
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
req->wb_context,
req_offset(req),
@@ -1434,56 +1424,49 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
-void
-pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_read(pgio, inode, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_read_ops, compl_ops, server->rsize, 0);
-}
-
-void
-pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- struct nfs_server *server = NFS_SERVER(inode);
- struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
-
- if (ld == NULL)
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
- else
- nfs_pageio_init(pgio, inode, ld->pg_write_ops, compl_ops, server->wsize, ioflags);
-}
-
-bool
+/*
+ * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
+ * of bytes (maximum @req->wb_bytes) that can be coalesced.
+ */
+size_t
pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (pgio->pg_lseg == NULL)
- return nfs_generic_pg_test(pgio, prev, req);
+ unsigned int size;
+ u64 seg_end, req_start, seg_left;
+
+ size = nfs_generic_pg_test(pgio, prev, req);
+ if (!size)
+ return 0;
/*
- * Test if a nfs_page is fully contained in the pnfs_layout_range.
- * Note that this test makes several assumptions:
- * - that the previous nfs_page in the struct nfs_pageio_descriptor
- * is known to lie within the range.
- * - that the nfs_page being tested is known to be contiguous with the
- * previous nfs_page.
- * - Layout ranges are page aligned, so we only have to test the
- * start offset of the request.
+ * 'size' contains the number of bytes left in the current page (up
+ * to the original size asked for in @req->wb_bytes).
+ *
+ * Calculate how many bytes are left in the layout segment
+	 * and if there are fewer bytes than 'size', return that instead.
*
* Please also note that 'end_offset' is actually the offset of the
* first byte that lies outside the pnfs_layout_range. FIXME?
*
*/
- return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
- pgio->pg_lseg->pls_range.length);
+ if (pgio->pg_lseg) {
+ seg_end = end_offset(pgio->pg_lseg->pls_range.offset,
+ pgio->pg_lseg->pls_range.length);
+ req_start = req_offset(req);
+ WARN_ON_ONCE(req_start > seg_end);
+ /* start of request is past the last byte of this segment */
+ if (req_start >= seg_end)
+ return 0;
+
+ /* adjust 'size' iff there are fewer bytes left in the
+ * segment than what nfs_generic_pg_test returned */
+ seg_left = seg_end - req_start;
+ if (seg_left < size)
+ size = (unsigned int)seg_left;
+ }
+
+ return size;
}
EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
@@ -1496,7 +1479,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, compl_ops);
+ nfs_pageio_init_write(&pgio, inode, FLUSH_STABLE, true, compl_ops);
pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1519,7 +1502,7 @@ int pnfs_write_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_write_done_resend_to_mds);
-static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
+static void pnfs_ld_handle_write_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1538,7 +1521,7 @@ static void pnfs_ld_handle_write_error(struct nfs_write_data *data)
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_write_done(struct nfs_write_data *data)
+void pnfs_ld_write_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1554,7 +1537,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
static void
pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_write_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1563,11 +1546,11 @@ pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_write_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_writedata_release(data);
+ nfs_pgio_data_release(data);
}
static enum pnfs_try_status
-pnfs_try_to_write_data(struct nfs_write_data *wdata,
+pnfs_try_to_write_data(struct nfs_pgio_data *wdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg,
int how)
@@ -1589,41 +1572,36 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
}
static void
-pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
+pnfs_do_write(struct nfs_pageio_descriptor *desc,
+ struct nfs_pgio_header *hdr, int how)
{
- struct nfs_write_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_write_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_write_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_writehdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
int
pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_write_header *whdr;
+ struct nfs_rw_header *whdr;
struct nfs_pgio_header *hdr;
int ret;
- whdr = nfs_writehdr_alloc();
+ whdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!whdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
pnfs_put_lseg(desc->pg_lseg);
@@ -1634,12 +1612,12 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
+ pnfs_do_write(desc, hdr, desc->pg_ioflags);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
@@ -1655,7 +1633,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
LIST_HEAD(failed);
/* Resend all requests through the MDS */
- nfs_pageio_init_read(&pgio, inode, compl_ops);
+ nfs_pageio_init_read(&pgio, inode, true, compl_ops);
pgio.pg_dreq = dreq;
while (!list_empty(head)) {
struct nfs_page *req = nfs_list_entry(head->next);
@@ -1674,7 +1652,7 @@ int pnfs_read_done_resend_to_mds(struct inode *inode,
}
EXPORT_SYMBOL_GPL(pnfs_read_done_resend_to_mds);
-static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
+static void pnfs_ld_handle_read_error(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1693,7 +1671,7 @@ static void pnfs_ld_handle_read_error(struct nfs_read_data *data)
/*
* Called by non rpc-based layout drivers
*/
-void pnfs_ld_read_done(struct nfs_read_data *data)
+void pnfs_ld_read_done(struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1709,7 +1687,7 @@ EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
static void
pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
- struct nfs_read_data *data)
+ struct nfs_pgio_data *data)
{
struct nfs_pgio_header *hdr = data->header;
@@ -1718,14 +1696,14 @@ pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
nfs_pageio_reset_read_mds(desc);
desc->pg_recoalesce = 1;
}
- nfs_readdata_release(data);
+ nfs_pgio_data_release(data);
}
/*
* Call the appropriate parallel I/O subsystem read function.
*/
static enum pnfs_try_status
-pnfs_try_to_read_data(struct nfs_read_data *rdata,
+pnfs_try_to_read_data(struct nfs_pgio_data *rdata,
const struct rpc_call_ops *call_ops,
struct pnfs_layout_segment *lseg)
{
@@ -1747,41 +1725,35 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
}
static void
-pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
+pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
{
- struct nfs_read_data *data;
+ struct nfs_pgio_data *data = hdr->data;
const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
struct pnfs_layout_segment *lseg = desc->pg_lseg;
+ enum pnfs_try_status trypnfs;
desc->pg_lseg = NULL;
- while (!list_empty(head)) {
- enum pnfs_try_status trypnfs;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
- if (trypnfs == PNFS_NOT_ATTEMPTED)
- pnfs_read_through_mds(desc, data);
- }
+ trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
+ if (trypnfs == PNFS_NOT_ATTEMPTED)
+ pnfs_read_through_mds(desc, data);
pnfs_put_lseg(lseg);
}
static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
{
pnfs_put_lseg(hdr->lseg);
- nfs_readhdr_free(hdr);
+ nfs_rw_header_free(hdr);
}
EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
int
pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
- struct nfs_read_header *rhdr;
+ struct nfs_rw_header *rhdr;
struct nfs_pgio_header *hdr;
int ret;
- rhdr = nfs_readhdr_alloc();
+ rhdr = nfs_rw_header_alloc(desc->pg_rw_ops);
if (!rhdr) {
desc->pg_completion_ops->error_cleanup(&desc->pg_list);
ret = -ENOMEM;
@@ -1793,12 +1765,12 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
+ ret = nfs_generic_pgio(desc, hdr);
if (ret != 0) {
pnfs_put_lseg(desc->pg_lseg);
desc->pg_lseg = NULL;
} else
- pnfs_do_multiple_reads(desc, &hdr->rpc_list);
+ pnfs_do_read(desc, hdr);
if (atomic_dec_and_test(&hdr->refcnt))
hdr->completion_ops->completion(hdr);
return ret;
@@ -1848,7 +1820,7 @@ void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
void
-pnfs_set_layoutcommit(struct nfs_write_data *wdata)
+pnfs_set_layoutcommit(struct nfs_pgio_data *wdata)
{
struct nfs_pgio_header *hdr = wdata->header;
struct inode *inode = hdr->inode;
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index c3058a076596..4fb309a2b4c4 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -113,8 +113,8 @@ struct pnfs_layoutdriver_type {
* Return PNFS_ATTEMPTED to indicate the layout code has attempted
* I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
*/
- enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
- enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
+ enum pnfs_try_status (*read_pagelist) (struct nfs_pgio_data *nfs_data);
+ enum pnfs_try_status (*write_pagelist) (struct nfs_pgio_data *nfs_data, int how);
void (*free_deviceid_node) (struct nfs4_deviceid_node *);
@@ -180,11 +180,6 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
-void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
- const struct nfs_pgio_completion_ops *);
-void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *,
- int, const struct nfs_pgio_completion_ops *);
-
void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
void unset_pnfs_layoutdriver(struct nfs_server *);
void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
@@ -192,7 +187,8 @@ int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
struct nfs_page *req, u64 wb_size);
int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
-bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
+size_t pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio,
+ struct nfs_page *prev, struct nfs_page *req);
void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -217,13 +213,13 @@ bool pnfs_roc(struct inode *ino);
void pnfs_roc_release(struct inode *ino);
void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
-void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
+void pnfs_set_layoutcommit(struct nfs_pgio_data *wdata);
void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
int _pnfs_return_layout(struct inode *);
int pnfs_commit_and_return_layout(struct inode *);
-void pnfs_ld_write_done(struct nfs_write_data *);
-void pnfs_ld_read_done(struct nfs_read_data *);
+void pnfs_ld_write_done(struct nfs_pgio_data *);
+void pnfs_ld_read_done(struct nfs_pgio_data *);
struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
struct nfs_open_context *ctx,
loff_t pos,
@@ -461,18 +457,6 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
{
}
-static inline void pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_read(pgio, inode, compl_ops);
-}
-
-static inline void pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags,
- const struct nfs_pgio_completion_ops *compl_ops)
-{
- nfs_pageio_init_write(pgio, inode, ioflags, compl_ops);
-}
-
static inline int
pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how,
struct nfs_commit_info *cinfo)
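The rewritten pnfs_generic_pg_test() starts from whatever the generic test allows and then clamps it to the bytes remaining in the layout segment, returning 0 once the request starts at or beyond the segment end. A standalone sketch of that clamping with illustrative values (seg_end is the first byte outside the range, matching the comment in the hunk):

/*
 * Model of the segment clamp in pnfs_generic_pg_test(); values and
 * parameter packaging are illustrative only.
 */
#include <stdio.h>
#include <stdint.h>

static unsigned int clamp_to_segment(unsigned int size, uint64_t seg_offset,
				     uint64_t seg_length, uint64_t req_start)
{
	uint64_t seg_end = seg_offset + seg_length;  /* first byte outside */
	uint64_t seg_left;

	if (!size)
		return 0;
	if (req_start >= seg_end)
		return 0;            /* request begins past the segment */
	seg_left = seg_end - req_start;
	return seg_left < size ? (unsigned int)seg_left : size;
}

int main(void)
{
	printf("%u\n", clamp_to_segment(4096, 0, 1048576, 1046528)); /* 2048 */
	printf("%u\n", clamp_to_segment(4096, 0, 1048576, 1048576)); /* 0 */
	return 0;
}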
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index e55ce9e8b034..c171ce1a8a30 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -578,7 +578,7 @@ nfs_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
return 0;
}
-static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_read_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -594,18 +594,18 @@ static int nfs_read_done(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
+static void nfs_proc_read_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
msg->rpc_proc = &nfs_procedures[NFSPROC_READ];
}
-static int nfs_proc_read_rpc_prepare(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_proc_pgio_rpc_prepare(struct rpc_task *task, struct nfs_pgio_data *data)
{
rpc_call_start(task);
return 0;
}
-static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_write_done(struct rpc_task *task, struct nfs_pgio_data *data)
{
struct inode *inode = data->header->inode;
@@ -614,19 +614,13 @@ static int nfs_write_done(struct rpc_task *task, struct nfs_write_data *data)
return 0;
}
-static void nfs_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
+static void nfs_proc_write_setup(struct nfs_pgio_data *data, struct rpc_message *msg)
{
/* Note: NFSv2 ignores @stable and always uses NFS_FILE_SYNC */
data->args.stable = NFS_FILE_SYNC;
msg->rpc_proc = &nfs_procedures[NFSPROC_WRITE];
}
-static int nfs_proc_write_rpc_prepare(struct rpc_task *task, struct nfs_write_data *data)
-{
- rpc_call_start(task);
- return 0;
-}
-
static void nfs_proc_commit_rpc_prepare(struct rpc_task *task, struct nfs_commit_data *data)
{
BUG();
@@ -734,13 +728,10 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
.fsinfo = nfs_proc_fsinfo,
.pathconf = nfs_proc_pathconf,
.decode_dirent = nfs2_decode_dirent,
+ .pgio_rpc_prepare = nfs_proc_pgio_rpc_prepare,
.read_setup = nfs_proc_read_setup,
- .read_pageio_init = nfs_pageio_init_read,
- .read_rpc_prepare = nfs_proc_read_rpc_prepare,
.read_done = nfs_read_done,
.write_setup = nfs_proc_write_setup,
- .write_pageio_init = nfs_pageio_init_write,
- .write_rpc_prepare = nfs_proc_write_rpc_prepare,
.write_done = nfs_write_done,
.commit_setup = nfs_proc_commit_setup,
.commit_rpc_prepare = nfs_proc_commit_rpc_prepare,
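With reads and writes sharing struct nfs_pgio_data, the v2 rpc_ops table needs only one prepare hook and no per-direction pageio_init entries. A sketch of the shared prepare step this hook feeds into, mirroring the nfs_read_prepare()/nfs_write_prepare() pair removed later in this patch (the real call site is in the common pageio code, which is only assumed here):

	static void example_pgio_prepare(struct rpc_task *task,
					 struct nfs_pgio_data *data)
	{
		int err = NFS_PROTO(data->header->inode)->pgio_rpc_prepare(task, data);

		if (err)
			rpc_exit(task, err);
	}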
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index 411aedda14bb..e818a475ca64 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -24,85 +24,24 @@
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
+#include "pnfs.h"
#define NFSDBG_FACILITY NFSDBG_PAGECACHE
-static const struct nfs_pageio_ops nfs_pageio_read_ops;
-static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
+static const struct nfs_rw_ops nfs_rw_read_ops;
static struct kmem_cache *nfs_rdata_cachep;
-struct nfs_read_header *nfs_readhdr_alloc(void)
+static struct nfs_rw_header *nfs_readhdr_alloc(void)
{
- struct nfs_read_header *rhdr;
-
- rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
- if (rhdr) {
- struct nfs_pgio_header *hdr = &rhdr->header;
-
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- }
- return rhdr;
+ return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
-static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
+static void nfs_readhdr_free(struct nfs_rw_header *rhdr)
{
- struct nfs_read_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-
-void nfs_readhdr_free(struct nfs_pgio_header *hdr)
-{
- struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);
-
kmem_cache_free(nfs_rdata_cachep, rhdr);
}
-EXPORT_SYMBOL_GPL(nfs_readhdr_free);
-
-void nfs_readdata_release(struct nfs_read_data *rdata)
-{
- struct nfs_pgio_header *hdr = rdata->header;
- struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);
-
- put_nfs_open_context(rdata->args.context);
- if (rdata->pages.pagevec != rdata->pages.page_array)
- kfree(rdata->pages.pagevec);
- if (rdata == &read_header->rpc_data) {
- rdata->header = NULL;
- rdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(rdata);
-}
-EXPORT_SYMBOL_GPL(nfs_readdata_release);
static
int nfs_return_empty_page(struct page *page)
@@ -114,17 +53,24 @@ int nfs_return_empty_page(struct page *page)
}
void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
- struct inode *inode,
+ struct inode *inode, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
- NFS_SERVER(inode)->rsize, 0);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_read_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
+ server->rsize, 0);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_read_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
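Read descriptors now choose their pg_ops at init time: the pNFS layout driver's read ops when one is attached and force_mds is false, otherwise the shared nfs_pgio_rw_ops. A usage sketch of the new force_mds argument (valid inside read.c, where nfs_async_read_completion_ops is visible):

	static void example_mds_read(struct inode *inode, struct nfs_page *req)
	{
		struct nfs_pageio_descriptor pgio;

		/* force_mds = true keeps the I/O on the metadata server */
		nfs_pageio_init_read(&pgio, inode, true,
				     &nfs_async_read_completion_ops);
		nfs_pageio_add_request(&pgio, req);
		nfs_pageio_complete(&pgio);
	}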
@@ -139,7 +85,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
len = nfs_page_length(page);
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(ctx, inode, page, 0, len);
+ new = nfs_create_request(ctx, page, NULL, 0, len);
if (IS_ERR(new)) {
unlock_page(page);
return PTR_ERR(new);
@@ -147,7 +93,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
if (len < PAGE_CACHE_SIZE)
zero_user_segment(page, len, PAGE_CACHE_SIZE);
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
nfs_pageio_add_request(&pgio, new);
nfs_pageio_complete(&pgio);
NFS_I(inode)->read_io += pgio.pg_bytes_written;
@@ -158,10 +105,16 @@ static void nfs_readpage_release(struct nfs_page *req)
{
struct inode *d_inode = req->wb_context->dentry->d_inode;
- if (PageUptodate(req->wb_page))
- nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+ dprintk("NFS: read done (%s/%llu %d@%lld)\n", d_inode->i_sb->s_id,
+ (unsigned long long)NFS_FILEID(d_inode), req->wb_bytes,
+ (long long)req_offset(req));
- unlock_page(req->wb_page);
+ if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) {
+ if (PageUptodate(req->wb_page))
+ nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
+
+ unlock_page(req->wb_page);
+ }
dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
req->wb_context->dentry->d_inode->i_sb->s_id,
@@ -171,7 +124,12 @@ static void nfs_readpage_release(struct nfs_page *req)
nfs_release_request(req);
}
-/* Note io was page aligned */
+static void nfs_page_group_set_uptodate(struct nfs_page *req)
+{
+ if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
+ SetPageUptodate(req->wb_page);
+}
+
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
unsigned long bytes = 0;
@@ -181,21 +139,32 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
while (!list_empty(&hdr->pages)) {
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct page *page = req->wb_page;
+ unsigned long start = req->wb_pgbase;
+ unsigned long end = req->wb_pgbase + req->wb_bytes;
if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
- if (bytes > hdr->good_bytes)
- zero_user(page, 0, PAGE_SIZE);
- else if (hdr->good_bytes - bytes < PAGE_SIZE)
- zero_user_segment(page,
- hdr->good_bytes & ~PAGE_MASK,
- PAGE_SIZE);
+ /* note: regions of the page not covered by a
+ * request are zeroed in nfs_readpage_async /
+ * readpage_async_filler */
+ if (bytes > hdr->good_bytes) {
+ /* nothing in this request was good, so zero
+ * the full extent of the request */
+ zero_user_segment(page, start, end);
+
+ } else if (hdr->good_bytes - bytes < req->wb_bytes) {
+ /* part of this request has good bytes, but
+ * not all. zero the bad bytes */
+ start += hdr->good_bytes - bytes;
+ WARN_ON(start < req->wb_pgbase);
+ zero_user_segment(page, start, end);
+ }
}
bytes += req->wb_bytes;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (bytes <= hdr->good_bytes)
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
} else
- SetPageUptodate(page);
+ nfs_page_group_set_uptodate(req);
nfs_list_remove_request(req);
nfs_readpage_release(req);
}
@@ -203,95 +172,14 @@ out:
hdr->release(hdr);
}
-int nfs_initiate_read(struct rpc_clnt *clnt,
- struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops, int flags)
+static void nfs_initiate_read(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .task = &data->task,
- .rpc_client = clnt,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | swap_flags | flags,
- };
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->read_setup(data, &msg);
-
- dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
- "offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task))
- return PTR_ERR(task);
- rpc_put_task(task);
- return 0;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_read);
-
-/*
- * Set up the NFS read request struct
- */
-static void nfs_read_rpcsetup(struct nfs_read_data *data,
- unsigned int count, unsigned int offset)
-{
- struct nfs_page *req = data->header->req;
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.eof = 0;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_read(struct nfs_read_data *data,
- const struct rpc_call_ops *call_ops)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
-}
-
-static int
-nfs_do_multiple_reads(struct list_head *head,
- const struct rpc_call_ops *call_ops)
-{
- struct nfs_read_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_read_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_read(data, call_ops);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ task_setup_data->flags |= swap_flags;
+ NFS_PROTO(inode)->read_setup(data, msg);
}
static void
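nfs_initiate_read() is reduced to the direction-specific tweaks (swap flags plus read_setup); building the rpc_message/rpc_task_setup and running the task is assumed to move into the shared pageio layer, roughly along the lines of the code removed above. A sketch of that assumed driver (.callback_ops omitted, so rpc_run_task() would fall back to its default ops here):

	static void example_run_pgio(struct nfs_pgio_data *data,
				     const struct nfs_rw_ops *rw_ops, int how)
	{
		struct rpc_task *task;
		struct rpc_message msg = {
			.rpc_argp = &data->args,
			.rpc_resp = &data->res,
			.rpc_cred = data->header->cred,
		};
		struct rpc_task_setup task_setup_data = {
			.task		= &data->task,
			.rpc_client	= NFS_CLIENT(data->header->inode),
			.rpc_message	= &msg,
			.callback_data	= data,
			.workqueue	= nfsiod_workqueue,
			.flags		= RPC_TASK_ASYNC,
		};

		rw_ops->rw_initiate(data, &msg, &task_setup_data, how);

		task = rpc_run_task(&task_setup_data);
		if (!IS_ERR(task))
			rpc_put_task(task);
	}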
@@ -311,143 +199,14 @@ static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
.completion = nfs_read_completion,
};
-static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_read_data, list);
- list_del(&data->list);
- nfs_readdata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple requests to fill a single page.
- *
- * We optimize to reduce the number of read operations on the wire. If we
- * detect that we're reading a page, or an area of a page, that is past the
- * end of file, we do not generate NFS read operations but just clear the
- * parts of the page that would have come back zero from the server anyway.
- *
- * We rely on the cached value of i_size to make this determination; another
- * client can fill pages on the server past our cached end-of-file, but we
- * won't see the new data until our attribute cache is updated. This is more
- * or less conventional NFS client behavior.
- */
-static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_read_data *data;
- size_t rsize = desc->pg_bsize, nbytes;
- unsigned int offset;
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes,rsize);
-
- data = nfs_readdata_alloc(hdr, 1);
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_read_rpcsetup(data, len, offset);
- list_add(&data->list, &hdr->rpc_list);
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
-
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_read_data *data;
- struct list_head *head = &desc->pg_list;
-
- data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_pagein_error(desc, hdr);
- return -ENOMEM;
- }
-
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- nfs_read_rpcsetup(data, desc->pg_count, 0);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_read_common_ops;
- return 0;
-}
-
-int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_pagein_multi(desc, hdr);
- return nfs_pagein_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_pagein);
-
-static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_read_header *rhdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- rhdr = nfs_readhdr_alloc();
- if (!rhdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &rhdr->header;
- nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_pagein(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_reads(&hdr->rpc_list,
- desc->pg_rpc_callops);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_read_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_readpages,
-};
-
/*
* This is the callback from RPC telling us whether a reply was
* received or some error occurred (timeout or socket shutdown).
*/
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+static int nfs_readpage_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct inode *inode = data->header->inode;
- int status;
-
- dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
- task->tk_status);
-
- status = NFS_PROTO(inode)->read_done(task, data);
+ int status = NFS_PROTO(inode)->read_done(task, data);
if (status != 0)
return status;
@@ -460,10 +219,10 @@ int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
return 0;
}
-static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_readargs *argp = &data->args;
- struct nfs_readres *resp = &data->res;
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
/* This is a short read! */
nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
@@ -480,17 +239,11 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
rpc_restart_call_prepare(task);
}
-static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
+static void nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
{
- struct nfs_read_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
- /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
- if (nfs_readpage_result(task, data) != 0)
- return;
- if (task->tk_status < 0)
- nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
- else if (data->res.eof) {
+ if (data->res.eof) {
loff_t bound;
bound = data->args.offset + data->res.count;
@@ -505,26 +258,6 @@ static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
nfs_readpage_retry(task, data);
}
-static void nfs_readpage_release_common(void *calldata)
-{
- nfs_readdata_release(calldata);
-}
-
-void nfs_read_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_read_data *data = calldata;
- int err;
- err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
- if (err)
- rpc_exit(task, err);
-}
-
-static const struct rpc_call_ops nfs_read_common_ops = {
- .rpc_call_prepare = nfs_read_prepare,
- .rpc_call_done = nfs_readpage_result_common,
- .rpc_release = nfs_readpage_release_common,
-};
-
/*
* Read a page over NFS.
* We read the page synchronously in the following case:
@@ -592,7 +325,6 @@ static int
readpage_async_filler(void *data, struct page *page)
{
struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
- struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *new;
unsigned int len;
int error;
@@ -601,7 +333,7 @@ readpage_async_filler(void *data, struct page *page)
if (len == 0)
return nfs_return_empty_page(page);
- new = nfs_create_request(desc->ctx, inode, page, 0, len);
+ new = nfs_create_request(desc->ctx, page, NULL, 0, len);
if (IS_ERR(new))
goto out_error;
@@ -654,7 +386,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
if (ret == 0)
goto read_complete; /* all pages were read */
- NFS_PROTO(inode)->read_pageio_init(&pgio, inode, &nfs_async_read_completion_ops);
+ nfs_pageio_init_read(&pgio, inode, false,
+ &nfs_async_read_completion_ops);
ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
@@ -671,7 +404,7 @@ out:
int __init nfs_init_readpagecache(void)
{
nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
- sizeof(struct nfs_read_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_rdata_cachep == NULL)
@@ -684,3 +417,12 @@ void nfs_destroy_readpagecache(void)
{
kmem_cache_destroy(nfs_rdata_cachep);
}
+
+static const struct nfs_rw_ops nfs_rw_read_ops = {
+ .rw_mode = FMODE_READ,
+ .rw_alloc_header = nfs_readhdr_alloc,
+ .rw_free_header = nfs_readhdr_free,
+ .rw_done = nfs_readpage_done,
+ .rw_result = nfs_readpage_result,
+ .rw_initiate = nfs_initiate_read,
+};
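All direction-specific read behaviour now funnels through this table; write.c registers a matching nfs_rw_write_ops. A sketch of how the shared completion path is assumed to dispatch rw_done and then rw_result, reconstructed from the checks the removed nfs_readpage_result_common() used to make:

	static void example_pgio_done(struct rpc_task *task,
				      struct nfs_pgio_data *data,
				      const struct nfs_rw_ops *ops)
	{
		struct inode *inode = data->header->inode;

		if (ops->rw_done(task, data, inode) != 0)
			return;
		if (task->tk_status < 0)
			nfs_set_pgio_error(data->header, task->tk_status,
					   data->args.offset);
		else
			ops->rw_result(task, data);
	}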
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2cb56943e232..084af1060d79 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2180,11 +2180,23 @@ out_no_address:
return -EINVAL;
}
+#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
+ | NFS_MOUNT_SECURE \
+ | NFS_MOUNT_TCP \
+ | NFS_MOUNT_VER3 \
+ | NFS_MOUNT_KERBEROS \
+ | NFS_MOUNT_NONLM \
+ | NFS_MOUNT_BROKEN_SUID \
+ | NFS_MOUNT_STRICTLOCK \
+ | NFS_MOUNT_UNSHARED \
+ | NFS_MOUNT_NORESVPORT \
+ | NFS_MOUNT_LEGACY_INTERFACE)
+
static int
nfs_compare_remount_data(struct nfs_server *nfss,
struct nfs_parsed_mount_data *data)
{
- if (data->flags != nfss->flags ||
+ if ((data->flags ^ nfss->flags) & NFS_MOUNT_CMP_FLAGMASK ||
data->rsize != nfss->rsize ||
data->wsize != nfss->wsize ||
data->version != nfss->nfs_client->rpc_ops->version ||
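Masking the XOR with NFS_MOUNT_CMP_FLAGMASK means a remount is only rejected when the mounts differ in a flag not excluded by the mask. Worked example with illustrative flag values:

	/* Worked example (illustrative flag values):
	 *
	 *   data->flags = old_flags | NFS_MOUNT_INTR, nfss->flags = old_flags
	 *   data->flags ^ nfss->flags           == NFS_MOUNT_INTR
	 *   NFS_MOUNT_INTR & NFS_MOUNT_CMP_FLAGMASK == 0  -> no mismatch
	 *
	 * A difference in a bit that is not excluded by the mask survives
	 * the AND, and nfs_compare_remount_data() reports the remount as
	 * incompatible.
	 */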
@@ -2248,6 +2260,7 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
data->nfs_server.addrlen = nfss->nfs_client->cl_addrlen;
data->version = nfsvers;
data->minorversion = nfss->nfs_client->cl_minorversion;
+ data->net = current->nsproxy->net_ns;
memcpy(&data->nfs_server.address, &nfss->nfs_client->cl_addr,
data->nfs_server.addrlen);
@@ -2347,18 +2360,6 @@ void nfs_clone_super(struct super_block *sb, struct nfs_mount_info *mount_info)
nfs_initialise_sb(sb);
}
-#define NFS_MOUNT_CMP_FLAGMASK ~(NFS_MOUNT_INTR \
- | NFS_MOUNT_SECURE \
- | NFS_MOUNT_TCP \
- | NFS_MOUNT_VER3 \
- | NFS_MOUNT_KERBEROS \
- | NFS_MOUNT_NONLM \
- | NFS_MOUNT_BROKEN_SUID \
- | NFS_MOUNT_STRICTLOCK \
- | NFS_MOUNT_UNSHARED \
- | NFS_MOUNT_NORESVPORT \
- | NFS_MOUNT_LEGACY_INTERFACE)
-
static int nfs_compare_mount_options(const struct super_block *s, const struct nfs_server *b, int flags)
{
const struct nfs_server *a = s->s_fs_info;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index ffb9459f180b..3ee5af4e738e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -42,10 +42,10 @@
* Local function declarations
*/
static void nfs_redirty_request(struct nfs_page *req);
-static const struct rpc_call_ops nfs_write_common_ops;
static const struct rpc_call_ops nfs_commit_ops;
static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
+static const struct nfs_rw_ops nfs_rw_write_ops;
static struct kmem_cache *nfs_wdata_cachep;
static mempool_t *nfs_wdata_mempool;
@@ -70,76 +70,19 @@ void nfs_commit_free(struct nfs_commit_data *p)
}
EXPORT_SYMBOL_GPL(nfs_commit_free);
-struct nfs_write_header *nfs_writehdr_alloc(void)
+static struct nfs_rw_header *nfs_writehdr_alloc(void)
{
- struct nfs_write_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
-
- if (p) {
- struct nfs_pgio_header *hdr = &p->header;
+ struct nfs_rw_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
+ if (p)
memset(p, 0, sizeof(*p));
- INIT_LIST_HEAD(&hdr->pages);
- INIT_LIST_HEAD(&hdr->rpc_list);
- spin_lock_init(&hdr->lock);
- atomic_set(&hdr->refcnt, 0);
- hdr->verf = &p->verf;
- }
return p;
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_alloc);
-
-static struct nfs_write_data *nfs_writedata_alloc(struct nfs_pgio_header *hdr,
- unsigned int pagecount)
-{
- struct nfs_write_data *data, *prealloc;
-
- prealloc = &container_of(hdr, struct nfs_write_header, header)->rpc_data;
- if (prealloc->header == NULL)
- data = prealloc;
- else
- data = kzalloc(sizeof(*data), GFP_KERNEL);
- if (!data)
- goto out;
-
- if (nfs_pgarray_set(&data->pages, pagecount)) {
- data->header = hdr;
- atomic_inc(&hdr->refcnt);
- } else {
- if (data != prealloc)
- kfree(data);
- data = NULL;
- }
-out:
- return data;
-}
-void nfs_writehdr_free(struct nfs_pgio_header *hdr)
+static void nfs_writehdr_free(struct nfs_rw_header *whdr)
{
- struct nfs_write_header *whdr = container_of(hdr, struct nfs_write_header, header);
mempool_free(whdr, nfs_wdata_mempool);
}
-EXPORT_SYMBOL_GPL(nfs_writehdr_free);
-
-void nfs_writedata_release(struct nfs_write_data *wdata)
-{
- struct nfs_pgio_header *hdr = wdata->header;
- struct nfs_write_header *write_header = container_of(hdr, struct nfs_write_header, header);
-
- put_nfs_open_context(wdata->args.context);
- if (wdata->pages.pagevec != wdata->pages.page_array)
- kfree(wdata->pages.pagevec);
- if (wdata == &write_header->rpc_data) {
- wdata->header = NULL;
- wdata = NULL;
- }
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- /* Note: we only free the rpc_task after callbacks are done.
- * See the comment in rpc_free_task() for why
- */
- kfree(wdata);
-}
-EXPORT_SYMBOL_GPL(nfs_writedata_release);
static void nfs_context_set_write_error(struct nfs_open_context *ctx, int error)
{
@@ -211,18 +154,78 @@ static void nfs_set_pageerror(struct page *page)
nfs_zap_mapping(page_file_mapping(page)->host, page_file_mapping(page));
}
+/*
+ * nfs_page_group_search_locked
+ * @head - head request of page group
+ * @page_offset - offset into page
+ *
+ * Search page group with head @head to find a request that contains the
+ * page offset @page_offset.
+ *
+ * Returns a pointer to the first matching nfs request, or NULL if no
+ * match is found.
+ *
+ * Must be called with the page group lock held
+ */
+static struct nfs_page *
+nfs_page_group_search_locked(struct nfs_page *head, unsigned int page_offset)
+{
+ struct nfs_page *req;
+
+ WARN_ON_ONCE(head != head->wb_head);
+ WARN_ON_ONCE(!test_bit(PG_HEADLOCK, &head->wb_head->wb_flags));
+
+ req = head;
+ do {
+ if (page_offset >= req->wb_pgbase &&
+ page_offset < (req->wb_pgbase + req->wb_bytes))
+ return req;
+
+ req = req->wb_this_page;
+ } while (req != head);
+
+ return NULL;
+}
+
+/*
+ * nfs_page_group_covers_page
+ * @head - head request of page group
+ *
+ * Return true if the page group with head @head covers the whole page,
+ * returns false otherwise
+ */
+static bool nfs_page_group_covers_page(struct nfs_page *req)
+{
+ struct nfs_page *tmp;
+ unsigned int pos = 0;
+ unsigned int len = nfs_page_length(req->wb_page);
+
+ nfs_page_group_lock(req);
+
+ do {
+ tmp = nfs_page_group_search_locked(req->wb_head, pos);
+ if (tmp) {
+ /* no way this should happen */
+ WARN_ON_ONCE(tmp->wb_pgbase != pos);
+ pos += tmp->wb_bytes - (pos - tmp->wb_pgbase);
+ }
+ } while (tmp && pos < len);
+
+ nfs_page_group_unlock(req);
+ WARN_ON_ONCE(pos > len);
+ return pos == len;
+}
+
/* We can set the PG_uptodate flag if we see that a write request
* covers the full page.
*/
-static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
+static void nfs_mark_uptodate(struct nfs_page *req)
{
- if (PageUptodate(page))
- return;
- if (base != 0)
+ if (PageUptodate(req->wb_page))
return;
- if (count != nfs_page_length(page))
+ if (!nfs_page_group_covers_page(req))
return;
- SetPageUptodate(page);
+ SetPageUptodate(req->wb_page);
}
static int wb_priority(struct writeback_control *wbc)
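nfs_page_group_covers_page() walks the wb_this_page ring from the head, advancing a cursor across the page, and only reports full coverage when the cursor reaches nfs_page_length(). Worked example with illustrative sub-request sizes:

	/* Worked example for nfs_page_group_covers_page() (one 4096-byte
	 * page, sizes illustrative):
	 *
	 *   req A: wb_pgbase = 0,    wb_bytes = 1024
	 *   req B: wb_pgbase = 1024, wb_bytes = 1024
	 *   req C: wb_pgbase = 2048, wb_bytes = 2048
	 *
	 * pos advances 0 -> 1024 -> 2048 -> 4096 == nfs_page_length(page),
	 * so the group covers the page and nfs_mark_uptodate() may set
	 * PG_uptodate.  Drop req B and the lookup at pos == 1024 returns
	 * NULL, the loop exits early, and the page stays not up to date.
	 */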
@@ -258,12 +261,15 @@ static void nfs_set_page_writeback(struct page *page)
}
}
-static void nfs_end_page_writeback(struct page *page)
+static void nfs_end_page_writeback(struct nfs_page *req)
{
- struct inode *inode = page_file_mapping(page)->host;
+ struct inode *inode = page_file_mapping(req->wb_page)->host;
struct nfs_server *nfss = NFS_SERVER(inode);
- end_page_writeback(page);
+ if (!nfs_page_group_sync_on_bit(req, PG_WB_END))
+ return;
+
+ end_page_writeback(req->wb_page);
if (atomic_long_dec_return(&nfss->writeback) < NFS_CONGESTION_OFF_THRESH)
clear_bdi_congested(&nfss->backing_dev_info, BLK_RW_ASYNC);
}
@@ -354,10 +360,8 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc
struct nfs_pageio_descriptor pgio;
int err;
- NFS_PROTO(page_file_mapping(page)->host)->write_pageio_init(&pgio,
- page->mapping->host,
- wb_priority(wbc),
- &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, page->mapping->host, wb_priority(wbc),
+ false, &nfs_async_write_completion_ops);
err = nfs_do_writepage(page, wbc, &pgio);
nfs_pageio_complete(&pgio);
if (err < 0)
@@ -400,7 +404,8 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
- NFS_PROTO(inode)->write_pageio_init(&pgio, inode, wb_priority(wbc), &nfs_async_write_completion_ops);
+ nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+ &nfs_async_write_completion_ops);
err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
nfs_pageio_complete(&pgio);
@@ -425,6 +430,8 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
/* Lock the request! */
nfs_lock_request(req);
@@ -441,6 +448,7 @@ static void nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
set_page_private(req->wb_page, (unsigned long)req);
}
nfsi->npages++;
+ set_bit(PG_INODE_REF, &req->wb_flags);
kref_get(&req->wb_kref);
spin_unlock(&inode->i_lock);
}
@@ -452,15 +460,20 @@ static void nfs_inode_remove_request(struct nfs_page *req)
{
struct inode *inode = req->wb_context->dentry->d_inode;
struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *head;
- spin_lock(&inode->i_lock);
- if (likely(!PageSwapCache(req->wb_page))) {
- set_page_private(req->wb_page, 0);
- ClearPagePrivate(req->wb_page);
- clear_bit(PG_MAPPED, &req->wb_flags);
+ if (nfs_page_group_sync_on_bit(req, PG_REMOVE)) {
+ head = req->wb_head;
+
+ spin_lock(&inode->i_lock);
+ if (likely(!PageSwapCache(head->wb_page))) {
+ set_page_private(head->wb_page, 0);
+ ClearPagePrivate(head->wb_page);
+ clear_bit(PG_MAPPED, &head->wb_flags);
+ }
+ nfsi->npages--;
+ spin_unlock(&inode->i_lock);
}
- nfsi->npages--;
- spin_unlock(&inode->i_lock);
nfs_release_request(req);
}
@@ -583,7 +596,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
if (data->verf.committed == NFS_DATA_SYNC)
return data->header->lseg == NULL;
@@ -614,7 +627,7 @@ nfs_clear_request_commit(struct nfs_page *req)
}
static inline
-int nfs_write_need_commit(struct nfs_write_data *data)
+int nfs_write_need_commit(struct nfs_pgio_data *data)
{
return 0;
}
@@ -625,6 +638,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
{
struct nfs_commit_info cinfo;
unsigned long bytes = 0;
+ bool do_destroy;
if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
goto out;
@@ -645,7 +659,7 @@ static void nfs_write_completion(struct nfs_pgio_header *hdr)
goto next;
}
if (test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags)) {
- memcpy(&req->wb_verf, &hdr->verf->verifier, sizeof(req->wb_verf));
+ memcpy(&req->wb_verf, &hdr->verf.verifier, sizeof(req->wb_verf));
nfs_mark_request_commit(req, hdr->lseg, &cinfo);
goto next;
}
@@ -653,7 +667,8 @@ remove_req:
nfs_inode_remove_request(req);
next:
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
+ do_destroy = !test_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags);
nfs_release_request(req);
}
out:
@@ -661,7 +676,7 @@ out:
}
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
-static unsigned long
+unsigned long
nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return cinfo->mds->ncommit;
@@ -718,7 +733,7 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst,
}
#else
-static unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
+unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo)
{
return 0;
}
@@ -758,6 +773,10 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
if (req == NULL)
goto out_unlock;
+ /* should be handled by nfs_flush_incompatible */
+ WARN_ON_ONCE(req->wb_head != req);
+ WARN_ON_ONCE(req->wb_this_page != req);
+
rqend = req->wb_offset + req->wb_bytes;
/*
* Tell the caller to flush out the request if
@@ -819,7 +838,7 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
- req = nfs_create_request(ctx, inode, page, offset, bytes);
+ req = nfs_create_request(ctx, page, NULL, offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@@ -837,7 +856,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
return PTR_ERR(req);
/* Update file length */
nfs_grow_file(page, offset, count);
- nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
+ nfs_mark_uptodate(req);
nfs_mark_request_dirty(req);
nfs_unlock_and_release_request(req);
return 0;
@@ -863,6 +882,8 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
return 0;
l_ctx = req->wb_lock_context;
do_flush = req->wb_page != page || req->wb_context != ctx;
+ /* for now, flush if more than 1 request in page_group */
+ do_flush |= req->wb_this_page != req;
if (l_ctx && ctx->dentry->d_inode->i_flock != NULL) {
do_flush |= l_ctx->lockowner.l_owner != current->files
|| l_ctx->lockowner.l_pid != current->tgid;
@@ -990,126 +1011,17 @@ static int flush_task_priority(int how)
return RPC_PRIORITY_NORMAL;
}
-int nfs_initiate_write(struct rpc_clnt *clnt,
- struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how, int flags)
+static void nfs_initiate_write(struct nfs_pgio_data *data, struct rpc_message *msg,
+ struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = data->header->inode;
int priority = flush_task_priority(how);
- struct rpc_task *task;
- struct rpc_message msg = {
- .rpc_argp = &data->args,
- .rpc_resp = &data->res,
- .rpc_cred = data->header->cred,
- };
- struct rpc_task_setup task_setup_data = {
- .rpc_client = clnt,
- .task = &data->task,
- .rpc_message = &msg,
- .callback_ops = call_ops,
- .callback_data = data,
- .workqueue = nfsiod_workqueue,
- .flags = RPC_TASK_ASYNC | flags,
- .priority = priority,
- };
- int ret = 0;
-
- /* Set up the initial task struct. */
- NFS_PROTO(inode)->write_setup(data, &msg);
- dprintk("NFS: %5u initiated write call "
- "(req %s/%llu, %u bytes @ offset %llu)\n",
- data->task.tk_pid,
- inode->i_sb->s_id,
- (unsigned long long)NFS_FILEID(inode),
- data->args.count,
- (unsigned long long)data->args.offset);
+ task_setup_data->priority = priority;
+ NFS_PROTO(inode)->write_setup(data, msg);
nfs4_state_protect_write(NFS_SERVER(inode)->nfs_client,
- &task_setup_data.rpc_client, &msg, data);
-
- task = rpc_run_task(&task_setup_data);
- if (IS_ERR(task)) {
- ret = PTR_ERR(task);
- goto out;
- }
- if (how & FLUSH_SYNC) {
- ret = rpc_wait_for_completion_task(task);
- if (ret == 0)
- ret = task->tk_status;
- }
- rpc_put_task(task);
-out:
- return ret;
-}
-EXPORT_SYMBOL_GPL(nfs_initiate_write);
-
-/*
- * Set up the argument/result storage required for the RPC call.
- */
-static void nfs_write_rpcsetup(struct nfs_write_data *data,
- unsigned int count, unsigned int offset,
- int how, struct nfs_commit_info *cinfo)
-{
- struct nfs_page *req = data->header->req;
-
- /* Set up the RPC argument and reply structs
- * NB: take care not to mess about with data->commit et al. */
-
- data->args.fh = NFS_FH(data->header->inode);
- data->args.offset = req_offset(req) + offset;
- /* pnfs_set_layoutcommit needs this */
- data->mds_offset = data->args.offset;
- data->args.pgbase = req->wb_pgbase + offset;
- data->args.pages = data->pages.pagevec;
- data->args.count = count;
- data->args.context = get_nfs_open_context(req->wb_context);
- data->args.lock_context = req->wb_lock_context;
- data->args.stable = NFS_UNSTABLE;
- switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
- case 0:
- break;
- case FLUSH_COND_STABLE:
- if (nfs_reqs_to_commit(cinfo))
- break;
- default:
- data->args.stable = NFS_FILE_SYNC;
- }
-
- data->res.fattr = &data->fattr;
- data->res.count = count;
- data->res.verf = &data->verf;
- nfs_fattr_init(&data->fattr);
-}
-
-static int nfs_do_write(struct nfs_write_data *data,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct inode *inode = data->header->inode;
-
- return nfs_initiate_write(NFS_CLIENT(inode), data, call_ops, how, 0);
-}
-
-static int nfs_do_multiple_writes(struct list_head *head,
- const struct rpc_call_ops *call_ops,
- int how)
-{
- struct nfs_write_data *data;
- int ret = 0;
-
- while (!list_empty(head)) {
- int ret2;
-
- data = list_first_entry(head, struct nfs_write_data, list);
- list_del_init(&data->list);
-
- ret2 = nfs_do_write(data, call_ops, how);
- if (ret == 0)
- ret = ret2;
- }
- return ret;
+ &task_setup_data->rpc_client, msg, data);
}
/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -1120,7 +1032,7 @@ static void nfs_redirty_request(struct nfs_page *req)
{
nfs_mark_request_dirty(req);
nfs_unlock_request(req);
- nfs_end_page_writeback(req->wb_page);
+ nfs_end_page_writeback(req);
nfs_release_request(req);
}
@@ -1140,173 +1052,30 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops = {
.completion = nfs_write_completion,
};
-static void nfs_flush_error(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- set_bit(NFS_IOHDR_REDO, &hdr->flags);
- while (!list_empty(&hdr->rpc_list)) {
- struct nfs_write_data *data = list_first_entry(&hdr->rpc_list,
- struct nfs_write_data, list);
- list_del(&data->list);
- nfs_writedata_release(data);
- }
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
-}
-
-/*
- * Generate multiple small requests to write out a single
- * contiguous dirty area on one page.
- */
-static int nfs_flush_multi(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req = hdr->req;
- struct page *page = req->wb_page;
- struct nfs_write_data *data;
- size_t wsize = desc->pg_bsize, nbytes;
- unsigned int offset;
- int requests = 0;
- struct nfs_commit_info cinfo;
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo) ||
- desc->pg_count > wsize))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
-
- offset = 0;
- nbytes = desc->pg_count;
- do {
- size_t len = min(nbytes, wsize);
-
- data = nfs_writedata_alloc(hdr, 1);
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
- data->pages.pagevec[0] = page;
- nfs_write_rpcsetup(data, len, offset, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- requests++;
- nbytes -= len;
- offset += len;
- } while (nbytes != 0);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-/*
- * Create an RPC task for the given write request and kick it.
- * The page must have been locked by the caller.
- *
- * It may happen that the page we're passed is not marked dirty.
- * This is the case if nfs_updatepage detects a conflicting request
- * that has been written but not committed.
- */
-static int nfs_flush_one(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- struct nfs_page *req;
- struct page **pages;
- struct nfs_write_data *data;
- struct list_head *head = &desc->pg_list;
- struct nfs_commit_info cinfo;
-
- data = nfs_writedata_alloc(hdr, nfs_page_array_len(desc->pg_base,
- desc->pg_count));
- if (!data) {
- nfs_flush_error(desc, hdr);
- return -ENOMEM;
- }
-
- nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
- pages = data->pages.pagevec;
- while (!list_empty(head)) {
- req = nfs_list_entry(head->next);
- nfs_list_remove_request(req);
- nfs_list_add_request(req, &hdr->pages);
- *pages++ = req->wb_page;
- }
-
- if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
- (desc->pg_moreio || nfs_reqs_to_commit(&cinfo)))
- desc->pg_ioflags &= ~FLUSH_COND_STABLE;
-
- /* Set up the argument struct */
- nfs_write_rpcsetup(data, desc->pg_count, 0, desc->pg_ioflags, &cinfo);
- list_add(&data->list, &hdr->rpc_list);
- desc->pg_rpc_callops = &nfs_write_common_ops;
- return 0;
-}
-
-int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
- struct nfs_pgio_header *hdr)
-{
- if (desc->pg_bsize < PAGE_CACHE_SIZE)
- return nfs_flush_multi(desc, hdr);
- return nfs_flush_one(desc, hdr);
-}
-EXPORT_SYMBOL_GPL(nfs_generic_flush);
-
-static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
-{
- struct nfs_write_header *whdr;
- struct nfs_pgio_header *hdr;
- int ret;
-
- whdr = nfs_writehdr_alloc();
- if (!whdr) {
- desc->pg_completion_ops->error_cleanup(&desc->pg_list);
- return -ENOMEM;
- }
- hdr = &whdr->header;
- nfs_pgheader_init(desc, hdr, nfs_writehdr_free);
- atomic_inc(&hdr->refcnt);
- ret = nfs_generic_flush(desc, hdr);
- if (ret == 0)
- ret = nfs_do_multiple_writes(&hdr->rpc_list,
- desc->pg_rpc_callops,
- desc->pg_ioflags);
- if (atomic_dec_and_test(&hdr->refcnt))
- hdr->completion_ops->completion(hdr);
- return ret;
-}
-
-static const struct nfs_pageio_ops nfs_pageio_write_ops = {
- .pg_test = nfs_generic_pg_test,
- .pg_doio = nfs_generic_pg_writepages,
-};
-
void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
- struct inode *inode, int ioflags,
+ struct inode *inode, int ioflags, bool force_mds,
const struct nfs_pgio_completion_ops *compl_ops)
{
- nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops, compl_ops,
- NFS_SERVER(inode)->wsize, ioflags);
+ struct nfs_server *server = NFS_SERVER(inode);
+ const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
+
+#ifdef CONFIG_NFS_V4_1
+ if (server->pnfs_curr_ld && !force_mds)
+ pg_ops = server->pnfs_curr_ld->pg_write_ops;
+#endif
+ nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
+ server->wsize, ioflags);
}
EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
{
- pgio->pg_ops = &nfs_pageio_write_ops;
+ pgio->pg_ops = &nfs_pgio_rw_ops;
pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
-void nfs_write_prepare(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
- int err;
- err = NFS_PROTO(data->header->inode)->write_rpc_prepare(task, data);
- if (err)
- rpc_exit(task, err);
-}
-
void nfs_commit_prepare(struct rpc_task *task, void *calldata)
{
struct nfs_commit_data *data = calldata;
@@ -1314,23 +1083,8 @@ void nfs_commit_prepare(struct rpc_task *task, void *calldata)
NFS_PROTO(data->inode)->commit_rpc_prepare(task, data);
}
-/*
- * Handle a write reply that flushes a whole page.
- *
- * FIXME: There is an inherent race with invalidate_inode_pages and
- * writebacks since the page->count is kept > 1 for as long
- * as the page has a write request pending.
- */
-static void nfs_writeback_done_common(struct rpc_task *task, void *calldata)
-{
- struct nfs_write_data *data = calldata;
-
- nfs_writeback_done(task, data);
-}
-
-static void nfs_writeback_release_common(void *calldata)
+static void nfs_writeback_release_common(struct nfs_pgio_data *data)
{
- struct nfs_write_data *data = calldata;
struct nfs_pgio_header *hdr = data->header;
int status = data->task.tk_status;
@@ -1339,34 +1093,46 @@ static void nfs_writeback_release_common(void *calldata)
if (test_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags))
; /* Do nothing */
else if (!test_and_set_bit(NFS_IOHDR_NEED_COMMIT, &hdr->flags))
- memcpy(hdr->verf, &data->verf, sizeof(*hdr->verf));
- else if (memcmp(hdr->verf, &data->verf, sizeof(*hdr->verf)))
+ memcpy(&hdr->verf, &data->verf, sizeof(hdr->verf));
+ else if (memcmp(&hdr->verf, &data->verf, sizeof(hdr->verf)))
set_bit(NFS_IOHDR_NEED_RESCHED, &hdr->flags);
spin_unlock(&hdr->lock);
}
- nfs_writedata_release(data);
}
-static const struct rpc_call_ops nfs_write_common_ops = {
- .rpc_call_prepare = nfs_write_prepare,
- .rpc_call_done = nfs_writeback_done_common,
- .rpc_release = nfs_writeback_release_common,
-};
+/*
+ * Special version of should_remove_suid() that ignores capabilities.
+ */
+static int nfs_should_remove_suid(const struct inode *inode)
+{
+ umode_t mode = inode->i_mode;
+ int kill = 0;
+
+ /* suid always must be killed */
+ if (unlikely(mode & S_ISUID))
+ kill = ATTR_KILL_SUID;
+ /*
+ * sgid without any exec bits is just a mandatory locking mark; leave
+ * it alone. If some exec bits are set, it's a real sgid; kill it.
+ */
+ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP)))
+ kill |= ATTR_KILL_SGID;
+
+ if (unlikely(kill && S_ISREG(mode)))
+ return kill;
+
+ return 0;
+}
/*
* This function is called when the WRITE call is complete.
*/
-void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
+static int nfs_writeback_done(struct rpc_task *task, struct nfs_pgio_data *data,
+ struct inode *inode)
{
- struct nfs_writeargs *argp = &data->args;
- struct nfs_writeres *resp = &data->res;
- struct inode *inode = data->header->inode;
int status;
- dprintk("NFS: %5u nfs_writeback_done (status %d)\n",
- task->tk_pid, task->tk_status);
-
/*
* ->write_done will attempt to use post-op attributes to detect
* conflicting writes by other clients. A strict interpretation
@@ -1376,11 +1142,11 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
*/
status = NFS_PROTO(inode)->write_done(task, data);
if (status != 0)
- return;
- nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
+ return status;
+ nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, data->res.count);
#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
- if (resp->verf->committed < argp->stable && task->tk_status >= 0) {
+ if (data->res.verf->committed < data->args.stable && task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
@@ -1396,18 +1162,31 @@ void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
- resp->verf->committed, argp->stable);
+ data->res.verf->committed, data->args.stable);
complain = jiffies + 300 * HZ;
}
}
#endif
- if (task->tk_status < 0)
- nfs_set_pgio_error(data->header, task->tk_status, argp->offset);
- else if (resp->count < argp->count) {
+
+ /* Deal with the suid/sgid bit corner case */
+ if (nfs_should_remove_suid(inode))
+ nfs_mark_for_revalidate(inode);
+ return 0;
+}
+
+/*
+ * This function is called when the WRITE call is complete.
+ */
+static void nfs_writeback_result(struct rpc_task *task, struct nfs_pgio_data *data)
+{
+ struct nfs_pgio_args *argp = &data->args;
+ struct nfs_pgio_res *resp = &data->res;
+
+ if (resp->count < argp->count) {
static unsigned long complain;
/* This a short write! */
- nfs_inc_stats(inode, NFSIOS_SHORTWRITE);
+ nfs_inc_stats(data->header->inode, NFSIOS_SHORTWRITE);
/* Has the server at least made some progress? */
if (resp->count == 0) {
@@ -1874,7 +1653,7 @@ int nfs_migrate_page(struct address_space *mapping, struct page *newpage,
int __init nfs_init_writepagecache(void)
{
nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
- sizeof(struct nfs_write_header),
+ sizeof(struct nfs_rw_header),
0, SLAB_HWCACHE_ALIGN,
NULL);
if (nfs_wdata_cachep == NULL)
@@ -1936,3 +1715,12 @@ void nfs_destroy_writepagecache(void)
kmem_cache_destroy(nfs_wdata_cachep);
}
+static const struct nfs_rw_ops nfs_rw_write_ops = {
+ .rw_mode = FMODE_WRITE,
+ .rw_alloc_header = nfs_writehdr_alloc,
+ .rw_free_header = nfs_writehdr_free,
+ .rw_release = nfs_writeback_release_common,
+ .rw_done = nfs_writeback_done,
+ .rw_result = nfs_writeback_result,
+ .rw_initiate = nfs_initiate_write,
+};
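The write table mirrors the read-side one, with rw_release added for the commit-verifier bookkeeping in nfs_writeback_release_common(). The suid/sgid handling folded into nfs_writeback_done() behaves like the generic helper minus the capability check; worked example with illustrative modes:

	/* Worked example for nfs_should_remove_suid() (illustrative modes):
	 *
	 *   regular file, mode 06775 (S_ISUID | S_ISGID | S_IXGRP set)
	 *       -> ATTR_KILL_SUID | ATTR_KILL_SGID, so nfs_writeback_done()
	 *          marks the inode for revalidation after the write;
	 *   regular file, mode 02664 (S_ISGID set, no group exec: the
	 *          mandatory-locking marker) -> 0, nothing to revalidate;
	 *   non-regular file -> 0 regardless of its mode bits.
	 */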
diff --git a/fs/nfsd/acl.h b/fs/nfsd/acl.h
index b481e1f5eecc..a986ceb6fd0d 100644
--- a/fs/nfsd/acl.h
+++ b/fs/nfsd/acl.h
@@ -49,7 +49,7 @@ struct svc_rqst;
struct nfs4_acl *nfs4_acl_new(int);
int nfs4_acl_get_whotype(char *, u32);
-__be32 nfs4_acl_write_who(int who, __be32 **p, int *len);
+__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who);
int nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry,
struct nfs4_acl **acl);
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 2645be435e75..72f44823adbb 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -1,7 +1,6 @@
/* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> */
#include <linux/sched.h>
-#include <linux/user_namespace.h>
#include "nfsd.h"
#include "auth.h"
@@ -25,7 +24,6 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
struct cred *new;
int i;
int flags = nfsexp_flags(rqstp, exp);
- int ret;
validate_process_creds();
@@ -86,8 +84,7 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
return 0;
oom:
- ret = -ENOMEM;
abort_creds(new);
- return ret;
+ return -ENOMEM;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 8513c598fabf..13b85f94d9e2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -17,17 +17,12 @@
#include <linux/exportfs.h>
#include <linux/sunrpc/svc_xprt.h>
-#include <net/ipv6.h>
-
#include "nfsd.h"
#include "nfsfh.h"
#include "netns.h"
#define NFSDDBG_FACILITY NFSDDBG_EXPORT
-typedef struct auth_domain svc_client;
-typedef struct svc_export svc_export;
-
/*
* We have two caches.
* One maps client+vfsmnt+dentry to export options - the export map
@@ -73,7 +68,7 @@ static struct svc_expkey *svc_expkey_lookup(struct cache_detail *cd, struct svc_
static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
{
- /* client fsidtype fsid [path] */
+ /* client fsidtype fsid expiry [path] */
char *buf;
int len;
struct auth_domain *dom = NULL;
@@ -295,13 +290,19 @@ svc_expkey_update(struct cache_detail *cd, struct svc_expkey *new,
static void nfsd4_fslocs_free(struct nfsd4_fs_locations *fsloc)
{
+ struct nfsd4_fs_location *locations = fsloc->locations;
int i;
+ if (!locations)
+ return;
+
for (i = 0; i < fsloc->locations_count; i++) {
- kfree(fsloc->locations[i].path);
- kfree(fsloc->locations[i].hosts);
+ kfree(locations[i].path);
+ kfree(locations[i].hosts);
}
- kfree(fsloc->locations);
+
+ kfree(locations);
+ fsloc->locations = NULL;
}
static void svc_export_put(struct kref *ref)
@@ -388,6 +389,10 @@ fsloc_parse(char **mesg, char *buf, struct nfsd4_fs_locations *fsloc)
int len;
int migrated, i, err;
+ /* more than one fsloc */
+ if (fsloc->locations)
+ return -EINVAL;
+
/* listsize */
err = get_uint(mesg, &fsloc->locations_count);
if (err)
@@ -437,13 +442,18 @@ out_free_all:
static int secinfo_parse(char **mesg, char *buf, struct svc_export *exp)
{
- int listsize, err;
struct exp_flavor_info *f;
+ u32 listsize;
+ int err;
+
+ /* more than one secinfo */
+ if (exp->ex_nflavors)
+ return -EINVAL;
- err = get_int(mesg, &listsize);
+ err = get_uint(mesg, &listsize);
if (err)
return err;
- if (listsize < 0 || listsize > MAX_SECINFO_LIST)
+ if (listsize > MAX_SECINFO_LIST)
return -EINVAL;
for (f = exp->ex_flavors; f < exp->ex_flavors + listsize; f++) {
@@ -474,6 +484,27 @@ static inline int
secinfo_parse(char **mesg, char *buf, struct svc_export *exp) { return 0; }
#endif
+static inline int
+uuid_parse(char **mesg, char *buf, unsigned char **puuid)
+{
+ int len;
+
+ /* more than one uuid */
+ if (*puuid)
+ return -EINVAL;
+
+ /* expect a 16 byte uuid encoded as \xXXXX... */
+ len = qword_get(mesg, buf, PAGE_SIZE);
+ if (len != EX_UUID_LEN)
+ return -EINVAL;
+
+ *puuid = kmemdup(buf, EX_UUID_LEN, GFP_KERNEL);
+ if (*puuid == NULL)
+ return -ENOMEM;
+
+ return 0;
+}
+
static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
{
/* client path expiry [flags anonuid anongid fsid] */
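Splitting the uuid handling into uuid_parse() also lets every keyword reject a second occurrence, as fsloc_parse() and secinfo_parse() now do. Worked example of the duplicate guard (export line illustrative):

	/* Worked example of the duplicate guard:
	 *
	 *   "... uuid \x00112233445566778899aabbccddeeff uuid \x... ..."
	 *
	 * The first uuid_parse() kmemdup()s the 16-byte value into
	 * exp.ex_uuid; the second call sees *puuid != NULL and fails with
	 * -EINVAL instead of overwriting (and leaking) the earlier
	 * allocation, which is what the open-coded version would have done.
	 */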
@@ -552,18 +583,9 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
while ((len = qword_get(&mesg, buf, PAGE_SIZE)) > 0) {
if (strcmp(buf, "fsloc") == 0)
err = fsloc_parse(&mesg, buf, &exp.ex_fslocs);
- else if (strcmp(buf, "uuid") == 0) {
- /* expect a 16 byte uuid encoded as \xXXXX... */
- len = qword_get(&mesg, buf, PAGE_SIZE);
- if (len != 16)
- err = -EINVAL;
- else {
- exp.ex_uuid =
- kmemdup(buf, 16, GFP_KERNEL);
- if (exp.ex_uuid == NULL)
- err = -ENOMEM;
- }
- } else if (strcmp(buf, "secinfo") == 0)
+ else if (strcmp(buf, "uuid") == 0)
+ err = uuid_parse(&mesg, buf, &exp.ex_uuid);
+ else if (strcmp(buf, "secinfo") == 0)
err = secinfo_parse(&mesg, buf, &exp);
else
/* quietly ignore unknown words and anything
@@ -649,7 +671,7 @@ static int svc_export_show(struct seq_file *m,
if (exp->ex_uuid) {
int i;
seq_puts(m, ",uuid=");
- for (i=0; i<16; i++) {
+ for (i = 0; i < EX_UUID_LEN; i++) {
if ((i&3) == 0 && i)
seq_putc(m, ':');
seq_printf(m, "%02x", exp->ex_uuid[i]);
@@ -771,7 +793,7 @@ svc_export_update(struct svc_export *new, struct svc_export *old)
static struct svc_expkey *
-exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
+exp_find_key(struct cache_detail *cd, struct auth_domain *clp, int fsid_type,
u32 *fsidv, struct cache_req *reqp)
{
struct svc_expkey key, *ek;
@@ -793,9 +815,9 @@ exp_find_key(struct cache_detail *cd, svc_client *clp, int fsid_type,
return ek;
}
-
-static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
- const struct path *path, struct cache_req *reqp)
+static struct svc_export *
+exp_get_by_name(struct cache_detail *cd, struct auth_domain *clp,
+ const struct path *path, struct cache_req *reqp)
{
struct svc_export *exp, key;
int err;
@@ -819,11 +841,11 @@ static svc_export *exp_get_by_name(struct cache_detail *cd, svc_client *clp,
/*
* Find the export entry for a given dentry.
*/
-static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
- struct path *path)
+static struct svc_export *
+exp_parent(struct cache_detail *cd, struct auth_domain *clp, struct path *path)
{
struct dentry *saved = dget(path->dentry);
- svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
+ struct svc_export *exp = exp_get_by_name(cd, clp, path, NULL);
while (PTR_ERR(exp) == -ENOENT && !IS_ROOT(path->dentry)) {
struct dentry *parent = dget_parent(path->dentry);
@@ -844,7 +866,7 @@ static struct svc_export *exp_parent(struct cache_detail *cd, svc_client *clp,
* since its harder to fool a kernel module than a user space program.
*/
int
-exp_rootfh(struct net *net, svc_client *clp, char *name,
+exp_rootfh(struct net *net, struct auth_domain *clp, char *name,
struct knfsd_fh *f, int maxsize)
{
struct svc_export *exp;
diff --git a/include/linux/nfsd/export.h b/fs/nfsd/export.h
index 7898c997dfea..cfeea85c5bed 100644
--- a/include/linux/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -1,17 +1,16 @@
/*
- * include/linux/nfsd/export.h
- *
- * Public declarations for NFS exports. The definitions for the
- * syscall interface are in nfsctl.h
- *
* Copyright (C) 1995-1997 Olaf Kirch <okir@monad.swb.de>
*/
#ifndef NFSD_EXPORT_H
#define NFSD_EXPORT_H
-# include <linux/nfsd/nfsfh.h>
+#include <linux/sunrpc/cache.h>
#include <uapi/linux/nfsd/export.h>
+struct knfsd_fh;
+struct svc_fh;
+struct svc_rqst;
+
/*
* FS Locations
*/
@@ -38,6 +37,7 @@ struct nfsd4_fs_locations {
* spkm3i, and spkm3p (and using all 8 at once should be rare).
*/
#define MAX_SECINFO_LIST 8
+#define EX_UUID_LEN 16
struct exp_flavor_info {
u32 pseudoflavor;
@@ -54,7 +54,7 @@ struct svc_export {
int ex_fsid;
unsigned char * ex_uuid; /* 16 byte fsid */
struct nfsd4_fs_locations ex_fslocs;
- int ex_nflavors;
+ uint32_t ex_nflavors;
struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
struct cache_detail *cd;
};
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
index d620e7f81429..2ed05c3cd43d 100644
--- a/fs/nfsd/fault_inject.c
+++ b/fs/nfsd/fault_inject.c
@@ -97,25 +97,14 @@ static ssize_t fault_inject_read(struct file *file, char __user *buf,
{
static u64 val;
char read_buf[25];
- size_t size, ret;
+ size_t size;
loff_t pos = *ppos;
if (!pos)
nfsd_inject_get(file_inode(file)->i_private, &val);
size = scnprintf(read_buf, sizeof(read_buf), "%llu\n", val);
- if (pos < 0)
- return -EINVAL;
- if (pos >= size || !len)
- return 0;
- if (len > size - pos)
- len = size - pos;
- ret = copy_to_user(buf, read_buf + pos, len);
- if (ret == len)
- return -EFAULT;
- len -= ret;
- *ppos = pos + len;
- return len;
+ return simple_read_from_buffer(buf, len, ppos, read_buf, size);
}
static ssize_t fault_inject_write(struct file *file, const char __user *buf,
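simple_read_from_buffer() performs the bounds handling the removed code open-coded: it rejects a negative *ppos, clamps the copy to what remains of the buffer, advances *ppos, and returns the byte count or -EFAULT. A minimal sketch of the same pattern in another read handler (names illustrative):

	static ssize_t example_read(struct file *file, char __user *buf,
				    size_t len, loff_t *ppos)
	{
		char tmp[32];
		size_t size = scnprintf(tmp, sizeof(tmp), "%llu\n",
					(unsigned long long)jiffies);

		return simple_read_from_buffer(buf, len, ppos, tmp, size);
	}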
diff --git a/fs/nfsd/idmap.h b/fs/nfsd/idmap.h
index 66e58db01936..a3f34900091f 100644
--- a/fs/nfsd/idmap.h
+++ b/fs/nfsd/idmap.h
@@ -56,7 +56,7 @@ static inline void nfsd_idmap_shutdown(struct net *net)
__be32 nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, kuid_t *);
__be32 nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, kgid_t *);
-__be32 nfsd4_encode_user(struct svc_rqst *, kuid_t, __be32 **, int *);
-__be32 nfsd4_encode_group(struct svc_rqst *, kgid_t, __be32 **, int *);
+__be32 nfsd4_encode_user(struct xdr_stream *, struct svc_rqst *, kuid_t);
+__be32 nfsd4_encode_group(struct xdr_stream *, struct svc_rqst *, kgid_t);
#endif /* LINUX_NFSD_IDMAP_H */
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 11c1fba29312..12b023a7ab7d 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -182,7 +182,8 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg
static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *argp)
{
- if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ p = nfs2svc_decode_fh(p, &argp->fh);
+ if (!p)
return 0;
argp->mask = ntohl(*p); p++;
@@ -197,7 +198,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base;
int n;
- if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ p = nfs2svc_decode_fh(p, &argp->fh);
+ if (!p)
return 0;
argp->mask = ntohl(*p++);
if (argp->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
@@ -218,7 +220,8 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_fhandle *argp)
{
- if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ p = nfs2svc_decode_fh(p, &argp->fh);
+ if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
@@ -226,7 +229,8 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p,
static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *argp)
{
- if (!(p = nfs2svc_decode_fh(p, &argp->fh)))
+ p = nfs2svc_decode_fh(p, &argp->fh);
+ if (!p)
return 0;
argp->access = ntohl(*p++);
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index adc5f1b1dc26..2a514e21dc74 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -128,7 +128,8 @@ out:
static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_getaclargs *args)
{
- if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+ p = nfs3svc_decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->mask = ntohl(*p); p++;
@@ -143,7 +144,8 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int base;
int n;
- if (!(p = nfs3svc_decode_fh(p, &args->fh)))
+ p = nfs3svc_decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->mask = ntohl(*p++);
if (args->mask & ~(NFS_ACL|NFS_ACLCNT|NFS_DFACL|NFS_DFACLCNT) ||
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index de6e39e12cb3..e6c01e80325e 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -278,7 +278,8 @@ void fill_post_wcc(struct svc_fh *fhp)
int
nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
@@ -287,7 +288,8 @@ int
nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_sattrargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = decode_sattr3(p, &args->attrs);
@@ -315,7 +317,8 @@ int
nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_accessargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->access = ntohl(*p++);
@@ -330,7 +333,8 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
int v;
u32 max_blocksize = svc_max_payload(rqstp);
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
@@ -360,7 +364,8 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, v, hdr, dlen;
u32 max_blocksize = svc_max_payload(rqstp);
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
@@ -535,7 +540,8 @@ int
nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readlinkargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
@@ -558,7 +564,8 @@ int
nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_readdirargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2;
@@ -580,7 +587,8 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p,
int len;
u32 max_blocksize = svc_max_payload(rqstp);
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = xdr_decode_hyper(p, &args->cookie);
args->verf = p; p += 2;
@@ -605,7 +613,8 @@ int
nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd3_commitargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p = xdr_decode_hyper(p, &args->offset);
args->count = ntohl(*p++);
diff --git a/fs/nfsd/nfs4acl.c b/fs/nfsd/nfs4acl.c
index f66c66b9f182..d714156a19fd 100644
--- a/fs/nfsd/nfs4acl.c
+++ b/fs/nfsd/nfs4acl.c
@@ -36,7 +36,6 @@
#include <linux/slab.h>
#include <linux/nfs_fs.h>
-#include <linux/export.h>
#include "nfsfh.h"
#include "nfsd.h"
#include "acl.h"
@@ -920,20 +919,19 @@ nfs4_acl_get_whotype(char *p, u32 len)
return NFS4_ACL_WHO_NAMED;
}
-__be32 nfs4_acl_write_who(int who, __be32 **p, int *len)
+__be32 nfs4_acl_write_who(struct xdr_stream *xdr, int who)
{
+ __be32 *p;
int i;
- int bytes;
for (i = 0; i < ARRAY_SIZE(s2t_map); i++) {
if (s2t_map[i].type != who)
continue;
- bytes = 4 + (XDR_QUADLEN(s2t_map[i].stringlen) << 2);
- if (bytes > *len)
+ p = xdr_reserve_space(xdr, s2t_map[i].stringlen + 4);
+ if (!p)
return nfserr_resource;
- *p = xdr_encode_opaque(*p, s2t_map[i].string,
+ p = xdr_encode_opaque(p, s2t_map[i].string,
s2t_map[i].stringlen);
- *len -= bytes;
return 0;
}
WARN_ON_ONCE(1);
diff --git a/fs/nfsd/nfs4idmap.c b/fs/nfsd/nfs4idmap.c
index c0dfde68742e..a0ab0a847d69 100644
--- a/fs/nfsd/nfs4idmap.c
+++ b/fs/nfsd/nfs4idmap.c
@@ -551,44 +551,43 @@ idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen
return 0;
}
-static __be32 encode_ascii_id(u32 id, __be32 **p, int *buflen)
+static __be32 encode_ascii_id(struct xdr_stream *xdr, u32 id)
{
char buf[11];
int len;
- int bytes;
+ __be32 *p;
len = sprintf(buf, "%u", id);
- bytes = 4 + (XDR_QUADLEN(len) << 2);
- if (bytes > *buflen)
+ p = xdr_reserve_space(xdr, len + 4);
+ if (!p)
return nfserr_resource;
- *p = xdr_encode_opaque(*p, buf, len);
- *buflen -= bytes;
+ p = xdr_encode_opaque(p, buf, len);
return 0;
}
-static __be32 idmap_id_to_name(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
+static __be32 idmap_id_to_name(struct xdr_stream *xdr,
+ struct svc_rqst *rqstp, int type, u32 id)
{
struct ent *item, key = {
.id = id,
.type = type,
};
+ __be32 *p;
int ret;
- int bytes;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
strlcpy(key.authname, rqst_authname(rqstp), sizeof(key.authname));
ret = idmap_lookup(rqstp, idtoname_lookup, &key, nn->idtoname_cache, &item);
if (ret == -ENOENT)
- return encode_ascii_id(id, p, buflen);
+ return encode_ascii_id(xdr, id);
if (ret)
return nfserrno(ret);
ret = strlen(item->name);
WARN_ON_ONCE(ret > IDMAP_NAMESZ);
- bytes = 4 + (XDR_QUADLEN(ret) << 2);
- if (bytes > *buflen)
+ p = xdr_reserve_space(xdr, ret + 4);
+ if (!p)
return nfserr_resource;
- *p = xdr_encode_opaque(*p, item->name, ret);
- *buflen -= bytes;
+ p = xdr_encode_opaque(p, item->name, ret);
cache_put(&item->h, nn->idtoname_cache);
return 0;
}
@@ -622,11 +621,12 @@ do_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, u
return idmap_name_to_id(rqstp, type, name, namelen, id);
}
-static __be32 encode_name_from_id(struct svc_rqst *rqstp, int type, u32 id, __be32 **p, int *buflen)
+static __be32 encode_name_from_id(struct xdr_stream *xdr,
+ struct svc_rqst *rqstp, int type, u32 id)
{
if (nfs4_disable_idmapping && rqstp->rq_cred.cr_flavor < RPC_AUTH_GSS)
- return encode_ascii_id(id, p, buflen);
- return idmap_id_to_name(rqstp, type, id, p, buflen);
+ return encode_ascii_id(xdr, id);
+ return idmap_id_to_name(xdr, rqstp, type, id);
}
__be32
@@ -655,14 +655,16 @@ nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
return status;
}
-__be32 nfsd4_encode_user(struct svc_rqst *rqstp, kuid_t uid, __be32 **p, int *buflen)
+__be32 nfsd4_encode_user(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ kuid_t uid)
{
u32 id = from_kuid(&init_user_ns, uid);
- return encode_name_from_id(rqstp, IDMAP_TYPE_USER, id, p, buflen);
+ return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_USER, id);
}
-__be32 nfsd4_encode_group(struct svc_rqst *rqstp, kgid_t gid, __be32 **p, int *buflen)
+__be32 nfsd4_encode_group(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ kgid_t gid)
{
u32 id = from_kgid(&init_user_ns, gid);
- return encode_name_from_id(rqstp, IDMAP_TYPE_GROUP, id, p, buflen);
+ return encode_name_from_id(xdr, rqstp, IDMAP_TYPE_GROUP, id);
}
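Both nfs4acl.c and nfs4idmap.c above move their encoders from the old (__be32 **p, int *buflen) convention to an xdr_stream: rather than checking a caller-maintained byte count and advancing the caller's pointer, the encoder reserves space on the stream and returns nfserr_resource when the reservation fails. A sketch of that reserve-then-encode pattern for a hypothetical string attribute (encode_demo_string is illustrative; nfserr_resource comes from nfsd's internal headers):

    #include <linux/sunrpc/xdr.h>

    static __be32 encode_demo_string(struct xdr_stream *xdr,
                                     const char *str, u32 len)
    {
            __be32 *p;

            /* 4 bytes for the XDR length word plus the padded string body */
            p = xdr_reserve_space(xdr, len + 4);
            if (!p)
                    return nfserr_resource;
            xdr_encode_opaque(p, str, len);
            return 0;
    }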
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d543222babf3..6851b003f2a4 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -430,12 +430,12 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
goto out;
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
status = nfs4_check_open_reclaim(&open->op_clientid,
cstate->minorversion,
nn);
if (status)
goto out;
+ open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
status = do_open_fhandle(rqstp, cstate, open);
@@ -445,7 +445,6 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
break;
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
- open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
dprintk("NFSD: unsupported OPEN claim type %d\n",
open->op_claim_type);
status = nfserr_notsupp;
@@ -786,7 +785,6 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (!nfsd4_last_compound_op(rqstp))
rqstp->rq_splice_ok = false;
- nfs4_lock_state();
/* check stateid */
if ((status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, &read->rd_stateid,
@@ -794,11 +792,8 @@ nfsd4_read(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
dprintk("NFSD: nfsd4_read: couldn't process stateid!\n");
goto out;
}
- if (read->rd_filp)
- get_file(read->rd_filp);
status = nfs_ok;
out:
- nfs4_unlock_state();
read->rd_rqstp = rqstp;
read->rd_fhp = &cstate->current_fh;
return status;
@@ -937,10 +932,8 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
int err;
if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
- nfs4_lock_state();
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp), cstate,
&setattr->sa_stateid, WR_STATE, NULL);
- nfs4_unlock_state();
if (status) {
dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
return status;
@@ -1006,17 +999,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
if (write->wr_offset >= OFFSET_MAX)
return nfserr_inval;
- nfs4_lock_state();
status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
cstate, stateid, WR_STATE, &filp);
if (status) {
- nfs4_unlock_state();
dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
return status;
}
- if (filp)
- get_file(filp);
- nfs4_unlock_state();
cnt = write->wr_buflen;
write->wr_how_written = write->wr_stable_how;
@@ -1072,10 +1060,10 @@ _nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
return nfserr_jukebox;
p = buf;
- status = nfsd4_encode_fattr(&cstate->current_fh,
+ status = nfsd4_encode_fattr_to_buf(&p, count, &cstate->current_fh,
cstate->current_fh.fh_export,
- cstate->current_fh.fh_dentry, &p,
- count, verify->ve_bmval,
+ cstate->current_fh.fh_dentry,
+ verify->ve_bmval,
rqstp, 0);
/*
* If nfsd4_encode_fattr() ran out of space, assume that's because
@@ -1182,9 +1170,7 @@ struct nfsd4_operation {
static struct nfsd4_operation nfsd4_ops[];
-#ifdef NFSD_DEBUG
static const char *nfsd4_op_name(unsigned opnum);
-#endif
/*
* Enforce NFSv4.1 COMPOUND ordering rules:
@@ -1226,6 +1212,8 @@ static inline struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
bool nfsd4_cache_this_op(struct nfsd4_op *op)
{
+ if (op->opnum == OP_ILLEGAL)
+ return false;
return OPDESC(op)->op_flags & OP_CACHEME;
}
@@ -1262,6 +1250,25 @@ static bool need_wrongsec_check(struct svc_rqst *rqstp)
return !(nextd->op_flags & OP_HANDLES_WRONGSEC);
}
+static void svcxdr_init_encode(struct svc_rqst *rqstp,
+ struct nfsd4_compoundres *resp)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_buf *buf = &rqstp->rq_res;
+ struct kvec *head = buf->head;
+
+ xdr->buf = buf;
+ xdr->iov = head;
+ xdr->p = head->iov_base + head->iov_len;
+ xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
+ /* Tail and page_len should be zero at this point: */
+ buf->len = buf->head[0].iov_len;
+ xdr->scratch.iov_len = 0;
+ xdr->page_ptr = buf->pages - 1;
+ buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
+ - rqstp->rq_auth_slack;
+}
+
/*
* COMPOUND call.
*/
@@ -1275,24 +1282,16 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
- int slack_bytes;
- u32 plen = 0;
__be32 status;
- resp->xbuf = &rqstp->rq_res;
- resp->p = rqstp->rq_res.head[0].iov_base +
- rqstp->rq_res.head[0].iov_len;
- resp->tagp = resp->p;
+ svcxdr_init_encode(rqstp, resp);
+ resp->tagp = resp->xdr.p;
/* reserve space for: taglen, tag, and opcnt */
- resp->p += 2 + XDR_QUADLEN(args->taglen);
- resp->end = rqstp->rq_res.head[0].iov_base + PAGE_SIZE;
+ xdr_reserve_space(&resp->xdr, 8 + args->taglen);
resp->taglen = args->taglen;
resp->tag = args->tag;
- resp->opcnt = 0;
resp->rqstp = rqstp;
cstate->minorversion = args->minorversion;
- cstate->replay_owner = NULL;
- cstate->session = NULL;
fh_init(current_fh, NFS4_FHSIZE);
fh_init(save_fh, NFS4_FHSIZE);
/*
@@ -1332,19 +1331,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
goto encode_op;
}
- /* We must be able to encode a successful response to
- * this operation, with enough room left over to encode a
- * failed response to the next operation. If we don't
- * have enough room, fail with ERR_RESOURCE.
- */
- slack_bytes = (char *)resp->end - (char *)resp->p;
- if (slack_bytes < COMPOUND_SLACK_SPACE
- + COMPOUND_ERR_SLACK_SPACE) {
- BUG_ON(slack_bytes < COMPOUND_ERR_SLACK_SPACE);
- op->status = nfserr_resource;
- goto encode_op;
- }
-
opdesc = OPDESC(op);
if (!current_fh->fh_dentry) {
@@ -1362,9 +1348,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
/* If op is non-idempotent */
if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
- plen = opdesc->op_rsize_bop(rqstp, op);
/*
- * If there's still another operation, make sure
+ * Don't execute this op if we couldn't encode a
+ * successful reply:
+ */
+ u32 plen = opdesc->op_rsize_bop(rqstp, op);
+ /*
+ * Plus if there's another operation, make sure
* we'll have space to at least encode an error:
*/
if (resp->opcnt < args->opcnt)
@@ -1399,7 +1389,7 @@ encode_op:
}
if (op->status == nfserr_replay_me) {
op->replay = &cstate->replay_owner->so_replay;
- nfsd4_encode_replay(resp, op);
+ nfsd4_encode_replay(&resp->xdr, op);
status = op->status = op->replay->rp_status;
} else {
nfsd4_encode_operation(resp, op);
@@ -1438,7 +1428,8 @@ out:
#define op_encode_change_info_maxsz (5)
#define nfs4_fattr_bitmap_maxsz (4)
-#define op_encode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
+/* We'll fall back on returning no lockowner if we run out of space: */
+#define op_encode_lockowner_maxsz (0)
#define op_encode_lock_denied_maxsz (8 + op_encode_lockowner_maxsz)
#define nfs4_owner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ))
@@ -1470,6 +1461,49 @@ static inline u32 nfsd4_create_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
+ nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
}
+/*
+ * Note since this is an idempotent operation we won't insist on failing
+ * the op prematurely if the estimate is too large. We may turn off splice
+ * reads unnecessarily.
+ */
+static inline u32 nfsd4_getattr_rsize(struct svc_rqst *rqstp,
+ struct nfsd4_op *op)
+{
+ u32 *bmap = op->u.getattr.ga_bmval;
+ u32 bmap0 = bmap[0], bmap1 = bmap[1], bmap2 = bmap[2];
+ u32 ret = 0;
+
+ if (bmap0 & FATTR4_WORD0_ACL)
+ return svc_max_payload(rqstp);
+ if (bmap0 & FATTR4_WORD0_FS_LOCATIONS)
+ return svc_max_payload(rqstp);
+
+ if (bmap1 & FATTR4_WORD1_OWNER) {
+ ret += IDMAP_NAMESZ + 4;
+ bmap1 &= ~FATTR4_WORD1_OWNER;
+ }
+ if (bmap1 & FATTR4_WORD1_OWNER_GROUP) {
+ ret += IDMAP_NAMESZ + 4;
+ bmap1 &= ~FATTR4_WORD1_OWNER_GROUP;
+ }
+ if (bmap0 & FATTR4_WORD0_FILEHANDLE) {
+ ret += NFS4_FHSIZE + 4;
+ bmap0 &= ~FATTR4_WORD0_FILEHANDLE;
+ }
+ if (bmap2 & FATTR4_WORD2_SECURITY_LABEL) {
+ ret += NFSD4_MAX_SEC_LABEL_LEN + 12;
+ bmap2 &= ~FATTR4_WORD2_SECURITY_LABEL;
+ }
+ /*
+ * Largest of remaining attributes are 16 bytes (e.g.,
+ * supported_attributes)
+ */
+ ret += 16 * (hweight32(bmap0) + hweight32(bmap1) + hweight32(bmap2));
+ /* bitmask, length */
+ ret += 20;
+ return ret;
+}
+
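As a worked example of the estimate above (assuming IDMAP_NAMESZ and NFS4_FHSIZE are both 128, their values in mainline at the time): a GETATTR requesting only owner, owner_group and filehandle gets (128 + 4) + (128 + 4) + (128 + 4) = 396 bytes for the variable-length attributes, no remaining bitmap bits so no 16-byte entries, plus 20 bytes for the bitmask and length words, i.e. 416 bytes total; an ACL or fs_locations request instead short-circuits to the full svc_max_payload().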
static inline u32 nfsd4_link_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + op_encode_change_info_maxsz)
@@ -1500,18 +1534,19 @@ static inline u32 nfsd4_read_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
if (rlen > maxcount)
rlen = maxcount;
- return (op_encode_hdr_size + 2) * sizeof(__be32) + rlen;
+ return (op_encode_hdr_size + 2 + XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_readdir_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
+ u32 maxcount = svc_max_payload(rqstp);
u32 rlen = op->u.readdir.rd_maxcount;
- if (rlen > PAGE_SIZE)
- rlen = PAGE_SIZE;
+ if (rlen > maxcount)
+ rlen = maxcount;
- return (op_encode_hdr_size + op_encode_verifier_maxsz)
- * sizeof(__be32) + rlen;
+ return (op_encode_hdr_size + op_encode_verifier_maxsz +
+ XDR_QUADLEN(rlen)) * sizeof(__be32);
}
static inline u32 nfsd4_remove_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1526,6 +1561,12 @@ static inline u32 nfsd4_rename_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op
+ op_encode_change_info_maxsz) * sizeof(__be32);
}
+static inline u32 nfsd4_sequence_rsize(struct svc_rqst *rqstp,
+ struct nfsd4_op *op)
+{
+ return NFS4_MAX_SESSIONID_LEN + 20;
+}
+
static inline u32 nfsd4_setattr_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
return (op_encode_hdr_size + nfs4_fattr_bitmap_maxsz) * sizeof(__be32);
@@ -1539,7 +1580,7 @@ static inline u32 nfsd4_setclientid_rsize(struct svc_rqst *rqstp, struct nfsd4_o
static inline u32 nfsd4_write_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
{
- return (op_encode_hdr_size + op_encode_verifier_maxsz) * sizeof(__be32);
+ return (op_encode_hdr_size + 2 + op_encode_verifier_maxsz) * sizeof(__be32);
}
static inline u32 nfsd4_exchange_id_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
@@ -1607,6 +1648,7 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_GETATTR] = {
.op_func = (nfsd4op_func)nfsd4_getattr,
.op_flags = ALLOWED_ON_ABSENT_FS,
+ .op_rsize_bop = nfsd4_getattr_rsize,
.op_name = "OP_GETATTR",
},
[OP_GETFH] = {
@@ -1676,37 +1718,32 @@ static struct nfsd4_operation nfsd4_ops[] = {
[OP_PUTFH] = {
.op_func = (nfsd4op_func)nfsd4_putfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
- | OP_CLEAR_STATEID,
+ | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
.op_name = "OP_PUTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTPUBFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
- | OP_CLEAR_STATEID,
+ | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
.op_name = "OP_PUTPUBFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_PUTROOTFH] = {
.op_func = (nfsd4op_func)nfsd4_putrootfh,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_IS_PUTFH_LIKE | OP_MODIFIES_SOMETHING
- | OP_CLEAR_STATEID,
+ | OP_IS_PUTFH_LIKE | OP_CLEAR_STATEID,
.op_name = "OP_PUTROOTFH",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
},
[OP_READ] = {
.op_func = (nfsd4op_func)nfsd4_read,
- .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READ",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_read_rsize,
.op_get_currentstateid = (stateid_getter)nfsd4_get_readstateid,
},
[OP_READDIR] = {
.op_func = (nfsd4op_func)nfsd4_readdir,
- .op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_READDIR",
.op_rsize_bop = (nfsd4op_rsize)nfsd4_readdir_rsize,
},
@@ -1864,14 +1901,33 @@ static struct nfsd4_operation nfsd4_ops[] = {
},
};
-#ifdef NFSD_DEBUG
+int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
+{
+ struct nfsd4_operation *opdesc;
+ nfsd4op_rsize estimator;
+
+ if (op->opnum == OP_ILLEGAL)
+ return op_encode_hdr_size * sizeof(__be32);
+ opdesc = OPDESC(op);
+ estimator = opdesc->op_rsize_bop;
+ return estimator ? estimator(rqstp, op) : PAGE_SIZE;
+}
+
+void warn_on_nonidempotent_op(struct nfsd4_op *op)
+{
+ if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
+ pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
+ op->opnum, nfsd4_op_name(op->opnum));
+ WARN_ON_ONCE(1);
+ }
+}
+
static const char *nfsd4_op_name(unsigned opnum)
{
if (opnum < ARRAY_SIZE(nfsd4_ops))
return nfsd4_ops[opnum].op_name;
return "unknown_operation";
}
-#endif
#define nfsd4_voidres nfsd4_voidargs
struct nfsd4_voidargs { int dummy; };
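nfsd4_max_reply() and warn_on_nonidempotent_op() above give callers a per-op worst-case reply estimate that works even for OP_ILLEGAL and for ops without an op_rsize_bop estimator. A hedged sketch of how a caller might consume the estimate to decide whether an op still fits in the reply buffer (demo_op_fits and the buflen/len check are assumptions, not code from this patch):

    /* Hypothetical check: does the worst-case encoded reply for this
     * op still fit in the space remaining on the response buffer? */
    static bool demo_op_fits(struct svc_rqst *rqstp, struct nfsd4_op *op,
                             struct xdr_buf *buf)
    {
            int estimate = nfsd4_max_reply(rqstp, op);

            return buf->buflen - buf->len >= estimate;
    }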
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index 9a77a5a21557..c0d45cec9958 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -81,13 +81,13 @@ static DEFINE_MUTEX(client_mutex);
* effort to decrease the scope of the client_mutex, this spinlock may
* eventually cover more:
*/
-static DEFINE_SPINLOCK(recall_lock);
+static DEFINE_SPINLOCK(state_lock);
-static struct kmem_cache *openowner_slab = NULL;
-static struct kmem_cache *lockowner_slab = NULL;
-static struct kmem_cache *file_slab = NULL;
-static struct kmem_cache *stateid_slab = NULL;
-static struct kmem_cache *deleg_slab = NULL;
+static struct kmem_cache *openowner_slab;
+static struct kmem_cache *lockowner_slab;
+static struct kmem_cache *file_slab;
+static struct kmem_cache *stateid_slab;
+static struct kmem_cache *deleg_slab;
void
nfs4_lock_state(void)
@@ -235,9 +235,9 @@ static void nfsd4_free_file(struct nfs4_file *f)
static inline void
put_nfs4_file(struct nfs4_file *fi)
{
- if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
+ if (atomic_dec_and_lock(&fi->fi_ref, &state_lock)) {
hlist_del(&fi->fi_hash);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
iput(fi->fi_inode);
nfsd4_free_file(fi);
}
@@ -375,7 +375,6 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
dp = delegstateid(nfs4_alloc_stid(clp, deleg_slab));
if (dp == NULL)
return dp;
- dp->dl_stid.sc_type = NFS4_DELEG_STID;
/*
* delegation seqid's are never incremented. The 4.1 special
* meaning of seqid 0 isn't meaningful, really, but let's avoid
@@ -418,6 +417,8 @@ nfs4_put_delegation(struct nfs4_delegation *dp)
static void nfs4_put_deleg_lease(struct nfs4_file *fp)
{
+ if (!fp->fi_lease)
+ return;
if (atomic_dec_and_test(&fp->fi_delegees)) {
vfs_setlease(fp->fi_deleg_file, F_UNLCK, &fp->fi_lease);
fp->fi_lease = NULL;
@@ -431,18 +432,30 @@ static void unhash_stid(struct nfs4_stid *s)
s->sc_type = 0;
}
+static void
+hash_delegation_locked(struct nfs4_delegation *dp, struct nfs4_file *fp)
+{
+ lockdep_assert_held(&state_lock);
+
+ dp->dl_stid.sc_type = NFS4_DELEG_STID;
+ list_add(&dp->dl_perfile, &fp->fi_delegations);
+ list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+}
+
/* Called under the state lock. */
static void
unhash_delegation(struct nfs4_delegation *dp)
{
+ spin_lock(&state_lock);
list_del_init(&dp->dl_perclnt);
- spin_lock(&recall_lock);
list_del_init(&dp->dl_perfile);
list_del_init(&dp->dl_recall_lru);
- spin_unlock(&recall_lock);
- nfs4_put_deleg_lease(dp->dl_file);
- put_nfs4_file(dp->dl_file);
- dp->dl_file = NULL;
+ spin_unlock(&state_lock);
+ if (dp->dl_file) {
+ nfs4_put_deleg_lease(dp->dl_file);
+ put_nfs4_file(dp->dl_file);
+ dp->dl_file = NULL;
+ }
}
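The new hash_delegation_locked() helper makes the locking contract explicit with lockdep_assert_held(&state_lock), so hashing a delegation without the lock now trips lockdep rather than silently racing. Its calling convention, as used by the nfs4_setlease()/nfs4_set_delegation() hunks further down, is simply:

            spin_lock(&state_lock);
            hash_delegation_locked(dp, fp);
            spin_unlock(&state_lock);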
@@ -645,6 +658,12 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
}
}
+static void nfs4_free_lockowner(struct nfs4_lockowner *lo)
+{
+ kfree(lo->lo_owner.so_owner.data);
+ kmem_cache_free(lockowner_slab, lo);
+}
+
static void release_lockowner(struct nfs4_lockowner *lo)
{
unhash_lockowner(lo);
@@ -699,6 +718,12 @@ static void release_last_closed_stateid(struct nfs4_openowner *oo)
}
}
+static void nfs4_free_openowner(struct nfs4_openowner *oo)
+{
+ kfree(oo->oo_owner.so_owner.data);
+ kmem_cache_free(openowner_slab, oo);
+}
+
static void release_openowner(struct nfs4_openowner *oo)
{
unhash_openowner(oo);
@@ -1093,7 +1118,7 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
return clp;
}
-static inline void
+static void
free_client(struct nfs4_client *clp)
{
struct nfsd_net __maybe_unused *nn = net_generic(clp->net, nfsd_net_id);
@@ -1136,13 +1161,13 @@ destroy_client(struct nfs4_client *clp)
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
INIT_LIST_HEAD(&reaplist);
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
while (!list_empty(&clp->cl_delegations)) {
dp = list_entry(clp->cl_delegations.next, struct nfs4_delegation, dl_perclnt);
list_del_init(&dp->dl_perclnt);
list_move(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
while (!list_empty(&reaplist)) {
dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
destroy_delegation(dp);
@@ -1544,6 +1569,7 @@ out_err:
void
nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
{
+ struct xdr_buf *buf = resp->xdr.buf;
struct nfsd4_slot *slot = resp->cstate.slot;
unsigned int base;
@@ -1557,11 +1583,9 @@ nfsd4_store_cache_entry(struct nfsd4_compoundres *resp)
slot->sl_datalen = 0;
return;
}
- slot->sl_datalen = (char *)resp->p - (char *)resp->cstate.datap;
- base = (char *)resp->cstate.datap -
- (char *)resp->xbuf->head[0].iov_base;
- if (read_bytes_from_xdr_buf(resp->xbuf, base, slot->sl_data,
- slot->sl_datalen))
+ base = resp->cstate.data_offset;
+ slot->sl_datalen = buf->len - base;
+ if (read_bytes_from_xdr_buf(buf, base, slot->sl_data, slot->sl_datalen))
WARN("%s: sessions DRC could not cache compound\n", __func__);
return;
}
@@ -1602,6 +1626,8 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
struct nfsd4_sequence *seq)
{
struct nfsd4_slot *slot = resp->cstate.slot;
+ struct xdr_stream *xdr = &resp->xdr;
+ __be32 *p;
__be32 status;
dprintk("--> %s slot %p\n", __func__, slot);
@@ -1610,14 +1636,16 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
if (status)
return status;
- /* The sequence operation has been encoded, cstate->datap set. */
- memcpy(resp->cstate.datap, slot->sl_data, slot->sl_datalen);
+ p = xdr_reserve_space(xdr, slot->sl_datalen);
+ if (!p) {
+ WARN_ON_ONCE(1);
+ return nfserr_serverfault;
+ }
+ xdr_encode_opaque_fixed(p, slot->sl_data, slot->sl_datalen);
+ xdr_commit_encode(xdr);
resp->opcnt = slot->sl_opcnt;
- resp->p = resp->cstate.datap + XDR_QUADLEN(slot->sl_datalen);
- status = slot->sl_status;
-
- return status;
+ return slot->sl_status;
}
/*
@@ -2189,11 +2217,13 @@ nfsd4_sequence(struct svc_rqst *rqstp,
struct nfsd4_sequence *seq)
{
struct nfsd4_compoundres *resp = rqstp->rq_resp;
+ struct xdr_stream *xdr = &resp->xdr;
struct nfsd4_session *session;
struct nfs4_client *clp;
struct nfsd4_slot *slot;
struct nfsd4_conn *conn;
__be32 status;
+ int buflen;
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
if (resp->opcnt != 1)
@@ -2262,6 +2292,16 @@ nfsd4_sequence(struct svc_rqst *rqstp,
if (status)
goto out_put_session;
+ buflen = (seq->cachethis) ?
+ session->se_fchannel.maxresp_cached :
+ session->se_fchannel.maxresp_sz;
+ status = (seq->cachethis) ? nfserr_rep_too_big_to_cache :
+ nfserr_rep_too_big;
+ if (xdr_restrict_buflen(xdr, buflen - rqstp->rq_auth_slack))
+ goto out_put_session;
+ svc_reserve(rqstp, buflen);
+
+ status = nfs_ok;
/* Success! bump slot seqid */
slot->sl_seqid = seq->seqid;
slot->sl_flags |= NFSD4_SLOT_INUSE;
@@ -2499,28 +2539,19 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
fp->fi_lease = NULL;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
- spin_unlock(&recall_lock);
-}
-
-static void
-nfsd4_free_slab(struct kmem_cache **slab)
-{
- if (*slab == NULL)
- return;
- kmem_cache_destroy(*slab);
- *slab = NULL;
+ spin_unlock(&state_lock);
}
void
nfsd4_free_slabs(void)
{
- nfsd4_free_slab(&openowner_slab);
- nfsd4_free_slab(&lockowner_slab);
- nfsd4_free_slab(&file_slab);
- nfsd4_free_slab(&stateid_slab);
- nfsd4_free_slab(&deleg_slab);
+ kmem_cache_destroy(openowner_slab);
+ kmem_cache_destroy(lockowner_slab);
+ kmem_cache_destroy(file_slab);
+ kmem_cache_destroy(stateid_slab);
+ kmem_cache_destroy(deleg_slab);
}
int
@@ -2529,42 +2560,38 @@ nfsd4_init_slabs(void)
openowner_slab = kmem_cache_create("nfsd4_openowners",
sizeof(struct nfs4_openowner), 0, 0, NULL);
if (openowner_slab == NULL)
- goto out_nomem;
+ goto out;
lockowner_slab = kmem_cache_create("nfsd4_lockowners",
sizeof(struct nfs4_lockowner), 0, 0, NULL);
if (lockowner_slab == NULL)
- goto out_nomem;
+ goto out_free_openowner_slab;
file_slab = kmem_cache_create("nfsd4_files",
sizeof(struct nfs4_file), 0, 0, NULL);
if (file_slab == NULL)
- goto out_nomem;
+ goto out_free_lockowner_slab;
stateid_slab = kmem_cache_create("nfsd4_stateids",
sizeof(struct nfs4_ol_stateid), 0, 0, NULL);
if (stateid_slab == NULL)
- goto out_nomem;
+ goto out_free_file_slab;
deleg_slab = kmem_cache_create("nfsd4_delegations",
sizeof(struct nfs4_delegation), 0, 0, NULL);
if (deleg_slab == NULL)
- goto out_nomem;
+ goto out_free_stateid_slab;
return 0;
-out_nomem:
- nfsd4_free_slabs();
+
+out_free_stateid_slab:
+ kmem_cache_destroy(stateid_slab);
+out_free_file_slab:
+ kmem_cache_destroy(file_slab);
+out_free_lockowner_slab:
+ kmem_cache_destroy(lockowner_slab);
+out_free_openowner_slab:
+ kmem_cache_destroy(openowner_slab);
+out:
dprintk("nfsd4: out of memory while initializing nfsv4\n");
return -ENOMEM;
}
-void nfs4_free_openowner(struct nfs4_openowner *oo)
-{
- kfree(oo->oo_owner.so_owner.data);
- kmem_cache_free(openowner_slab, oo);
-}
-
-void nfs4_free_lockowner(struct nfs4_lockowner *lo)
-{
- kfree(lo->lo_owner.so_owner.data);
- kmem_cache_free(lockowner_slab, lo);
-}
-
static void init_nfs4_replay(struct nfs4_replay *rp)
{
rp->rp_status = nfserr_serverfault;
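The nfsd4_init_slabs() change above drops the nfsd4_free_slab() wrapper (which NULL-checked each cache so a single out_nomem label could tear everything down) in favor of the usual one-label-per-allocation unwind, releasing in reverse order of creation. A generic sketch of that idiom with hypothetical cache names:

    #include <linux/slab.h>

    static struct kmem_cache *a_slab;
    static struct kmem_cache *b_slab;

    static int demo_init_slabs(void)
    {
            a_slab = kmem_cache_create("demo_a", 64, 0, 0, NULL);
            if (!a_slab)
                    goto out;
            b_slab = kmem_cache_create("demo_b", 128, 0, 0, NULL);
            if (!b_slab)
                    goto out_free_a;
            return 0;

    out_free_a:
            kmem_cache_destroy(a_slab);
    out:
            return -ENOMEM;
    }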
@@ -2685,15 +2712,15 @@ find_file(struct inode *ino)
unsigned int hashval = file_hashval(ino);
struct nfs4_file *fp;
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
if (fp->fi_inode == ino) {
get_nfs4_file(fp);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
return fp;
}
}
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
return NULL;
}
@@ -2730,6 +2757,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
struct nfs4_client *clp = dp->dl_stid.sc_client;
struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+ lockdep_assert_held(&state_lock);
/* We're assuming the state code never drops its reference
* without first removing the lease. Since we're in this lease
* callback (and since the lease code is serialized by the kernel
@@ -2766,11 +2794,11 @@ static void nfsd_break_deleg_cb(struct file_lock *fl)
*/
fl->fl_break_time = 0;
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
fp->fi_had_conflict = true;
list_for_each_entry(dp, &fp->fi_delegations, dl_perfile)
nfsd_break_one_deleg(dp);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
}
static
@@ -3047,11 +3075,12 @@ static int nfs4_setlease(struct nfs4_delegation *dp)
status = vfs_setlease(fl->fl_file, fl->fl_type, &fl);
if (status)
goto out_free;
- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
fp->fi_lease = fl;
fp->fi_deleg_file = get_file(fl->fl_file);
atomic_set(&fp->fi_delegees, 1);
- list_add(&dp->dl_perfile, &fp->fi_delegations);
+ spin_lock(&state_lock);
+ hash_delegation_locked(dp, fp);
+ spin_unlock(&state_lock);
return 0;
out_free:
locks_free_lock(fl);
@@ -3060,33 +3089,21 @@ out_free:
static int nfs4_set_delegation(struct nfs4_delegation *dp, struct nfs4_file *fp)
{
- int status;
-
if (fp->fi_had_conflict)
return -EAGAIN;
get_nfs4_file(fp);
dp->dl_file = fp;
- if (!fp->fi_lease) {
- status = nfs4_setlease(dp);
- if (status)
- goto out_free;
- return 0;
- }
- spin_lock(&recall_lock);
+ if (!fp->fi_lease)
+ return nfs4_setlease(dp);
+ spin_lock(&state_lock);
+ atomic_inc(&fp->fi_delegees);
if (fp->fi_had_conflict) {
- spin_unlock(&recall_lock);
- status = -EAGAIN;
- goto out_free;
+ spin_unlock(&state_lock);
+ return -EAGAIN;
}
- atomic_inc(&fp->fi_delegees);
- list_add(&dp->dl_perfile, &fp->fi_delegations);
- spin_unlock(&recall_lock);
- list_add(&dp->dl_perclnt, &dp->dl_stid.sc_client->cl_delegations);
+ hash_delegation_locked(dp, fp);
+ spin_unlock(&state_lock);
return 0;
-out_free:
- put_nfs4_file(fp);
- dp->dl_file = fp;
- return status;
}
static void nfsd4_open_deleg_none_ext(struct nfsd4_open *open, int status)
@@ -3173,8 +3190,7 @@ nfs4_open_delegation(struct net *net, struct svc_fh *fh,
open->op_delegate_type = NFS4_OPEN_DELEGATE_READ;
return;
out_free:
- remove_stid(&dp->dl_stid);
- nfs4_put_delegation(dp);
+ destroy_delegation(dp);
out_no_deleg:
open->op_delegate_type = NFS4_OPEN_DELEGATE_NONE;
if (open->op_claim_type == NFS4_OPEN_CLAIM_PREVIOUS &&
@@ -3391,8 +3407,7 @@ nfs4_laundromat(struct nfsd_net *nn)
struct nfs4_delegation *dp;
struct list_head *pos, *next, reaplist;
time_t cutoff = get_seconds() - nn->nfsd4_lease;
- time_t t, clientid_val = nn->nfsd4_lease;
- time_t u, test_val = nn->nfsd4_lease;
+ time_t t, new_timeo = nn->nfsd4_lease;
nfs4_lock_state();
@@ -3404,8 +3419,7 @@ nfs4_laundromat(struct nfsd_net *nn)
clp = list_entry(pos, struct nfs4_client, cl_lru);
if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
t = clp->cl_time - cutoff;
- if (clientid_val > t)
- clientid_val = t;
+ new_timeo = min(new_timeo, t);
break;
}
if (mark_client_expired_locked(clp)) {
@@ -3422,39 +3436,35 @@ nfs4_laundromat(struct nfsd_net *nn)
clp->cl_clientid.cl_id);
expire_client(clp);
}
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
continue;
if (time_after((unsigned long)dp->dl_time, (unsigned long)cutoff)) {
- u = dp->dl_time - cutoff;
- if (test_val > u)
- test_val = u;
+ t = dp->dl_time - cutoff;
+ new_timeo = min(new_timeo, t);
break;
}
list_move(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
revoke_delegation(dp);
}
- test_val = nn->nfsd4_lease;
list_for_each_safe(pos, next, &nn->close_lru) {
oo = container_of(pos, struct nfs4_openowner, oo_close_lru);
if (time_after((unsigned long)oo->oo_time, (unsigned long)cutoff)) {
- u = oo->oo_time - cutoff;
- if (test_val > u)
- test_val = u;
+ t = oo->oo_time - cutoff;
+ new_timeo = min(new_timeo, t);
break;
}
release_openowner(oo);
}
- if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
- clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+ new_timeo = max_t(time_t, new_timeo, NFSD_LAUNDROMAT_MINTIMEOUT);
nfs4_unlock_state();
- return clientid_val;
+ return new_timeo;
}
static struct workqueue_struct *laundry_wq;
@@ -3654,6 +3664,7 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
struct svc_fh *current_fh = &cstate->current_fh;
struct inode *ino = current_fh->fh_dentry->d_inode;
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
+ struct file *file = NULL;
__be32 status;
if (filpp)
@@ -3665,10 +3676,12 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
return check_special_stateids(net, current_fh, stateid, flags);
+ nfs4_lock_state();
+
status = nfsd4_lookup_stateid(stateid, NFS4_DELEG_STID|NFS4_OPEN_STID|NFS4_LOCK_STID,
&s, cstate->minorversion, nn);
if (status)
- return status;
+ goto out;
status = check_stateid_generation(stateid, &s->sc_stateid, nfsd4_has_session(cstate));
if (status)
goto out;
@@ -3679,8 +3692,8 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
if (status)
goto out;
if (filpp) {
- *filpp = dp->dl_file->fi_deleg_file;
- if (!*filpp) {
+ file = dp->dl_file->fi_deleg_file;
+ if (!file) {
WARN_ON_ONCE(1);
status = nfserr_serverfault;
goto out;
@@ -3701,16 +3714,20 @@ nfs4_preprocess_stateid_op(struct net *net, struct nfsd4_compound_state *cstate,
goto out;
if (filpp) {
if (flags & RD_STATE)
- *filpp = find_readable_file(stp->st_file);
+ file = find_readable_file(stp->st_file);
else
- *filpp = find_writeable_file(stp->st_file);
+ file = find_writeable_file(stp->st_file);
}
break;
default:
- return nfserr_bad_stateid;
+ status = nfserr_bad_stateid;
+ goto out;
}
status = nfs_ok;
+ if (file)
+ *filpp = get_file(file);
out:
+ nfs4_unlock_state();
return status;
}
@@ -3726,7 +3743,7 @@ nfsd4_free_lock_stateid(struct nfs4_ol_stateid *stp)
* correspondance, and we have to delete the lockowner when we
* delete the lock stateid:
*/
- unhash_lockowner(lo);
+ release_lockowner(lo);
return nfs_ok;
}
@@ -4896,6 +4913,7 @@ static u64 nfsd_find_all_delegations(struct nfs4_client *clp, u64 max,
struct nfs4_delegation *dp, *next;
u64 count = 0;
+ lockdep_assert_held(&state_lock);
list_for_each_entry_safe(dp, next, &clp->cl_delegations, dl_perclnt) {
if (victims)
list_move(&dp->dl_recall_lru, victims);
@@ -4911,9 +4929,9 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
LIST_HEAD(victims);
u64 count;
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, &victims);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
revoke_delegation(dp);
@@ -4927,11 +4945,11 @@ u64 nfsd_recall_client_delegations(struct nfs4_client *clp, u64 max)
LIST_HEAD(victims);
u64 count;
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, &victims);
list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
nfsd_break_one_deleg(dp);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
return count;
}
@@ -4940,9 +4958,9 @@ u64 nfsd_print_client_delegations(struct nfs4_client *clp, u64 max)
{
u64 count = 0;
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
count = nfsd_find_all_delegations(clp, max, NULL);
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
nfsd_print_count(clp, count, "delegations");
return count;
@@ -4983,13 +5001,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_
#endif /* CONFIG_NFSD_FAULT_INJECTION */
-/* initialization to perform at module load time: */
-
-void
-nfs4_state_init(void)
-{
-}
-
/*
* Since the lifetime of a delegation isn't limited to that of an open, a
* client may quite reasonably hang on to a delegation as long as it has
@@ -5160,12 +5171,12 @@ nfs4_state_shutdown_net(struct net *net)
nfs4_lock_state();
INIT_LIST_HEAD(&reaplist);
- spin_lock(&recall_lock);
+ spin_lock(&state_lock);
list_for_each_safe(pos, next, &nn->del_recall_lru) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
list_move(&dp->dl_recall_lru, &reaplist);
}
- spin_unlock(&recall_lock);
+ spin_unlock(&state_lock);
list_for_each_safe(pos, next, &reaplist) {
dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
destroy_delegation(dp);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 18881f34737a..2d305a121f37 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -98,11 +98,6 @@ xdr_error: \
status = nfserr_bad_xdr; \
goto out
-#define READ32(x) (x) = ntohl(*p++)
-#define READ64(x) do { \
- (x) = (u64)ntohl(*p++) << 32; \
- (x) |= ntohl(*p++); \
-} while (0)
#define READMEM(x,nbytes) do { \
x = (char *)p; \
p += XDR_QUADLEN(nbytes); \
@@ -248,17 +243,17 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
bmval[2] = 0;
READ_BUF(4);
- READ32(bmlen);
+ bmlen = be32_to_cpup(p++);
if (bmlen > 1000)
goto xdr_error;
READ_BUF(bmlen << 2);
if (bmlen > 0)
- READ32(bmval[0]);
+ bmval[0] = be32_to_cpup(p++);
if (bmlen > 1)
- READ32(bmval[1]);
+ bmval[1] = be32_to_cpup(p++);
if (bmlen > 2)
- READ32(bmval[2]);
+ bmval[2] = be32_to_cpup(p++);
DECODE_TAIL;
}
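The nfs4xdr.c conversion above and in the hunks that follow is mechanical: READ32(x) becomes x = be32_to_cpup(p++) and READ64(x) becomes p = xdr_decode_hyper(p, &x), dropping the file-local macros in favor of the generic SUNRPC helpers. A standalone sketch of the replacement pattern (field and function names are illustrative):

    #include <linux/sunrpc/xdr.h>

    struct demo_args {
            u64 offset;
            u32 count;
    };

    /* Decode "offset(8) count(4)"; the caller must already have verified
     * that 12 bytes are available (what READ_BUF(12) does in the real
     * decoders). */
    static __be32 *demo_decode(__be32 *p, struct demo_args *args)
    {
            p = xdr_decode_hyper(p, &args->offset);
            args->count = be32_to_cpup(p++);
            return p;
    }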
@@ -270,6 +265,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
{
int expected_len, len = 0;
u32 dummy32;
+ u64 sec;
char *buf;
DECODE_HEAD;
@@ -278,12 +274,12 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return status;
READ_BUF(4);
- READ32(expected_len);
+ expected_len = be32_to_cpup(p++);
if (bmval[0] & FATTR4_WORD0_SIZE) {
READ_BUF(8);
len += 8;
- READ64(iattr->ia_size);
+ p = xdr_decode_hyper(p, &iattr->ia_size);
iattr->ia_valid |= ATTR_SIZE;
}
if (bmval[0] & FATTR4_WORD0_ACL) {
@@ -291,7 +287,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
struct nfs4_ace *ace;
READ_BUF(4); len += 4;
- READ32(nace);
+ nace = be32_to_cpup(p++);
if (nace > NFS4_ACL_MAX)
return nfserr_fbig;
@@ -305,10 +301,10 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
(*acl)->naces = nace;
for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
READ_BUF(16); len += 16;
- READ32(ace->type);
- READ32(ace->flag);
- READ32(ace->access_mask);
- READ32(dummy32);
+ ace->type = be32_to_cpup(p++);
+ ace->flag = be32_to_cpup(p++);
+ ace->access_mask = be32_to_cpup(p++);
+ dummy32 = be32_to_cpup(p++);
READ_BUF(dummy32);
len += XDR_QUADLEN(dummy32) << 2;
READMEM(buf, dummy32);
@@ -330,14 +326,14 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (bmval[1] & FATTR4_WORD1_MODE) {
READ_BUF(4);
len += 4;
- READ32(iattr->ia_mode);
+ iattr->ia_mode = be32_to_cpup(p++);
iattr->ia_mode &= (S_IFMT | S_IALLUGO);
iattr->ia_valid |= ATTR_MODE;
}
if (bmval[1] & FATTR4_WORD1_OWNER) {
READ_BUF(4);
len += 4;
- READ32(dummy32);
+ dummy32 = be32_to_cpup(p++);
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
@@ -348,7 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
READ_BUF(4);
len += 4;
- READ32(dummy32);
+ dummy32 = be32_to_cpup(p++);
READ_BUF(dummy32);
len += (XDR_QUADLEN(dummy32) << 2);
READMEM(buf, dummy32);
@@ -359,15 +355,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
READ_BUF(4);
len += 4;
- READ32(dummy32);
+ dummy32 = be32_to_cpup(p++);
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
/* We require the high 32 bits of 'seconds' to be 0, and we ignore
all 32 bits of 'nseconds'. */
READ_BUF(12);
len += 12;
- READ64(iattr->ia_atime.tv_sec);
- READ32(iattr->ia_atime.tv_nsec);
+ p = xdr_decode_hyper(p, &sec);
+ iattr->ia_atime.tv_sec = (time_t)sec;
+ iattr->ia_atime.tv_nsec = be32_to_cpup(p++);
if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
return nfserr_inval;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -382,15 +379,16 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
READ_BUF(4);
len += 4;
- READ32(dummy32);
+ dummy32 = be32_to_cpup(p++);
switch (dummy32) {
case NFS4_SET_TO_CLIENT_TIME:
/* We require the high 32 bits of 'seconds' to be 0, and we ignore
all 32 bits of 'nseconds'. */
READ_BUF(12);
len += 12;
- READ64(iattr->ia_mtime.tv_sec);
- READ32(iattr->ia_mtime.tv_nsec);
+ p = xdr_decode_hyper(p, &sec);
+ iattr->ia_mtime.tv_sec = sec;
+ iattr->ia_mtime.tv_nsec = be32_to_cpup(p++);
if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
return nfserr_inval;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -408,13 +406,13 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
if (bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
READ_BUF(4);
len += 4;
- READ32(dummy32); /* lfs: we don't use it */
+ dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
READ_BUF(4);
len += 4;
- READ32(dummy32); /* pi: we don't use it either */
+ dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */
READ_BUF(4);
len += 4;
- READ32(dummy32);
+ dummy32 = be32_to_cpup(p++);
READ_BUF(dummy32);
if (dummy32 > NFSD4_MAX_SEC_LABEL_LEN)
return nfserr_badlabel;
@@ -445,7 +443,7 @@ nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
DECODE_HEAD;
READ_BUF(sizeof(stateid_t));
- READ32(sid->si_generation);
+ sid->si_generation = be32_to_cpup(p++);
COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
DECODE_TAIL;
@@ -457,7 +455,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access
DECODE_HEAD;
READ_BUF(4);
- READ32(access->ac_req_access);
+ access->ac_req_access = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -472,7 +470,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
/* callback_sec_params4 */
READ_BUF(4);
- READ32(nr_secflavs);
+ nr_secflavs = be32_to_cpup(p++);
if (nr_secflavs)
cbs->flavor = (u32)(-1);
else
@@ -480,7 +478,7 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
cbs->flavor = 0;
for (i = 0; i < nr_secflavs; ++i) {
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
switch (dummy) {
case RPC_AUTH_NULL:
/* Nothing to read */
@@ -490,21 +488,21 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
case RPC_AUTH_UNIX:
READ_BUF(8);
/* stamp */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* machine name */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
SAVEMEM(machine_name, dummy);
/* uid, gid */
READ_BUF(8);
- READ32(uid);
- READ32(gid);
+ uid = be32_to_cpup(p++);
+ gid = be32_to_cpup(p++);
/* more gids */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
if (cbs->flavor == (u32)(-1)) {
kuid_t kuid = make_kuid(&init_user_ns, uid);
@@ -524,14 +522,14 @@ static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_
"not supported!\n");
READ_BUF(8);
/* gcbp_service */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
/* gcbp_handle_from_server */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
/* gcbp_handle_from_client */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
break;
default:
@@ -547,7 +545,7 @@ static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, stru
DECODE_HEAD;
READ_BUF(4);
- READ32(bc->bc_cb_program);
+ bc->bc_cb_program = be32_to_cpup(p++);
nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
DECODE_TAIL;
@@ -559,7 +557,7 @@ static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp,
READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- READ32(bcts->dir);
+ bcts->dir = be32_to_cpup(p++);
/* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
* could help us figure out whether we should be using it. */
DECODE_TAIL;
@@ -571,7 +569,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
DECODE_HEAD;
READ_BUF(4);
- READ32(close->cl_seqid);
+ close->cl_seqid = be32_to_cpup(p++);
return nfsd4_decode_stateid(argp, &close->cl_stateid);
DECODE_TAIL;
@@ -584,8 +582,8 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit
DECODE_HEAD;
READ_BUF(12);
- READ64(commit->co_offset);
- READ32(commit->co_count);
+ p = xdr_decode_hyper(p, &commit->co_offset);
+ commit->co_count = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -596,19 +594,19 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
DECODE_HEAD;
READ_BUF(4);
- READ32(create->cr_type);
+ create->cr_type = be32_to_cpup(p++);
switch (create->cr_type) {
case NF4LNK:
READ_BUF(4);
- READ32(create->cr_linklen);
+ create->cr_linklen = be32_to_cpup(p++);
READ_BUF(create->cr_linklen);
SAVEMEM(create->cr_linkname, create->cr_linklen);
break;
case NF4BLK:
case NF4CHR:
READ_BUF(8);
- READ32(create->cr_specdata1);
- READ32(create->cr_specdata2);
+ create->cr_specdata1 = be32_to_cpup(p++);
+ create->cr_specdata2 = be32_to_cpup(p++);
break;
case NF4SOCK:
case NF4FIFO:
@@ -618,7 +616,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
}
READ_BUF(4);
- READ32(create->cr_namelen);
+ create->cr_namelen = be32_to_cpup(p++);
READ_BUF(create->cr_namelen);
SAVEMEM(create->cr_name, create->cr_namelen);
if ((status = check_filename(create->cr_name, create->cr_namelen)))
@@ -650,7 +648,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
DECODE_HEAD;
READ_BUF(4);
- READ32(link->li_namelen);
+ link->li_namelen = be32_to_cpup(p++);
READ_BUF(link->li_namelen);
SAVEMEM(link->li_name, link->li_namelen);
if ((status = check_filename(link->li_name, link->li_namelen)))
@@ -668,24 +666,24 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
* type, reclaim(boolean), offset, length, new_lock_owner(boolean)
*/
READ_BUF(28);
- READ32(lock->lk_type);
+ lock->lk_type = be32_to_cpup(p++);
if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
goto xdr_error;
- READ32(lock->lk_reclaim);
- READ64(lock->lk_offset);
- READ64(lock->lk_length);
- READ32(lock->lk_is_new);
+ lock->lk_reclaim = be32_to_cpup(p++);
+ p = xdr_decode_hyper(p, &lock->lk_offset);
+ p = xdr_decode_hyper(p, &lock->lk_length);
+ lock->lk_is_new = be32_to_cpup(p++);
if (lock->lk_is_new) {
READ_BUF(4);
- READ32(lock->lk_new_open_seqid);
+ lock->lk_new_open_seqid = be32_to_cpup(p++);
status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
if (status)
return status;
READ_BUF(8 + sizeof(clientid_t));
- READ32(lock->lk_new_lock_seqid);
+ lock->lk_new_lock_seqid = be32_to_cpup(p++);
COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
- READ32(lock->lk_new_owner.len);
+ lock->lk_new_owner.len = be32_to_cpup(p++);
READ_BUF(lock->lk_new_owner.len);
READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
} else {
@@ -693,7 +691,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
if (status)
return status;
READ_BUF(4);
- READ32(lock->lk_old_lock_seqid);
+ lock->lk_old_lock_seqid = be32_to_cpup(p++);
}
DECODE_TAIL;
@@ -705,13 +703,13 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
DECODE_HEAD;
READ_BUF(32);
- READ32(lockt->lt_type);
+ lockt->lt_type = be32_to_cpup(p++);
if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
goto xdr_error;
- READ64(lockt->lt_offset);
- READ64(lockt->lt_length);
+ p = xdr_decode_hyper(p, &lockt->lt_offset);
+ p = xdr_decode_hyper(p, &lockt->lt_length);
COPYMEM(&lockt->lt_clientid, 8);
- READ32(lockt->lt_owner.len);
+ lockt->lt_owner.len = be32_to_cpup(p++);
READ_BUF(lockt->lt_owner.len);
READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
@@ -724,16 +722,16 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
DECODE_HEAD;
READ_BUF(8);
- READ32(locku->lu_type);
+ locku->lu_type = be32_to_cpup(p++);
if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
goto xdr_error;
- READ32(locku->lu_seqid);
+ locku->lu_seqid = be32_to_cpup(p++);
status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
if (status)
return status;
READ_BUF(16);
- READ64(locku->lu_offset);
- READ64(locku->lu_length);
+ p = xdr_decode_hyper(p, &locku->lu_offset);
+ p = xdr_decode_hyper(p, &locku->lu_length);
DECODE_TAIL;
}
@@ -744,7 +742,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup
DECODE_HEAD;
READ_BUF(4);
- READ32(lookup->lo_len);
+ lookup->lo_len = be32_to_cpup(p++);
READ_BUF(lookup->lo_len);
SAVEMEM(lookup->lo_name, lookup->lo_len);
if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
@@ -759,7 +757,7 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
u32 w;
READ_BUF(4);
- READ32(w);
+ w = be32_to_cpup(p++);
*share_access = w & NFS4_SHARE_ACCESS_MASK;
*deleg_want = w & NFS4_SHARE_WANT_MASK;
if (deleg_when)
@@ -811,7 +809,7 @@ static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
__be32 *p;
READ_BUF(4);
- READ32(*x);
+ *x = be32_to_cpup(p++);
/* Note: unlike access bits, deny bits may be zero. */
if (*x & ~NFS4_SHARE_DENY_BOTH)
return nfserr_bad_xdr;
@@ -825,7 +823,7 @@ static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_ne
__be32 *p;
READ_BUF(4);
- READ32(o->len);
+ o->len = be32_to_cpup(p++);
if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
return nfserr_bad_xdr;
@@ -850,7 +848,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
open->op_xdr_error = 0;
/* seqid, share_access, share_deny, clientid, ownerlen */
READ_BUF(4);
- READ32(open->op_seqid);
+ open->op_seqid = be32_to_cpup(p++);
/* decode, yet ignore deleg_when until supported */
status = nfsd4_decode_share_access(argp, &open->op_share_access,
&open->op_deleg_want, &dummy);
@@ -865,13 +863,13 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
if (status)
goto xdr_error;
READ_BUF(4);
- READ32(open->op_create);
+ open->op_create = be32_to_cpup(p++);
switch (open->op_create) {
case NFS4_OPEN_NOCREATE:
break;
case NFS4_OPEN_CREATE:
READ_BUF(4);
- READ32(open->op_createmode);
+ open->op_createmode = be32_to_cpup(p++);
switch (open->op_createmode) {
case NFS4_CREATE_UNCHECKED:
case NFS4_CREATE_GUARDED:
@@ -904,12 +902,12 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
/* open_claim */
READ_BUF(4);
- READ32(open->op_claim_type);
+ open->op_claim_type = be32_to_cpup(p++);
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
READ_BUF(4);
- READ32(open->op_fname.len);
+ open->op_fname.len = be32_to_cpup(p++);
READ_BUF(open->op_fname.len);
SAVEMEM(open->op_fname.data, open->op_fname.len);
if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
@@ -917,14 +915,14 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
READ_BUF(4);
- READ32(open->op_delegate_type);
+ open->op_delegate_type = be32_to_cpup(p++);
break;
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
if (status)
return status;
READ_BUF(4);
- READ32(open->op_fname.len);
+ open->op_fname.len = be32_to_cpup(p++);
READ_BUF(open->op_fname.len);
SAVEMEM(open->op_fname.data, open->op_fname.len);
if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
@@ -962,7 +960,7 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con
if (status)
return status;
READ_BUF(4);
- READ32(open_conf->oc_seqid);
+ open_conf->oc_seqid = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -976,7 +974,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d
if (status)
return status;
READ_BUF(4);
- READ32(open_down->od_seqid);
+ open_down->od_seqid = be32_to_cpup(p++);
status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
&open_down->od_deleg_want, NULL);
if (status)
@@ -993,7 +991,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
DECODE_HEAD;
READ_BUF(4);
- READ32(putfh->pf_fhlen);
+ putfh->pf_fhlen = be32_to_cpup(p++);
if (putfh->pf_fhlen > NFS4_FHSIZE)
goto xdr_error;
READ_BUF(putfh->pf_fhlen);
@@ -1019,8 +1017,8 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
if (status)
return status;
READ_BUF(12);
- READ64(read->rd_offset);
- READ32(read->rd_length);
+ p = xdr_decode_hyper(p, &read->rd_offset);
+ read->rd_length = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -1031,10 +1029,10 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read
DECODE_HEAD;
READ_BUF(24);
- READ64(readdir->rd_cookie);
+ p = xdr_decode_hyper(p, &readdir->rd_cookie);
COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
- READ32(readdir->rd_dircount); /* just in case you needed a useless field... */
- READ32(readdir->rd_maxcount);
+ readdir->rd_dircount = be32_to_cpup(p++);
+ readdir->rd_maxcount = be32_to_cpup(p++);
if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
goto out;
@@ -1047,7 +1045,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove
DECODE_HEAD;
READ_BUF(4);
- READ32(remove->rm_namelen);
+ remove->rm_namelen = be32_to_cpup(p++);
READ_BUF(remove->rm_namelen);
SAVEMEM(remove->rm_name, remove->rm_namelen);
if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
@@ -1062,10 +1060,10 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename
DECODE_HEAD;
READ_BUF(4);
- READ32(rename->rn_snamelen);
+ rename->rn_snamelen = be32_to_cpup(p++);
READ_BUF(rename->rn_snamelen + 4);
SAVEMEM(rename->rn_sname, rename->rn_snamelen);
- READ32(rename->rn_tnamelen);
+ rename->rn_tnamelen = be32_to_cpup(p++);
READ_BUF(rename->rn_tnamelen);
SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
@@ -1097,7 +1095,7 @@ nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
DECODE_HEAD;
READ_BUF(4);
- READ32(secinfo->si_namelen);
+ secinfo->si_namelen = be32_to_cpup(p++);
READ_BUF(secinfo->si_namelen);
SAVEMEM(secinfo->si_name, secinfo->si_namelen);
status = check_filename(secinfo->si_name, secinfo->si_namelen);
@@ -1113,7 +1111,7 @@ nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
DECODE_HEAD;
READ_BUF(4);
- READ32(sin->sin_style);
+ sin->sin_style = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -1144,16 +1142,16 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
if (status)
return nfserr_bad_xdr;
READ_BUF(8);
- READ32(setclientid->se_callback_prog);
- READ32(setclientid->se_callback_netid_len);
+ setclientid->se_callback_prog = be32_to_cpup(p++);
+ setclientid->se_callback_netid_len = be32_to_cpup(p++);
READ_BUF(setclientid->se_callback_netid_len + 4);
SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
- READ32(setclientid->se_callback_addr_len);
+ setclientid->se_callback_addr_len = be32_to_cpup(p++);
READ_BUF(setclientid->se_callback_addr_len + 4);
SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
- READ32(setclientid->se_callback_ident);
+ setclientid->se_callback_ident = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -1186,7 +1184,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
* nfsd4_proc_verify */
READ_BUF(4);
- READ32(verify->ve_attrlen);
+ verify->ve_attrlen = be32_to_cpup(p++);
READ_BUF(verify->ve_attrlen);
SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
@@ -1204,11 +1202,11 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
if (status)
return status;
READ_BUF(16);
- READ64(write->wr_offset);
- READ32(write->wr_stable_how);
+ p = xdr_decode_hyper(p, &write->wr_offset);
+ write->wr_stable_how = be32_to_cpup(p++);
if (write->wr_stable_how > 2)
goto xdr_error;
- READ32(write->wr_buflen);
+ write->wr_buflen = be32_to_cpup(p++);
/* Sorry .. no magic macros for this.. *
* READ_BUF(write->wr_buflen);
@@ -1254,7 +1252,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
READ_BUF(12);
COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
- READ32(rlockowner->rl_owner.len);
+ rlockowner->rl_owner.len = be32_to_cpup(p++);
READ_BUF(rlockowner->rl_owner.len);
READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
@@ -1278,63 +1276,63 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
return nfserr_bad_xdr;
READ_BUF(4);
- READ32(exid->flags);
+ exid->flags = be32_to_cpup(p++);
/* Ignore state_protect4_a */
READ_BUF(4);
- READ32(exid->spa_how);
+ exid->spa_how = be32_to_cpup(p++);
switch (exid->spa_how) {
case SP4_NONE:
break;
case SP4_MACH_CRED:
/* spo_must_enforce */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
p += dummy;
/* spo_must_allow */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
p += dummy;
break;
case SP4_SSV:
/* ssp_ops */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
p += dummy;
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy * 4);
p += dummy;
/* ssp_hash_algs<> */
READ_BUF(4);
- READ32(tmp);
+ tmp = be32_to_cpup(p++);
while (tmp--) {
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
}
/* ssp_encr_algs<> */
READ_BUF(4);
- READ32(tmp);
+ tmp = be32_to_cpup(p++);
while (tmp--) {
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
}
/* ssp_window and ssp_num_gss_handles */
READ_BUF(8);
- READ32(dummy);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
+ dummy = be32_to_cpup(p++);
break;
default:
goto xdr_error;
@@ -1342,7 +1340,7 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
/* Ignore Implementation ID */
READ_BUF(4); /* nfs_impl_id4 array length */
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
if (dummy > 1)
goto xdr_error;
@@ -1350,13 +1348,13 @@ nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
if (dummy == 1) {
/* nii_domain */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
/* nii_name */
READ_BUF(4);
- READ32(dummy);
+ dummy = be32_to_cpup(p++);
READ_BUF(dummy);
p += XDR_QUADLEN(dummy);
@@ -1376,21 +1374,21 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
READ_BUF(16);
COPYMEM(&sess->clientid, 8);
- READ32(sess->seqid);
- READ32(sess->flags);
+ sess->seqid = be32_to_cpup(p++);
+ sess->flags = be32_to_cpup(p++);
/* Fore channel attrs */
READ_BUF(28);
- READ32(dummy); /* headerpadsz is always 0 */
- READ32(sess->fore_channel.maxreq_sz);
- READ32(sess->fore_channel.maxresp_sz);
- READ32(sess->fore_channel.maxresp_cached);
- READ32(sess->fore_channel.maxops);
- READ32(sess->fore_channel.maxreqs);
- READ32(sess->fore_channel.nr_rdma_attrs);
+ dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
+ sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
+ sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
+ sess->fore_channel.maxops = be32_to_cpup(p++);
+ sess->fore_channel.maxreqs = be32_to_cpup(p++);
+ sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);
if (sess->fore_channel.nr_rdma_attrs == 1) {
READ_BUF(4);
- READ32(sess->fore_channel.rdma_attrs);
+ sess->fore_channel.rdma_attrs = be32_to_cpup(p++);
} else if (sess->fore_channel.nr_rdma_attrs > 1) {
dprintk("Too many fore channel attr bitmaps!\n");
goto xdr_error;
@@ -1398,23 +1396,23 @@ nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
/* Back channel attrs */
READ_BUF(28);
- READ32(dummy); /* headerpadsz is always 0 */
- READ32(sess->back_channel.maxreq_sz);
- READ32(sess->back_channel.maxresp_sz);
- READ32(sess->back_channel.maxresp_cached);
- READ32(sess->back_channel.maxops);
- READ32(sess->back_channel.maxreqs);
- READ32(sess->back_channel.nr_rdma_attrs);
+ dummy = be32_to_cpup(p++); /* headerpadsz is always 0 */
+ sess->back_channel.maxreq_sz = be32_to_cpup(p++);
+ sess->back_channel.maxresp_sz = be32_to_cpup(p++);
+ sess->back_channel.maxresp_cached = be32_to_cpup(p++);
+ sess->back_channel.maxops = be32_to_cpup(p++);
+ sess->back_channel.maxreqs = be32_to_cpup(p++);
+ sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);
if (sess->back_channel.nr_rdma_attrs == 1) {
READ_BUF(4);
- READ32(sess->back_channel.rdma_attrs);
+ sess->back_channel.rdma_attrs = be32_to_cpup(p++);
} else if (sess->back_channel.nr_rdma_attrs > 1) {
dprintk("Too many back channel attr bitmaps!\n");
goto xdr_error;
}
READ_BUF(4);
- READ32(sess->callback_prog);
+ sess->callback_prog = be32_to_cpup(p++);
nfsd4_decode_cb_sec(argp, &sess->cb_sec);
DECODE_TAIL;
}
@@ -1437,7 +1435,7 @@ nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
DECODE_HEAD;
READ_BUF(sizeof(stateid_t));
- READ32(free_stateid->fr_stateid.si_generation);
+ free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);
COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
DECODE_TAIL;
@@ -1451,10 +1449,10 @@ nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- READ32(seq->seqid);
- READ32(seq->slotid);
- READ32(seq->maxslots);
- READ32(seq->cachethis);
+ seq->seqid = be32_to_cpup(p++);
+ seq->slotid = be32_to_cpup(p++);
+ seq->maxslots = be32_to_cpup(p++);
+ seq->cachethis = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -1511,7 +1509,7 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
DECODE_HEAD;
READ_BUF(4);
- READ32(rc->rca_one_fs);
+ rc->rca_one_fs = be32_to_cpup(p++);
DECODE_TAIL;
}
@@ -1605,47 +1603,25 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
return true;
}
-/*
- * Return a rough estimate of the maximum possible reply size. Note the
- * estimate includes rpc headers so is meant to be passed to
- * svc_reserve, not svc_reserve_auth.
- *
- * Also note the current compound encoding permits only one operation to
- * use pages beyond the first one, so the maximum possible length is the
- * maximum over these values, not the sum.
- */
-static int nfsd4_max_reply(u32 opnum)
-{
- switch (opnum) {
- case OP_READLINK:
- case OP_READDIR:
- /*
- * Both of these ops take a single page for data and put
- * the head and tail in another page:
- */
- return 2 * PAGE_SIZE;
- case OP_READ:
- return INT_MAX;
- default:
- return PAGE_SIZE;
- }
-}
-
static __be32
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
DECODE_HEAD;
struct nfsd4_op *op;
bool cachethis = false;
- int max_reply = PAGE_SIZE;
+ int auth_slack= argp->rqstp->rq_auth_slack;
+ int max_reply = auth_slack + 8; /* opcnt, status */
+ int readcount = 0;
+ int readbytes = 0;
int i;
READ_BUF(4);
- READ32(argp->taglen);
+ argp->taglen = be32_to_cpup(p++);
READ_BUF(argp->taglen + 8);
SAVEMEM(argp->tag, argp->taglen);
- READ32(argp->minorversion);
- READ32(argp->opcnt);
+ argp->minorversion = be32_to_cpup(p++);
+ argp->opcnt = be32_to_cpup(p++);
+ max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
if (argp->taglen > NFSD4_MAX_TAGLEN)
goto xdr_error;
@@ -1669,7 +1645,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
op->replay = NULL;
READ_BUF(4);
- READ32(op->opnum);
+ op->opnum = be32_to_cpup(p++);
if (nfsd4_opnum_in_range(argp, op))
op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
@@ -1677,97 +1653,82 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
}
-
- if (op->status) {
- argp->opcnt = i+1;
- break;
- }
/*
* We'll try to cache the result in the DRC if any one
* op in the compound wants to be cached:
*/
cachethis |= nfsd4_cache_this_op(op);
- max_reply = max(max_reply, nfsd4_max_reply(op->opnum));
+ if (op->opnum == OP_READ) {
+ readcount++;
+ readbytes += nfsd4_max_reply(argp->rqstp, op);
+ } else
+ max_reply += nfsd4_max_reply(argp->rqstp, op);
+
+ if (op->status) {
+ argp->opcnt = i+1;
+ break;
+ }
}
/* Sessions make the DRC unnecessary: */
if (argp->minorversion)
cachethis = false;
- if (max_reply != INT_MAX)
- svc_reserve(argp->rqstp, max_reply);
+ svc_reserve(argp->rqstp, max_reply + readbytes);
argp->rqstp->rq_cachetype = cachethis ? RC_REPLBUFF : RC_NOCACHE;
- DECODE_TAIL;
-}
-
-#define WRITE32(n) *p++ = htonl(n)
-#define WRITE64(n) do { \
- *p++ = htonl((u32)((n) >> 32)); \
- *p++ = htonl((u32)(n)); \
-} while (0)
-#define WRITEMEM(ptr,nbytes) do { if (nbytes > 0) { \
- *(p + XDR_QUADLEN(nbytes) -1) = 0; \
- memcpy(p, ptr, nbytes); \
- p += XDR_QUADLEN(nbytes); \
-}} while (0)
-
-static void write32(__be32 **p, u32 n)
-{
- *(*p)++ = htonl(n);
-}
+ if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
+ argp->rqstp->rq_splice_ok = false;
-static void write64(__be32 **p, u64 n)
-{
- write32(p, (n >> 32));
- write32(p, (u32)n);
+ DECODE_TAIL;
}
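
With the fixed nfsd4_max_reply() table removed, the compound decoder now accumulates a per-op reply estimate as it walks the ops, keeps READ contributions in separate readcount/readbytes counters, reserves max_reply + readbytes via svc_reserve(), and clears rq_splice_ok when more than one READ is present or the non-READ reply could spill past one page. A hedged sketch of that accounting follows; the op list, the estimate values standing in for the new per-op nfsd4_max_reply(), and the auth_slack figure are all invented for the example.

/* Sketch of the reply-size accounting added to nfsd4_decode_compound(). */
#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE 4096
enum { OP_GETATTR, OP_READ, OP_READDIR };

static int max_reply_estimate(int op)
{
        switch (op) {
        case OP_READ:    return 1024 * 1024;  /* data-bearing op */
        case OP_READDIR: return 32768;
        default:         return 128;
        }
}

int main(void)
{
        int ops[] = { OP_GETATTR, OP_READ, OP_GETATTR };
        int auth_slack = 32;                 /* stand-in for rq_auth_slack */
        int max_reply = auth_slack + 8;      /* opcnt + status words */
        int readcount = 0, readbytes = 0;
        bool splice_ok = true;

        for (unsigned i = 0; i < sizeof(ops) / sizeof(ops[0]); i++) {
                if (ops[i] == OP_READ) {
                        readcount++;
                        readbytes += max_reply_estimate(ops[i]);
                } else {
                        max_reply += max_reply_estimate(ops[i]);
                }
        }
        if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
                splice_ok = false;

        printf("reserve %d bytes, splice_ok=%d\n",
               max_reply + readbytes, splice_ok);
        return 0;
}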
-static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode)
{
if (IS_I_VERSION(inode)) {
- write64(p, inode->i_version);
+ p = xdr_encode_hyper(p, inode->i_version);
} else {
- write32(p, stat->ctime.tv_sec);
- write32(p, stat->ctime.tv_nsec);
+ *p++ = cpu_to_be32(stat->ctime.tv_sec);
+ *p++ = cpu_to_be32(stat->ctime.tv_nsec);
}
+ return p;
}
-static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
{
- write32(p, c->atomic);
+ *p++ = cpu_to_be32(c->atomic);
if (c->change_supported) {
- write64(p, c->before_change);
- write64(p, c->after_change);
+ p = xdr_encode_hyper(p, c->before_change);
+ p = xdr_encode_hyper(p, c->after_change);
} else {
- write32(p, c->before_ctime_sec);
- write32(p, c->before_ctime_nsec);
- write32(p, c->after_ctime_sec);
- write32(p, c->after_ctime_nsec);
+ *p++ = cpu_to_be32(c->before_ctime_sec);
+ *p++ = cpu_to_be32(c->before_ctime_nsec);
+ *p++ = cpu_to_be32(c->after_ctime_sec);
+ *p++ = cpu_to_be32(c->after_ctime_nsec);
}
+ return p;
}
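
The write32()/write64() wrappers that took a __be32 ** are dropped in favour of helpers that take a plain pointer and return the advanced position, so callers can chain p = encode_change(p, ...). The 64-bit case is simply two big-endian words, high word first, which is what xdr_encode_hyper() provides. An illustrative user-space version (the helper names are invented; only the byte layout matches the kernel helpers):

/* Sketch: pointer-returning encode helpers in the style of
 * encode_change()/encode_cinfo() after the conversion. */
#include <arpa/inet.h>   /* htonl */
#include <stdint.h>
#include <stdio.h>

static uint32_t *encode_u32(uint32_t *p, uint32_t v)
{
        *p++ = htonl(v);
        return p;
}

static uint32_t *encode_hyper(uint32_t *p, uint64_t v)
{
        p = encode_u32(p, (uint32_t)(v >> 32));  /* high word first */
        return encode_u32(p, (uint32_t)v);
}

int main(void)
{
        uint32_t buf[4], *p = buf;

        p = encode_hyper(p, 0x123456789abcdef0ULL);  /* e.g. i_version */
        p = encode_u32(p, 1);                        /* e.g. atomic flag */
        printf("encoded %zu words\n", (size_t)(p - buf));
        return 0;
}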
-#define RESERVE_SPACE(nbytes) do { \
- p = resp->p; \
- BUG_ON(p + XDR_QUADLEN(nbytes) > resp->end); \
-} while (0)
-#define ADJUST_ARGS() resp->p = p
-
/* Encode as an array of strings the string given with components
* separated @sep, escaped with esc_enter and esc_exit.
*/
-static __be32 nfsd4_encode_components_esc(char sep, char *components,
- __be32 **pp, int *buflen,
- char esc_enter, char esc_exit)
+static __be32 nfsd4_encode_components_esc(struct xdr_stream *xdr, char sep,
+ char *components, char esc_enter,
+ char esc_exit)
{
- __be32 *p = *pp;
- __be32 *countp = p;
+ __be32 *p;
+ __be32 pathlen;
+ int pathlen_offset;
int strlen, count=0;
char *str, *end, *next;
dprintk("nfsd4_encode_components(%s)\n", components);
- if ((*buflen -= 4) < 0)
+
+ pathlen_offset = xdr->buf->len;
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
return nfserr_resource;
- WRITE32(0); /* We will fill this in with @count later */
+ p++; /* We will fill this in with @count later */
+
end = str = components;
while (*end) {
bool found_esc = false;
@@ -1789,59 +1750,57 @@ static __be32 nfsd4_encode_components_esc(char sep, char *components,
strlen = end - str;
if (strlen) {
- if ((*buflen -= ((XDR_QUADLEN(strlen) << 2) + 4)) < 0)
+ p = xdr_reserve_space(xdr, strlen + 4);
+ if (!p)
return nfserr_resource;
- WRITE32(strlen);
- WRITEMEM(str, strlen);
+ p = xdr_encode_opaque(p, str, strlen);
count++;
}
else
end++;
str = end;
}
- *pp = p;
- p = countp;
- WRITE32(count);
+ pathlen = htonl(xdr->buf->len - pathlen_offset);
+ write_bytes_to_xdr_buf(xdr->buf, pathlen_offset, &pathlen, 4);
return 0;
}
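
Because an xdr_stream only grows forward, a length or count that is not known until the data has been written is handled by reserving a 4-byte slot first, remembering its offset, and patching it afterwards with write_bytes_to_xdr_buf() — the pattern used here for the component array and again later for the fattr attribute length and the READ byte count. A self-contained sketch of the same reserve-and-backfill idea over a flat byte buffer (the buffer handling is deliberately simplified; only the ordering of the steps matters):

/* Sketch: reserve a 4-byte length slot, append variable-length
 * items, then backfill the slot once the count is known. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char buf[256];
static size_t buflen;

static void *reserve(size_t n)          /* like xdr_reserve_space() */
{
        void *p = buf + buflen;
        buflen += n;
        return p;
}

int main(void)
{
        size_t count_offset = buflen;
        uint32_t count = 0, wire;
        const char *items[] = { "srv1", "srv2" };

        reserve(4);                          /* slot for the count */
        for (unsigned i = 0; i < 2; i++) {
                size_t len = strlen(items[i]);
                wire = htonl(len);
                memcpy(reserve(4), &wire, 4);                    /* opaque length */
                memcpy(reserve((len + 3) & ~3), items[i], len);  /* padded data  */
                count++;
        }
        wire = htonl(count);                 /* like write_bytes_to_xdr_buf() */
        memcpy(buf + count_offset, &wire, 4);
        printf("%zu bytes, %u items\n", buflen, count);
        return 0;
}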
/* Encode as an array of strings the string given with components
* separated @sep.
*/
-static __be32 nfsd4_encode_components(char sep, char *components,
- __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_components(struct xdr_stream *xdr, char sep,
+ char *components)
{
- return nfsd4_encode_components_esc(sep, components, pp, buflen, 0, 0);
+ return nfsd4_encode_components_esc(xdr, sep, components, 0, 0);
}
/*
* encode a location element of a fs_locations structure
*/
-static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
- __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fs_location4(struct xdr_stream *xdr,
+ struct nfsd4_fs_location *location)
{
__be32 status;
- __be32 *p = *pp;
- status = nfsd4_encode_components_esc(':', location->hosts, &p, buflen,
+ status = nfsd4_encode_components_esc(xdr, ':', location->hosts,
'[', ']');
if (status)
return status;
- status = nfsd4_encode_components('/', location->path, &p, buflen);
+ status = nfsd4_encode_components(xdr, '/', location->path);
if (status)
return status;
- *pp = p;
return 0;
}
/*
* Encode a path in RFC3530 'pathname4' format
*/
-static __be32 nfsd4_encode_path(const struct path *root,
- const struct path *path, __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_path(struct xdr_stream *xdr,
+ const struct path *root,
+ const struct path *path)
{
struct path cur = *path;
- __be32 *p = *pp;
+ __be32 *p;
struct dentry **components = NULL;
unsigned int ncomponents = 0;
__be32 err = nfserr_jukebox;
@@ -1872,11 +1831,11 @@ static __be32 nfsd4_encode_path(const struct path *root,
components[ncomponents++] = cur.dentry;
cur.dentry = dget_parent(cur.dentry);
}
-
- *buflen -= 4;
- if (*buflen < 0)
+ err = nfserr_resource;
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_free;
- WRITE32(ncomponents);
+ *p++ = cpu_to_be32(ncomponents);
while (ncomponents) {
struct dentry *dentry = components[ncomponents - 1];
@@ -1884,20 +1843,18 @@ static __be32 nfsd4_encode_path(const struct path *root,
spin_lock(&dentry->d_lock);
len = dentry->d_name.len;
- *buflen -= 4 + (XDR_QUADLEN(len) << 2);
- if (*buflen < 0) {
+ p = xdr_reserve_space(xdr, len + 4);
+ if (!p) {
spin_unlock(&dentry->d_lock);
goto out_free;
}
- WRITE32(len);
- WRITEMEM(dentry->d_name.name, len);
+ p = xdr_encode_opaque(p, dentry->d_name.name, len);
dprintk("/%s", dentry->d_name.name);
spin_unlock(&dentry->d_lock);
dput(dentry);
ncomponents--;
}
- *pp = p;
err = 0;
out_free:
dprintk(")\n");
@@ -1908,8 +1865,8 @@ out_free:
return err;
}
-static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
- const struct path *path, __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fsloc_fsroot(struct xdr_stream *xdr,
+ struct svc_rqst *rqstp, const struct path *path)
{
struct svc_export *exp_ps;
__be32 res;
@@ -1917,7 +1874,7 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
exp_ps = rqst_find_fsidzero_export(rqstp);
if (IS_ERR(exp_ps))
return nfserrno(PTR_ERR(exp_ps));
- res = nfsd4_encode_path(&exp_ps->ex_path, path, pp, buflen);
+ res = nfsd4_encode_path(xdr, &exp_ps->ex_path, path);
exp_put(exp_ps);
return res;
}
@@ -1925,28 +1882,26 @@ static __be32 nfsd4_encode_fsloc_fsroot(struct svc_rqst *rqstp,
/*
* encode a fs_locations structure
*/
-static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp,
- struct svc_export *exp,
- __be32 **pp, int *buflen)
+static __be32 nfsd4_encode_fs_locations(struct xdr_stream *xdr,
+ struct svc_rqst *rqstp, struct svc_export *exp)
{
__be32 status;
int i;
- __be32 *p = *pp;
+ __be32 *p;
struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs;
- status = nfsd4_encode_fsloc_fsroot(rqstp, &exp->ex_path, &p, buflen);
+ status = nfsd4_encode_fsloc_fsroot(xdr, rqstp, &exp->ex_path);
if (status)
return status;
- if ((*buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
return nfserr_resource;
- WRITE32(fslocs->locations_count);
+ *p++ = cpu_to_be32(fslocs->locations_count);
for (i=0; i<fslocs->locations_count; i++) {
- status = nfsd4_encode_fs_location4(&fslocs->locations[i],
- &p, buflen);
+ status = nfsd4_encode_fs_location4(xdr, &fslocs->locations[i]);
if (status)
return status;
}
- *pp = p;
return 0;
}
@@ -1965,15 +1920,15 @@ static u32 nfs4_file_type(umode_t mode)
}
static inline __be32
-nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
- __be32 **p, int *buflen)
+nfsd4_encode_aclname(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ struct nfs4_ace *ace)
{
if (ace->whotype != NFS4_ACL_WHO_NAMED)
- return nfs4_acl_write_who(ace->whotype, p, buflen);
+ return nfs4_acl_write_who(xdr, ace->whotype);
else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
- return nfsd4_encode_group(rqstp, ace->who_gid, p, buflen);
+ return nfsd4_encode_group(xdr, rqstp, ace->who_gid);
else
- return nfsd4_encode_user(rqstp, ace->who_uid, p, buflen);
+ return nfsd4_encode_user(xdr, rqstp, ace->who_uid);
}
#define WORD0_ABSENT_FS_ATTRS (FATTR4_WORD0_FS_LOCATIONS | FATTR4_WORD0_FSID | \
@@ -1982,31 +1937,28 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, struct nfs4_ace *ace,
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
static inline __be32
-nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
+nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ void *context, int len)
{
- __be32 *p = *pp;
+ __be32 *p;
- if (*buflen < ((XDR_QUADLEN(len) << 2) + 4 + 4 + 4))
+ p = xdr_reserve_space(xdr, len + 4 + 4 + 4);
+ if (!p)
return nfserr_resource;
/*
* For now we use a 0 here to indicate the null translation; in
* the future we may place a call to translation code here.
*/
- if ((*buflen -= 8) < 0)
- return nfserr_resource;
-
- WRITE32(0); /* lfs */
- WRITE32(0); /* pi */
+ *p++ = cpu_to_be32(0); /* lfs */
+ *p++ = cpu_to_be32(0); /* pi */
p = xdr_encode_opaque(p, context, len);
- *buflen -= (XDR_QUADLEN(len) << 2) + 4;
-
- *pp = p;
return 0;
}
#else
static inline __be32
-nfsd4_encode_security_label(struct svc_rqst *rqstp, void *context, int len, __be32 **pp, int *buflen)
+nfsd4_encode_security_label(struct xdr_stream *xdr, struct svc_rqst *rqstp,
+ void *context, int len)
{ return 0; }
#endif
@@ -2045,12 +1997,11 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat)
/*
* Note: @fhp can be NULL; in this case, we might have to compose the filehandle
* ourselves.
- *
- * countp is the buffer size in _words_
*/
-__be32
-nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
- struct dentry *dentry, __be32 **buffer, int count, u32 *bmval,
+static __be32
+nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp,
+ struct svc_export *exp,
+ struct dentry *dentry, u32 *bmval,
struct svc_rqst *rqstp, int ignore_crossmnt)
{
u32 bmval0 = bmval[0];
@@ -2059,12 +2010,13 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
struct kstat stat;
struct svc_fh *tempfh = NULL;
struct kstatfs statfs;
- int buflen = count << 2;
- __be32 *attrlenp;
+ __be32 *p;
+ int starting_len = xdr->buf->len;
+ int attrlen_offset;
+ __be32 attrlen;
u32 dummy;
u64 dummy64;
u32 rdattr_err = 0;
- __be32 *p = *buffer;
__be32 status;
int err;
int aclsupport = 0;
@@ -2095,8 +2047,8 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
err = vfs_getattr(&path, &stat);
if (err)
goto out_nfserr;
- if ((bmval0 & (FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL |
- FATTR4_WORD0_MAXNAME)) ||
+ if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE |
+ FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_MAXNAME)) ||
(bmval1 & (FATTR4_WORD1_SPACE_AVAIL | FATTR4_WORD1_SPACE_FREE |
FATTR4_WORD1_SPACE_TOTAL))) {
err = vfs_statfs(&path, &statfs);
@@ -2145,25 +2097,33 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
#endif /* CONFIG_NFSD_V4_SECURITY_LABEL */
if (bmval2) {
- if ((buflen -= 16) < 0)
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
goto out_resource;
- WRITE32(3);
- WRITE32(bmval0);
- WRITE32(bmval1);
- WRITE32(bmval2);
+ *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(bmval0);
+ *p++ = cpu_to_be32(bmval1);
+ *p++ = cpu_to_be32(bmval2);
} else if (bmval1) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE32(2);
- WRITE32(bmval0);
- WRITE32(bmval1);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(bmval0);
+ *p++ = cpu_to_be32(bmval1);
} else {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE32(1);
- WRITE32(bmval0);
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(bmval0);
}
- attrlenp = p++; /* to be backfilled later */
+
+ attrlen_offset = xdr->buf->len;
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ goto out_resource;
+ p++; /* to be backfilled later */
if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
u32 word0 = nfsd_suppattrs0(minorversion);
@@ -2175,296 +2135,343 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
if (!contextsupport)
word2 &= ~FATTR4_WORD2_SECURITY_LABEL;
if (!word2) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE32(2);
- WRITE32(word0);
- WRITE32(word1);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(word0);
+ *p++ = cpu_to_be32(word1);
} else {
- if ((buflen -= 16) < 0)
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
goto out_resource;
- WRITE32(3);
- WRITE32(word0);
- WRITE32(word1);
- WRITE32(word2);
+ *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(word0);
+ *p++ = cpu_to_be32(word1);
+ *p++ = cpu_to_be32(word2);
}
}
if (bmval0 & FATTR4_WORD0_TYPE) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
dummy = nfs4_file_type(stat.mode);
if (dummy == NF4BAD) {
status = nfserr_serverfault;
goto out;
}
- WRITE32(dummy);
+ *p++ = cpu_to_be32(dummy);
}
if (bmval0 & FATTR4_WORD0_FH_EXPIRE_TYPE) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
if (exp->ex_flags & NFSEXP_NOSUBTREECHECK)
- WRITE32(NFS4_FH_PERSISTENT);
+ *p++ = cpu_to_be32(NFS4_FH_PERSISTENT);
else
- WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
+ *p++ = cpu_to_be32(NFS4_FH_PERSISTENT|
+ NFS4_FH_VOL_RENAME);
}
if (bmval0 & FATTR4_WORD0_CHANGE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- write_change(&p, &stat, dentry->d_inode);
+ p = encode_change(p, &stat, dentry->d_inode);
}
if (bmval0 & FATTR4_WORD0_SIZE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64(stat.size);
+ p = xdr_encode_hyper(p, stat.size);
}
if (bmval0 & FATTR4_WORD0_LINK_SUPPORT) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_SYMLINK_SUPPORT) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_NAMED_ATTR) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
}
if (bmval0 & FATTR4_WORD0_FSID) {
- if ((buflen -= 16) < 0)
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
goto out_resource;
if (exp->ex_fslocs.migrated) {
- WRITE64(NFS4_REFERRAL_FSID_MAJOR);
- WRITE64(NFS4_REFERRAL_FSID_MINOR);
+ p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MAJOR);
+ p = xdr_encode_hyper(p, NFS4_REFERRAL_FSID_MINOR);
} else switch(fsid_source(fhp)) {
case FSIDSOURCE_FSID:
- WRITE64((u64)exp->ex_fsid);
- WRITE64((u64)0);
+ p = xdr_encode_hyper(p, (u64)exp->ex_fsid);
+ p = xdr_encode_hyper(p, (u64)0);
break;
case FSIDSOURCE_DEV:
- WRITE32(0);
- WRITE32(MAJOR(stat.dev));
- WRITE32(0);
- WRITE32(MINOR(stat.dev));
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(MAJOR(stat.dev));
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(MINOR(stat.dev));
break;
case FSIDSOURCE_UUID:
- WRITEMEM(exp->ex_uuid, 16);
+ p = xdr_encode_opaque_fixed(p, exp->ex_uuid,
+ EX_UUID_LEN);
break;
}
}
if (bmval0 & FATTR4_WORD0_UNIQUE_HANDLES) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
}
if (bmval0 & FATTR4_WORD0_LEASE_TIME) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(nn->nfsd4_lease);
+ *p++ = cpu_to_be32(nn->nfsd4_lease);
}
if (bmval0 & FATTR4_WORD0_RDATTR_ERROR) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(rdattr_err);
+ *p++ = cpu_to_be32(rdattr_err);
}
if (bmval0 & FATTR4_WORD0_ACL) {
struct nfs4_ace *ace;
if (acl == NULL) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
goto out_acl;
}
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(acl->naces);
+ *p++ = cpu_to_be32(acl->naces);
for (ace = acl->aces; ace < acl->aces + acl->naces; ace++) {
- if ((buflen -= 4*3) < 0)
+ p = xdr_reserve_space(xdr, 4*3);
+ if (!p)
goto out_resource;
- WRITE32(ace->type);
- WRITE32(ace->flag);
- WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
- status = nfsd4_encode_aclname(rqstp, ace, &p, &buflen);
+ *p++ = cpu_to_be32(ace->type);
+ *p++ = cpu_to_be32(ace->flag);
+ *p++ = cpu_to_be32(ace->access_mask &
+ NFS4_ACE_MASK_ALL);
+ status = nfsd4_encode_aclname(xdr, rqstp, ace);
if (status)
goto out;
}
}
out_acl:
if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(aclsupport ?
+ *p++ = cpu_to_be32(aclsupport ?
ACL4_SUPPORT_ALLOW_ACL|ACL4_SUPPORT_DENY_ACL : 0);
}
if (bmval0 & FATTR4_WORD0_CANSETTIME) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_CASE_INSENSITIVE) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
}
if (bmval0 & FATTR4_WORD0_CASE_PRESERVING) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_CHOWN_RESTRICTED) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_FILEHANDLE) {
- buflen -= (XDR_QUADLEN(fhp->fh_handle.fh_size) << 2) + 4;
- if (buflen < 0)
+ p = xdr_reserve_space(xdr, fhp->fh_handle.fh_size + 4);
+ if (!p)
goto out_resource;
- WRITE32(fhp->fh_handle.fh_size);
- WRITEMEM(&fhp->fh_handle.fh_base, fhp->fh_handle.fh_size);
+ p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base,
+ fhp->fh_handle.fh_size);
}
if (bmval0 & FATTR4_WORD0_FILEID) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64(stat.ino);
+ p = xdr_encode_hyper(p, stat.ino);
}
if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64((u64) statfs.f_ffree);
+ p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
}
if (bmval0 & FATTR4_WORD0_FILES_FREE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64((u64) statfs.f_ffree);
+ p = xdr_encode_hyper(p, (u64) statfs.f_ffree);
}
if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64((u64) statfs.f_files);
+ p = xdr_encode_hyper(p, (u64) statfs.f_files);
}
if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
- status = nfsd4_encode_fs_locations(rqstp, exp, &p, &buflen);
+ status = nfsd4_encode_fs_locations(xdr, rqstp, exp);
if (status)
goto out;
}
if (bmval0 & FATTR4_WORD0_HOMOGENEOUS) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval0 & FATTR4_WORD0_MAXFILESIZE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64(exp->ex_path.mnt->mnt_sb->s_maxbytes);
+ p = xdr_encode_hyper(p, exp->ex_path.mnt->mnt_sb->s_maxbytes);
}
if (bmval0 & FATTR4_WORD0_MAXLINK) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(255);
+ *p++ = cpu_to_be32(255);
}
if (bmval0 & FATTR4_WORD0_MAXNAME) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(statfs.f_namelen);
+ *p++ = cpu_to_be32(statfs.f_namelen);
}
if (bmval0 & FATTR4_WORD0_MAXREAD) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64((u64) svc_max_payload(rqstp));
+ p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
}
if (bmval0 & FATTR4_WORD0_MAXWRITE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE64((u64) svc_max_payload(rqstp));
+ p = xdr_encode_hyper(p, (u64) svc_max_payload(rqstp));
}
if (bmval1 & FATTR4_WORD1_MODE) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(stat.mode & S_IALLUGO);
+ *p++ = cpu_to_be32(stat.mode & S_IALLUGO);
}
if (bmval1 & FATTR4_WORD1_NO_TRUNC) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(1);
+ *p++ = cpu_to_be32(1);
}
if (bmval1 & FATTR4_WORD1_NUMLINKS) {
- if ((buflen -= 4) < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto out_resource;
- WRITE32(stat.nlink);
+ *p++ = cpu_to_be32(stat.nlink);
}
if (bmval1 & FATTR4_WORD1_OWNER) {
- status = nfsd4_encode_user(rqstp, stat.uid, &p, &buflen);
+ status = nfsd4_encode_user(xdr, rqstp, stat.uid);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
- status = nfsd4_encode_group(rqstp, stat.gid, &p, &buflen);
+ status = nfsd4_encode_group(xdr, rqstp, stat.gid);
if (status)
goto out;
}
if (bmval1 & FATTR4_WORD1_RAWDEV) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
- WRITE32((u32) MAJOR(stat.rdev));
- WRITE32((u32) MINOR(stat.rdev));
+ *p++ = cpu_to_be32((u32) MAJOR(stat.rdev));
+ *p++ = cpu_to_be32((u32) MINOR(stat.rdev));
}
if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
dummy64 = (u64)statfs.f_bavail * (u64)statfs.f_bsize;
- WRITE64(dummy64);
+ p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
dummy64 = (u64)statfs.f_bfree * (u64)statfs.f_bsize;
- WRITE64(dummy64);
+ p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
dummy64 = (u64)statfs.f_blocks * (u64)statfs.f_bsize;
- WRITE64(dummy64);
+ p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_SPACE_USED) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
dummy64 = (u64)stat.blocks << 9;
- WRITE64(dummy64);
+ p = xdr_encode_hyper(p, dummy64);
}
if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE64((s64)stat.atime.tv_sec);
- WRITE32(stat.atime.tv_nsec);
+ p = xdr_encode_hyper(p, (s64)stat.atime.tv_sec);
+ *p++ = cpu_to_be32(stat.atime.tv_nsec);
}
if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE32(0);
- WRITE32(1);
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(1);
+ *p++ = cpu_to_be32(0);
}
if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE64((s64)stat.ctime.tv_sec);
- WRITE32(stat.ctime.tv_nsec);
+ p = xdr_encode_hyper(p, (s64)stat.ctime.tv_sec);
+ *p++ = cpu_to_be32(stat.ctime.tv_nsec);
}
if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
- if ((buflen -= 12) < 0)
+ p = xdr_reserve_space(xdr, 12);
+ if (!p)
goto out_resource;
- WRITE64((s64)stat.mtime.tv_sec);
- WRITE32(stat.mtime.tv_nsec);
+ p = xdr_encode_hyper(p, (s64)stat.mtime.tv_sec);
+ *p++ = cpu_to_be32(stat.mtime.tv_nsec);
}
if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
- if ((buflen -= 8) < 0)
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
goto out_resource;
/*
* Get parent's attributes if not ignoring crossmount
@@ -2473,25 +2480,26 @@ out_acl:
if (ignore_crossmnt == 0 &&
dentry == exp->ex_path.mnt->mnt_root)
get_parent_attributes(exp, &stat);
- WRITE64(stat.ino);
+ p = xdr_encode_hyper(p, stat.ino);
}
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
- status = nfsd4_encode_security_label(rqstp, context,
- contextlen, &p, &buflen);
+ status = nfsd4_encode_security_label(xdr, rqstp, context,
+ contextlen);
if (status)
goto out;
}
if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
- if ((buflen -= 16) < 0)
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
goto out_resource;
- WRITE32(3);
- WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
- WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
- WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
+ *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+ *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
+ *p++ = cpu_to_be32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
}
- *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
- *buffer = p;
+ attrlen = htonl(xdr->buf->len - attrlen_offset - 4);
+ write_bytes_to_xdr_buf(xdr->buf, attrlen_offset, &attrlen, 4);
status = nfs_ok;
out:
@@ -2504,6 +2512,8 @@ out:
fh_put(tempfh);
kfree(tempfh);
}
+ if (status)
+ xdr_truncate_encode(xdr, starting_len);
return status;
out_nfserr:
status = nfserrno(err);
@@ -2513,6 +2523,37 @@ out_resource:
goto out;
}
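
nfsd4_encode_fattr() now records starting_len on entry and, if any attribute fails to encode (out of reserved space, an ACL or security-label error, and so on), calls xdr_truncate_encode() so the partially written attributes are discarded and the caller can still emit a clean error. A rough user-space analogue of that roll-back, using the same simplified flat buffer as the earlier sketches:

/* Sketch: roll back partially-encoded output on error, in the
 * spirit of xdr_truncate_encode(xdr, starting_len). */
#include <stdio.h>
#include <string.h>

static unsigned char buf[128];
static size_t buflen;

static int encode_attrs(int fail_midway)
{
        size_t starting_len = buflen;

        memcpy(buf + buflen, "\x00\x00\x00\x01", 4);  /* some attribute */
        buflen += 4;
        if (fail_midway) {
                buflen = starting_len;   /* xdr_truncate_encode() equivalent */
                return -1;
        }
        return 0;
}

int main(void)
{
        printf("ok: %d, len=%zu\n", encode_attrs(0), buflen);
        printf("err: %d, len=%zu\n", encode_attrs(1), buflen);
        return 0;
}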
+static void svcxdr_init_encode_from_buffer(struct xdr_stream *xdr,
+ struct xdr_buf *buf, __be32 *p, int bytes)
+{
+ xdr->scratch.iov_len = 0;
+ memset(buf, 0, sizeof(struct xdr_buf));
+ buf->head[0].iov_base = p;
+ buf->head[0].iov_len = 0;
+ buf->len = 0;
+ xdr->buf = buf;
+ xdr->iov = buf->head;
+ xdr->p = p;
+ xdr->end = (void *)p + bytes;
+ buf->buflen = bytes;
+}
+
+__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
+ struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry, u32 *bmval,
+ struct svc_rqst *rqstp, int ignore_crossmnt)
+{
+ struct xdr_buf dummy;
+ struct xdr_stream xdr;
+ __be32 ret;
+
+ svcxdr_init_encode_from_buffer(&xdr, &dummy, *p, words << 2);
+ ret = nfsd4_encode_fattr(&xdr, fhp, exp, dentry, bmval, rqstp,
+ ignore_crossmnt);
+ *p = xdr.p;
+ return ret;
+}
+
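
The new nfsd4_encode_fattr_to_buf() keeps the old encode-into-a-caller-supplied-buffer interface available for code that does not go through a compound response stream: it points a throwaway xdr_stream/xdr_buf pair at the caller's words, runs the normal encoder, and hands the advanced pointer back. The sketch below shows only the shape of such a wrapper; the mini_stream type is invented for illustration and is not the kernel structure.

/* Sketch: present a plain caller buffer through a minimal
 * stream object so one encoder can serve both paths. */
#include <stdint.h>
#include <stdio.h>

struct mini_stream {            /* stand-in for struct xdr_stream */
        uint32_t *p;            /* next free word */
        uint32_t *end;          /* one past the last usable word */
};

static void stream_init_from_buffer(struct mini_stream *s,
                                    uint32_t *p, int words)
{
        s->p = p;
        s->end = p + words;
}

static int encode_something(struct mini_stream *s)
{
        if (s->end - s->p < 2)
                return -1;      /* nfserr_resource equivalent */
        *s->p++ = 0;
        *s->p++ = 42;
        return 0;
}

int main(void)
{
        uint32_t words[8], *p = words;
        struct mini_stream s;

        stream_init_from_buffer(&s, p, 8);
        if (encode_something(&s) == 0)
                p = s.p;        /* hand the advanced pointer back */
        printf("used %zu words\n", (size_t)(p - words));
        return 0;
}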
static inline int attributes_need_mount(u32 *bmval)
{
if (bmval[0] & ~(FATTR4_WORD0_RDATTR_ERROR | FATTR4_WORD0_LEASE_TIME))
@@ -2523,8 +2564,8 @@ static inline int attributes_need_mount(u32 *bmval)
}
static __be32
-nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
- const char *name, int namlen, __be32 **p, int buflen)
+nfsd4_encode_dirent_fattr(struct xdr_stream *xdr, struct nfsd4_readdir *cd,
+ const char *name, int namlen)
{
struct svc_export *exp = cd->rd_fhp->fh_export;
struct dentry *dentry;
@@ -2576,7 +2617,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
}
out_encode:
- nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
+ nfserr = nfsd4_encode_fattr(xdr, NULL, exp, dentry, cd->rd_bmval,
cd->rd_rqstp, ignore_crossmnt);
out_put:
dput(dentry);
@@ -2585,9 +2626,12 @@ out_put:
}
static __be32 *
-nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr)
+nfsd4_encode_rdattr_error(struct xdr_stream *xdr, __be32 nfserr)
{
- if (buflen < 6)
+ __be32 *p;
+
+ p = xdr_reserve_space(xdr, 6);
+ if (!p)
return NULL;
*p++ = htonl(2);
*p++ = htonl(FATTR4_WORD0_RDATTR_ERROR); /* bmval0 */
@@ -2604,10 +2648,13 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
{
struct readdir_cd *ccd = ccdv;
struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
- int buflen;
- __be32 *p = cd->buffer;
- __be32 *cookiep;
+ struct xdr_stream *xdr = cd->xdr;
+ int start_offset = xdr->buf->len;
+ int cookie_offset;
+ int entry_bytes;
__be32 nfserr = nfserr_toosmall;
+ __be64 wire_offset;
+ __be32 *p;
/* In nfsv4, "." and ".." never make it onto the wire.. */
if (name && isdotent(name, namlen)) {
@@ -2615,19 +2662,24 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
return 0;
}
- if (cd->offset)
- xdr_encode_hyper(cd->offset, (u64) offset);
+ if (cd->cookie_offset) {
+ wire_offset = cpu_to_be64(offset);
+ write_bytes_to_xdr_buf(xdr->buf, cd->cookie_offset,
+ &wire_offset, 8);
+ }
- buflen = cd->buflen - 4 - XDR_QUADLEN(namlen);
- if (buflen < 0)
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
goto fail;
-
*p++ = xdr_one; /* mark entry present */
- cookiep = p;
+ cookie_offset = xdr->buf->len;
+ p = xdr_reserve_space(xdr, 3*4 + namlen);
+ if (!p)
+ goto fail;
p = xdr_encode_hyper(p, NFS_OFFSET_MAX); /* offset of next entry */
p = xdr_encode_array(p, name, namlen); /* name length & name */
- nfserr = nfsd4_encode_dirent_fattr(cd, name, namlen, &p, buflen);
+ nfserr = nfsd4_encode_dirent_fattr(xdr, cd, name, namlen);
switch (nfserr) {
case nfs_ok:
break;
@@ -2646,59 +2698,74 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
*/
if (!(cd->rd_bmval[0] & FATTR4_WORD0_RDATTR_ERROR))
goto fail;
- p = nfsd4_encode_rdattr_error(p, buflen, nfserr);
+ p = nfsd4_encode_rdattr_error(xdr, nfserr);
if (p == NULL) {
nfserr = nfserr_toosmall;
goto fail;
}
}
- cd->buflen -= (p - cd->buffer);
- cd->buffer = p;
- cd->offset = cookiep;
+ nfserr = nfserr_toosmall;
+ entry_bytes = xdr->buf->len - start_offset;
+ if (entry_bytes > cd->rd_maxcount)
+ goto fail;
+ cd->rd_maxcount -= entry_bytes;
+ if (!cd->rd_dircount)
+ goto fail;
+ cd->rd_dircount--;
+ cd->cookie_offset = cookie_offset;
skip_entry:
cd->common.err = nfs_ok;
return 0;
fail:
+ xdr_truncate_encode(xdr, start_offset);
cd->common.err = nfserr;
return -EINVAL;
}
-static void
-nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
+static __be32
+nfsd4_encode_stateid(struct xdr_stream *xdr, stateid_t *sid)
{
__be32 *p;
- RESERVE_SPACE(sizeof(stateid_t));
- WRITE32(sid->si_generation);
- WRITEMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, sizeof(stateid_t));
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(sid->si_generation);
+ p = xdr_encode_opaque_fixed(p, &sid->si_opaque,
+ sizeof(stateid_opaque_t));
+ return 0;
}
static __be32
nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(8);
- WRITE32(access->ac_supported);
- WRITE32(access->ac_resp_access);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(access->ac_supported);
+ *p++ = cpu_to_be32(access->ac_resp_access);
}
return nfserr;
}
static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 8);
- WRITEMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(bcts->dir);
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(bcts->dir);
/* Sorry, we do not yet support RDMA over 4.1: */
- WRITE32(0);
- ADJUST_ARGS();
+ *p++ = cpu_to_be32(0);
}
return nfserr;
}
@@ -2706,8 +2773,10 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
static __be32
nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (!nfserr)
- nfsd4_encode_stateid(resp, &close->cl_stateid);
+ nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid);
return nfserr;
}
@@ -2716,12 +2785,15 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
static __be32
nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(NFS4_VERIFIER_SIZE);
- WRITEMEM(commit->co_verf.data, NFS4_VERIFIER_SIZE);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
+ NFS4_VERIFIER_SIZE);
}
return nfserr;
}
@@ -2729,15 +2801,17 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
static __be32
nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(32);
- write_cinfo(&p, &create->cr_cinfo);
- WRITE32(2);
- WRITE32(create->cr_bmval[0]);
- WRITE32(create->cr_bmval[1]);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 32);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &create->cr_cinfo);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(create->cr_bmval[0]);
+ *p++ = cpu_to_be32(create->cr_bmval[1]);
}
return nfserr;
}
@@ -2746,14 +2820,13 @@ static __be32
nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr)
{
struct svc_fh *fhp = getattr->ga_fhp;
- int buflen;
+ struct xdr_stream *xdr = &resp->xdr;
if (nfserr)
return nfserr;
- buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
- nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
- &resp->p, buflen, getattr->ga_bmval,
+ nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
+ getattr->ga_bmval,
resp->rqstp, 0);
return nfserr;
}
@@ -2761,16 +2834,17 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
static __be32
nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh **fhpp)
{
+ struct xdr_stream *xdr = &resp->xdr;
struct svc_fh *fhp = *fhpp;
unsigned int len;
__be32 *p;
if (!nfserr) {
len = fhp->fh_handle.fh_size;
- RESERVE_SPACE(len + 4);
- WRITE32(len);
- WRITEMEM(&fhp->fh_handle.fh_base, len);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, len + 4);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
}
return nfserr;
}
@@ -2779,52 +2853,69 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
* Including all fields other than the name, a LOCK4denied structure requires
* 8(clientid) + 4(namelen) + 8(offset) + 8(length) + 4(type) = 32 bytes.
*/
-static void
-nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
+static __be32
+nfsd4_encode_lock_denied(struct xdr_stream *xdr, struct nfsd4_lock_denied *ld)
{
struct xdr_netobj *conf = &ld->ld_owner;
__be32 *p;
- RESERVE_SPACE(32 + XDR_LEN(conf->len));
- WRITE64(ld->ld_start);
- WRITE64(ld->ld_length);
- WRITE32(ld->ld_type);
+again:
+ p = xdr_reserve_space(xdr, 32 + XDR_LEN(conf->len));
+ if (!p) {
+ /*
+ * Don't fail to return the result just because we can't
+ * return the conflicting open:
+ */
+ if (conf->len) {
+ conf->len = 0;
+ conf->data = NULL;
+ goto again;
+ }
+ return nfserr_resource;
+ }
+ p = xdr_encode_hyper(p, ld->ld_start);
+ p = xdr_encode_hyper(p, ld->ld_length);
+ *p++ = cpu_to_be32(ld->ld_type);
if (conf->len) {
- WRITEMEM(&ld->ld_clientid, 8);
- WRITE32(conf->len);
- WRITEMEM(conf->data, conf->len);
- kfree(conf->data);
+ p = xdr_encode_opaque_fixed(p, &ld->ld_clientid, 8);
+ p = xdr_encode_opaque(p, conf->data, conf->len);
} else { /* non - nfsv4 lock in conflict, no clientid nor owner */
- WRITE64((u64)0); /* clientid */
- WRITE32(0); /* length of owner name */
+ p = xdr_encode_hyper(p, (u64)0); /* clientid */
+ *p++ = cpu_to_be32(0); /* length of owner name */
}
- ADJUST_ARGS();
+ return nfserr_denied;
}
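
nfsd4_encode_lock_denied() now returns a status instead of assuming space: if reserving room for the denial including the conflicting owner name fails, it retries once with the owner stripped, so the client still learns the lock was denied; the owner buffer is freed by the caller on every path rather than inside the encoder. A small sketch of that "drop the optional detail rather than fail" retry, with made-up sizes:

/* Sketch: retry an encode without optional payload when the full
 * version does not fit, as nfsd4_encode_lock_denied() now does. */
#include <stddef.h>
#include <stdio.h>

static int reserve_room;   /* pretend this is the remaining stream space */

static int encode_denied(size_t owner_len)
{
again:
        if (32 + owner_len > (size_t)reserve_room) {
                if (owner_len) {        /* drop the conflicting owner, retry */
                        owner_len = 0;
                        goto again;
                }
                return -1;              /* nfserr_resource equivalent */
        }
        printf("encoded denial, owner bytes: %zu\n", owner_len);
        return 0;
}

int main(void)
{
        reserve_room = 40;
        encode_denied(24);              /* too big with owner, fits without */
        reserve_room = 16;
        printf("tight: %d\n", encode_denied(24));  /* fails even stripped */
        return 0;
}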
static __be32
nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (!nfserr)
- nfsd4_encode_stateid(resp, &lock->lk_resp_stateid);
+ nfserr = nfsd4_encode_stateid(xdr, &lock->lk_resp_stateid);
else if (nfserr == nfserr_denied)
- nfsd4_encode_lock_denied(resp, &lock->lk_denied);
-
+ nfserr = nfsd4_encode_lock_denied(xdr, &lock->lk_denied);
+ kfree(lock->lk_denied.ld_owner.data);
return nfserr;
}
static __be32
nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (nfserr == nfserr_denied)
- nfsd4_encode_lock_denied(resp, &lockt->lt_denied);
+ nfsd4_encode_lock_denied(xdr, &lockt->lt_denied);
return nfserr;
}
static __be32
nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (!nfserr)
- nfsd4_encode_stateid(resp, &locku->lu_stateid);
+ nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid);
return nfserr;
}
@@ -2833,12 +2924,14 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
static __be32
nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(20);
- write_cinfo(&p, &link->li_cinfo);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &link->li_cinfo);
}
return nfserr;
}
@@ -2847,72 +2940,86 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
static __be32
nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (nfserr)
goto out;
- nfsd4_encode_stateid(resp, &open->op_stateid);
- RESERVE_SPACE(40);
- write_cinfo(&p, &open->op_cinfo);
- WRITE32(open->op_rflags);
- WRITE32(2);
- WRITE32(open->op_bmval[0]);
- WRITE32(open->op_bmval[1]);
- WRITE32(open->op_delegate_type);
- ADJUST_ARGS();
+ nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
+ if (nfserr)
+ goto out;
+ p = xdr_reserve_space(xdr, 40);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &open->op_cinfo);
+ *p++ = cpu_to_be32(open->op_rflags);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(open->op_bmval[0]);
+ *p++ = cpu_to_be32(open->op_bmval[1]);
+ *p++ = cpu_to_be32(open->op_delegate_type);
switch (open->op_delegate_type) {
case NFS4_OPEN_DELEGATE_NONE:
break;
case NFS4_OPEN_DELEGATE_READ:
- nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
- RESERVE_SPACE(20);
- WRITE32(open->op_recall);
+ nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
+ if (nfserr)
+ return nfserr;
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(open->op_recall);
/*
* TODO: ACE's in delegations
*/
- WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
- WRITE32(0);
- WRITE32(0);
- WRITE32(0); /* XXX: is NULL principal ok? */
- ADJUST_ARGS();
+ *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */
break;
case NFS4_OPEN_DELEGATE_WRITE:
- nfsd4_encode_stateid(resp, &open->op_delegate_stateid);
- RESERVE_SPACE(32);
- WRITE32(0);
+ nfserr = nfsd4_encode_stateid(xdr, &open->op_delegate_stateid);
+ if (nfserr)
+ return nfserr;
+ p = xdr_reserve_space(xdr, 32);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(0);
/*
* TODO: space_limit's in delegations
*/
- WRITE32(NFS4_LIMIT_SIZE);
- WRITE32(~(u32)0);
- WRITE32(~(u32)0);
+ *p++ = cpu_to_be32(NFS4_LIMIT_SIZE);
+ *p++ = cpu_to_be32(~(u32)0);
+ *p++ = cpu_to_be32(~(u32)0);
/*
* TODO: ACE's in delegations
*/
- WRITE32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
- WRITE32(0);
- WRITE32(0);
- WRITE32(0); /* XXX: is NULL principal ok? */
- ADJUST_ARGS();
+ *p++ = cpu_to_be32(NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0); /* XXX: is NULL principal ok? */
break;
case NFS4_OPEN_DELEGATE_NONE_EXT: /* 4.1 */
switch (open->op_why_no_deleg) {
case WND4_CONTENTION:
case WND4_RESOURCE:
- RESERVE_SPACE(8);
- WRITE32(open->op_why_no_deleg);
- WRITE32(0); /* deleg signaling not supported yet */
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(open->op_why_no_deleg);
+ /* deleg signaling not supported yet: */
+ *p++ = cpu_to_be32(0);
break;
default:
- RESERVE_SPACE(4);
- WRITE32(open->op_why_no_deleg);
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(open->op_why_no_deleg);
}
- ADJUST_ARGS();
break;
default:
BUG();
@@ -2925,8 +3032,10 @@ out:
static __be32
nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (!nfserr)
- nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
+ nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
return nfserr;
}
@@ -2934,127 +3043,233 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
static __be32
nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
{
+ struct xdr_stream *xdr = &resp->xdr;
+
if (!nfserr)
- nfsd4_encode_stateid(resp, &od->od_stateid);
+ nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid);
return nfserr;
}
-static __be32
-nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
- struct nfsd4_read *read)
+static __be32 nfsd4_encode_splice_read(
+ struct nfsd4_compoundres *resp,
+ struct nfsd4_read *read,
+ struct file *file, unsigned long maxcount)
{
+ struct xdr_stream *xdr = &resp->xdr;
+ struct xdr_buf *buf = xdr->buf;
u32 eof;
- int v;
- struct page *page;
- unsigned long maxcount;
- long len;
- __be32 *p;
+ int space_left;
+ __be32 nfserr;
+ __be32 *p = xdr->p - 2;
- if (nfserr)
- return nfserr;
- if (resp->xbuf->page_len)
+ /*
+ * Don't inline pages unless we know there's room for eof,
+ * count, and possible padding:
+ */
+ if (xdr->end - xdr->p < 3)
return nfserr_resource;
- RESERVE_SPACE(8); /* eof flag and byte count */
+ nfserr = nfsd_splice_read(read->rd_rqstp, file,
+ read->rd_offset, &maxcount);
+ if (nfserr) {
+ /*
+ * nfsd_splice_actor may have already messed with the
+ * page length; reset it so as not to confuse
+ * xdr_truncate_encode:
+ */
+ buf->page_len = 0;
+ return nfserr;
+ }
- maxcount = svc_max_payload(resp->rqstp);
- if (maxcount > read->rd_length)
- maxcount = read->rd_length;
+ eof = (read->rd_offset + maxcount >=
+ read->rd_fhp->fh_dentry->d_inode->i_size);
+
+ *(p++) = htonl(eof);
+ *(p++) = htonl(maxcount);
+
+ buf->page_len = maxcount;
+ buf->len += maxcount;
+ xdr->page_ptr += (maxcount + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ /* Use rest of head for padding and remaining ops: */
+ buf->tail[0].iov_base = xdr->p;
+ buf->tail[0].iov_len = 0;
+ xdr->iov = buf->tail;
+ if (maxcount&3) {
+ int pad = 4 - (maxcount&3);
+
+ *(xdr->p++) = 0;
+
+ buf->tail[0].iov_base += maxcount&3;
+ buf->tail[0].iov_len = pad;
+ buf->len += pad;
+ }
+
+ space_left = min_t(int, (void *)xdr->end - (void *)xdr->p,
+ buf->buflen - buf->len);
+ buf->buflen = buf->len + space_left;
+ xdr->end = (__be32 *)((void *)xdr->end + space_left);
+
+ return 0;
+}
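
Both read paths must pad the opaque data out to the next 4-byte XDR boundary: the splice path writes a zero word into the head and lets the buffer tail cover the needed pad bytes, while the readv path below zero-fills over the reserved space. The arithmetic is the same in both, pad = 4 - (maxcount & 3) when maxcount is not word-aligned. A tiny standalone sketch of that padding:

/* Sketch: pad XDR opaque data to a 4-byte boundary, as both
 * nfsd4_encode_splice_read() and nfsd4_encode_readv() must do. */
#include <stdio.h>
#include <string.h>

int main(void)
{
        unsigned char out[16] = { 0 };
        const char data[] = "abcde";            /* 5 data bytes */
        size_t maxcount = sizeof(data) - 1;
        size_t pad = (maxcount & 3) ? 4 - (maxcount & 3) : 0;

        memcpy(out, data, maxcount);
        memset(out + maxcount, 0, pad);         /* zero fill, never garbage */
        printf("payload %zu + pad %zu = %zu bytes on the wire\n",
               maxcount, pad, maxcount + pad);
        return 0;
}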
+
+static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
+ struct nfsd4_read *read,
+ struct file *file, unsigned long maxcount)
+{
+ struct xdr_stream *xdr = &resp->xdr;
+ u32 eof;
+ int v;
+ int starting_len = xdr->buf->len - 8;
+ long len;
+ int thislen;
+ __be32 nfserr;
+ __be32 tmp;
+ __be32 *p;
+ u32 zzz = 0;
+ int pad;
len = maxcount;
v = 0;
- while (len > 0) {
- page = *(resp->rqstp->rq_next_page);
- if (!page) { /* ran out of pages */
- maxcount -= len;
- break;
- }
- resp->rqstp->rq_vec[v].iov_base = page_address(page);
- resp->rqstp->rq_vec[v].iov_len =
- len < PAGE_SIZE ? len : PAGE_SIZE;
- resp->rqstp->rq_next_page++;
+
+ thislen = (void *)xdr->end - (void *)xdr->p;
+ if (len < thislen)
+ thislen = len;
+ p = xdr_reserve_space(xdr, (thislen+3)&~3);
+ WARN_ON_ONCE(!p);
+ resp->rqstp->rq_vec[v].iov_base = p;
+ resp->rqstp->rq_vec[v].iov_len = thislen;
+ v++;
+ len -= thislen;
+
+ while (len) {
+ thislen = min_t(long, len, PAGE_SIZE);
+ p = xdr_reserve_space(xdr, (thislen+3)&~3);
+ WARN_ON_ONCE(!p);
+ resp->rqstp->rq_vec[v].iov_base = p;
+ resp->rqstp->rq_vec[v].iov_len = thislen;
v++;
- len -= PAGE_SIZE;
+ len -= thislen;
}
read->rd_vlen = v;
- nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp,
- read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
- &maxcount);
-
+ nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
+ read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
+ xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
+
eof = (read->rd_offset + maxcount >=
read->rd_fhp->fh_dentry->d_inode->i_size);
- WRITE32(eof);
- WRITE32(maxcount);
- ADJUST_ARGS();
- resp->xbuf->head[0].iov_len = (char*)p
- - (char*)resp->xbuf->head[0].iov_base;
- resp->xbuf->page_len = maxcount;
+ tmp = htonl(eof);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
+ tmp = htonl(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
- /* Use rest of head for padding and remaining ops: */
- resp->xbuf->tail[0].iov_base = p;
- resp->xbuf->tail[0].iov_len = 0;
- if (maxcount&3) {
- RESERVE_SPACE(4);
- WRITE32(0);
- resp->xbuf->tail[0].iov_base += maxcount&3;
- resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
- ADJUST_ARGS();
- }
+ pad = (maxcount&3) ? 4 - (maxcount&3) : 0;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 8 + maxcount,
+ &zzz, pad);
return 0;
+
}
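
nfsd4_encode_readv() now reserves the read buffer directly in the xdr stream: whatever room remains before xdr->end becomes the first iovec, and the remainder is carved into PAGE_SIZE iovecs before nfsd_readv() fills them. A user-space sketch of the same carving, using readv(2) on an ordinary file as a stand-in for nfsd_readv(); the file name, the first-iovec room, and the total length are arbitrary demo values.

/* Sketch: split a read of `len` bytes into page-sized iovecs,
 * mirroring the loop in nfsd4_encode_readv(). */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/uio.h>
#include <unistd.h>

#define PAGE_SIZE 4096

int main(void)
{
        size_t len = 10000, first_room = 1000, off = 0;  /* demo sizes */
        struct iovec vec[8];
        int v = 0, fd;
        char *mem = malloc(len);

        /* first iovec: whatever room is left in the current buffer */
        size_t chunk = len < first_room ? len : first_room;
        vec[v].iov_base = mem + off;
        vec[v].iov_len = chunk;
        v++; off += chunk; len -= chunk;

        /* remaining iovecs: one page each */
        while (len) {
                chunk = len < PAGE_SIZE ? len : PAGE_SIZE;
                vec[v].iov_base = mem + off;
                vec[v].iov_len = chunk;
                v++; off += chunk; len -= chunk;
        }

        fd = open("/etc/hostname", O_RDONLY);   /* any readable file will do */
        if (fd >= 0) {
                ssize_t got = readv(fd, vec, v);
                printf("read %zd bytes into %d iovecs\n", got, v);
                close(fd);
        }
        free(mem);
        return 0;
}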
static __be32
-nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
+nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
+ struct nfsd4_read *read)
{
- int maxcount;
- char *page;
+ unsigned long maxcount;
+ struct xdr_stream *xdr = &resp->xdr;
+ struct file *file = read->rd_filp;
+ int starting_len = xdr->buf->len;
+ struct raparms *ra;
__be32 *p;
+ __be32 err;
if (nfserr)
return nfserr;
- if (resp->xbuf->page_len)
+
+ p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
+ if (!p) {
+ WARN_ON_ONCE(resp->rqstp->rq_splice_ok);
return nfserr_resource;
- if (!*resp->rqstp->rq_next_page)
+ }
+ if (resp->xdr.buf->page_len && resp->rqstp->rq_splice_ok) {
+ WARN_ON_ONCE(1);
return nfserr_resource;
+ }
+ xdr_commit_encode(xdr);
+
+ maxcount = svc_max_payload(resp->rqstp);
+ if (maxcount > xdr->buf->buflen - xdr->buf->len)
+ maxcount = xdr->buf->buflen - xdr->buf->len;
+ if (maxcount > read->rd_length)
+ maxcount = read->rd_length;
+
+ if (!read->rd_filp) {
+ err = nfsd_get_tmp_read_open(resp->rqstp, read->rd_fhp,
+ &file, &ra);
+ if (err)
+ goto err_truncate;
+ }
+
+ if (file->f_op->splice_read && resp->rqstp->rq_splice_ok)
+ err = nfsd4_encode_splice_read(resp, read, file, maxcount);
+ else
+ err = nfsd4_encode_readv(resp, read, file, maxcount);
+
+ if (!read->rd_filp)
+ nfsd_put_tmp_read_open(file, ra);
+
+err_truncate:
+ if (err)
+ xdr_truncate_encode(xdr, starting_len);
+ return err;
+}
+
+static __be32
+nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink)
+{
+ int maxcount;
+ __be32 wire_count;
+ int zero = 0;
+ struct xdr_stream *xdr = &resp->xdr;
+ int length_offset = xdr->buf->len;
+ __be32 *p;
- page = page_address(*(resp->rqstp->rq_next_page++));
+ if (nfserr)
+ return nfserr;
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
maxcount = PAGE_SIZE;
- RESERVE_SPACE(4);
+ p = xdr_reserve_space(xdr, maxcount);
+ if (!p)
+ return nfserr_resource;
/*
* XXX: By default, the ->readlink() VFS op will truncate symlinks
* if they would overflow the buffer. Is this kosher in NFSv4? If
* not, one easy fix is: if ->readlink() precisely fills the buffer,
* assume that truncation occurred, and return NFS4ERR_RESOURCE.
*/
- nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
+ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp,
+ (char *)p, &maxcount);
if (nfserr == nfserr_isdir)
- return nfserr_inval;
- if (nfserr)
+ nfserr = nfserr_inval;
+ if (nfserr) {
+ xdr_truncate_encode(xdr, length_offset);
return nfserr;
-
- WRITE32(maxcount);
- ADJUST_ARGS();
- resp->xbuf->head[0].iov_len = (char*)p
- - (char*)resp->xbuf->head[0].iov_base;
- resp->xbuf->page_len = maxcount;
-
- /* Use rest of head for padding and remaining ops: */
- resp->xbuf->tail[0].iov_base = p;
- resp->xbuf->tail[0].iov_len = 0;
- if (maxcount&3) {
- RESERVE_SPACE(4);
- WRITE32(0);
- resp->xbuf->tail[0].iov_base += maxcount&3;
- resp->xbuf->tail[0].iov_len = 4 - (maxcount&3);
- ADJUST_ARGS();
}
+
+ wire_count = htonl(maxcount);
+ write_bytes_to_xdr_buf(xdr->buf, length_offset, &wire_count, 4);
+ xdr_truncate_encode(xdr, length_offset + 4 + maxcount);
+ if (maxcount & 3)
+ write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
+ &zero, 4 - (maxcount&3));
return 0;
}
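
Both the read and readlink encoders rely on the XDR rule that opaque data is zero-padded to the next 4-byte boundary, which is all the (maxcount&3) arithmetic above computes. A small runnable sketch of that math (illustrative, not part of the patch):

#include <stdio.h>

static unsigned int xdr_pad_len(unsigned int len)
{
	/* zero bytes needed to reach the next 4-byte boundary */
	return (len & 3) ? 4 - (len & 3) : 0;
}

int main(void)
{
	unsigned int len;

	for (len = 0; len < 8; len++)
		printf("len=%u pad=%u\n", len, xdr_pad_len(len));
	return 0;
}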
@@ -3062,47 +3277,52 @@ static __be32
nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir)
{
int maxcount;
+ int bytes_left;
loff_t offset;
- __be32 *page, *savep, *tailbase;
+ __be64 wire_offset;
+ struct xdr_stream *xdr = &resp->xdr;
+ int starting_len = xdr->buf->len;
__be32 *p;
if (nfserr)
return nfserr;
- if (resp->xbuf->page_len)
- return nfserr_resource;
- if (!*resp->rqstp->rq_next_page)
- return nfserr_resource;
- RESERVE_SPACE(NFS4_VERIFIER_SIZE);
- savep = p;
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
/* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
- WRITE32(0);
- WRITE32(0);
- ADJUST_ARGS();
- resp->xbuf->head[0].iov_len = ((char*)resp->p) - (char*)resp->xbuf->head[0].iov_base;
- tailbase = p;
-
- maxcount = PAGE_SIZE;
- if (maxcount > readdir->rd_maxcount)
- maxcount = readdir->rd_maxcount;
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
+ resp->xdr.buf->head[0].iov_len = ((char *)resp->xdr.p)
+ - (char *)resp->xdr.buf->head[0].iov_base;
/*
- * Convert from bytes to words, account for the two words already
- * written, make sure to leave two words at the end for the next
- * pointer and eof field.
+ * Number of bytes left for directory entries allowing for the
+ * final 8 bytes of the readdir and a following failed op:
+ */
+ bytes_left = xdr->buf->buflen - xdr->buf->len
+ - COMPOUND_ERR_SLACK_SPACE - 8;
+ if (bytes_left < 0) {
+ nfserr = nfserr_resource;
+ goto err_no_verf;
+ }
+ maxcount = min_t(u32, readdir->rd_maxcount, INT_MAX);
+ /*
+ * Note the RFC defines rd_maxcount as the size of the
+ * READDIR4resok structure, which includes the verifier above
+ * and the 8 bytes encoded at the end of this function:
*/
- maxcount = (maxcount >> 2) - 4;
- if (maxcount < 0) {
- nfserr = nfserr_toosmall;
+ if (maxcount < 16) {
+ nfserr = nfserr_toosmall;
goto err_no_verf;
}
+ maxcount = min_t(int, maxcount-16, bytes_left);
- page = page_address(*(resp->rqstp->rq_next_page++));
+ readdir->xdr = xdr;
+ readdir->rd_maxcount = maxcount;
readdir->common.err = 0;
- readdir->buflen = maxcount;
- readdir->buffer = page;
- readdir->offset = NULL;
+ readdir->cookie_offset = 0;
offset = readdir->rd_cookie;
nfserr = nfsd_readdir(readdir->rd_rqstp, readdir->rd_fhp,
@@ -3110,42 +3330,49 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
&readdir->common, nfsd4_encode_dirent);
if (nfserr == nfs_ok &&
readdir->common.err == nfserr_toosmall &&
- readdir->buffer == page)
- nfserr = nfserr_toosmall;
+ xdr->buf->len == starting_len + 8) {
+ /* nothing encoded; which limit did we hit?: */
+ if (maxcount - 16 < bytes_left)
+ /* It was the fault of rd_maxcount: */
+ nfserr = nfserr_toosmall;
+ else
+ /* We ran out of buffer space: */
+ nfserr = nfserr_resource;
+ }
if (nfserr)
goto err_no_verf;
- if (readdir->offset)
- xdr_encode_hyper(readdir->offset, offset);
+ if (readdir->cookie_offset) {
+ wire_offset = cpu_to_be64(offset);
+ write_bytes_to_xdr_buf(xdr->buf, readdir->cookie_offset,
+ &wire_offset, 8);
+ }
- p = readdir->buffer;
+ p = xdr_reserve_space(xdr, 8);
+ if (!p) {
+ WARN_ON_ONCE(1);
+ goto err_no_verf;
+ }
*p++ = 0; /* no more entries */
*p++ = htonl(readdir->common.err == nfserr_eof);
- resp->xbuf->page_len = ((char*)p) -
- (char*)page_address(*(resp->rqstp->rq_next_page-1));
-
- /* Use rest of head for padding and remaining ops: */
- resp->xbuf->tail[0].iov_base = tailbase;
- resp->xbuf->tail[0].iov_len = 0;
- resp->p = resp->xbuf->tail[0].iov_base;
- resp->end = resp->p + (PAGE_SIZE - resp->xbuf->head[0].iov_len)/4;
return 0;
err_no_verf:
- p = savep;
- ADJUST_ARGS();
+ xdr_truncate_encode(xdr, starting_len);
return nfserr;
}
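
The entry-space budget above reduces to: rd_maxcount covers the whole READDIR4resok (verifier plus entries plus the trailing eof words), so 16 bytes come off before clamping against what is left in the encode buffer. A self-contained sketch of that accounting, using a stand-in slack value rather than the real COMPOUND_ERR_SLACK_SPACE:

#include <limits.h>

#define ERR_SLACK	16	/* stand-in for COMPOUND_ERR_SLACK_SPACE */

static int readdir_entry_budget(int buflen, int buf_used, unsigned int rd_maxcount)
{
	int bytes_left = buflen - buf_used - ERR_SLACK - 8;
	int maxcount = rd_maxcount > INT_MAX ? INT_MAX : (int)rd_maxcount;

	if (bytes_left < 0 || maxcount < 16)
		return -1;	/* caller maps this to nfserr_resource/toosmall */
	maxcount -= 16;		/* verifier (8) plus the final entry/eof words (8) */
	return maxcount < bytes_left ? maxcount : bytes_left;
}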
static __be32
nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(20);
- write_cinfo(&p, &remove->rm_cinfo);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &remove->rm_cinfo);
}
return nfserr;
}
@@ -3153,19 +3380,21 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
static __be32
nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(40);
- write_cinfo(&p, &rename->rn_sinfo);
- write_cinfo(&p, &rename->rn_tinfo);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 40);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &rename->rn_sinfo);
+ p = encode_cinfo(p, &rename->rn_tinfo);
}
return nfserr;
}
static __be32
-nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
+nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
__be32 nfserr, struct svc_export *exp)
{
u32 i, nflavs, supported;
@@ -3176,6 +3405,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
if (nfserr)
goto out;
+ nfserr = nfserr_resource;
if (exp->ex_nflavors) {
flavs = exp->ex_flavors;
nflavs = exp->ex_nflavors;
@@ -3197,9 +3427,10 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
}
supported = 0;
- RESERVE_SPACE(4);
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ goto out;
flavorsp = p++; /* to be backfilled later */
- ADJUST_ARGS();
for (i = 0; i < nflavs; i++) {
rpc_authflavor_t pf = flavs[i].pseudoflavor;
@@ -3207,18 +3438,20 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
if (rpcauth_get_gssinfo(pf, &info) == 0) {
supported++;
- RESERVE_SPACE(4 + 4 + XDR_LEN(info.oid.len) + 4 + 4);
- WRITE32(RPC_AUTH_GSS);
- WRITE32(info.oid.len);
- WRITEMEM(info.oid.data, info.oid.len);
- WRITE32(info.qop);
- WRITE32(info.service);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 4 + 4 +
+ XDR_LEN(info.oid.len) + 4 + 4);
+ if (!p)
+ goto out;
+ *p++ = cpu_to_be32(RPC_AUTH_GSS);
+ p = xdr_encode_opaque(p, info.oid.data, info.oid.len);
+ *p++ = cpu_to_be32(info.qop);
+ *p++ = cpu_to_be32(info.service);
} else if (pf < RPC_AUTH_MAXFLAVOR) {
supported++;
- RESERVE_SPACE(4);
- WRITE32(pf);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ goto out;
+ *p++ = cpu_to_be32(pf);
} else {
if (report)
pr_warn("NFS: SECINFO: security flavor %u "
@@ -3229,7 +3462,7 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
if (nflavs != supported)
report = false;
*flavorsp = htonl(supported);
-
+ nfserr = 0;
out:
if (exp)
exp_put(exp);
@@ -3240,14 +3473,18 @@ static __be32
nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo *secinfo)
{
- return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->si_exp);
+ struct xdr_stream *xdr = &resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp);
}
static __be32
nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_secinfo_no_name *secinfo)
{
- return nfsd4_do_encode_secinfo(resp, nfserr, secinfo->sin_exp);
+ struct xdr_stream *xdr = &resp->xdr;
+
+ return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp);
}
/*
@@ -3257,41 +3494,47 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
static __be32
nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- RESERVE_SPACE(16);
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
+ return nfserr_resource;
if (nfserr) {
- WRITE32(3);
- WRITE32(0);
- WRITE32(0);
- WRITE32(0);
+ *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
}
else {
- WRITE32(3);
- WRITE32(setattr->sa_bmval[0]);
- WRITE32(setattr->sa_bmval[1]);
- WRITE32(setattr->sa_bmval[2]);
+ *p++ = cpu_to_be32(3);
+ *p++ = cpu_to_be32(setattr->sa_bmval[0]);
+ *p++ = cpu_to_be32(setattr->sa_bmval[1]);
+ *p++ = cpu_to_be32(setattr->sa_bmval[2]);
}
- ADJUST_ARGS();
return nfserr;
}
static __be32
nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(8 + NFS4_VERIFIER_SIZE);
- WRITEMEM(&scd->se_clientid, 8);
- WRITEMEM(&scd->se_confirm, NFS4_VERIFIER_SIZE);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 8 + NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, &scd->se_clientid, 8);
+ p = xdr_encode_opaque_fixed(p, &scd->se_confirm,
+ NFS4_VERIFIER_SIZE);
}
else if (nfserr == nfserr_clid_inuse) {
- RESERVE_SPACE(8);
- WRITE32(0);
- WRITE32(0);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(0);
+ *p++ = cpu_to_be32(0);
}
return nfserr;
}
@@ -3299,14 +3542,17 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
static __be32
nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (!nfserr) {
- RESERVE_SPACE(16);
- WRITE32(write->wr_bytes_written);
- WRITE32(write->wr_how_written);
- WRITEMEM(write->wr_verifier.data, NFS4_VERIFIER_SIZE);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(write->wr_bytes_written);
+ *p++ = cpu_to_be32(write->wr_how_written);
+ p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
+ NFS4_VERIFIER_SIZE);
}
return nfserr;
}
@@ -3323,6 +3569,7 @@ static __be32
nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_exchange_id *exid)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
char *major_id;
char *server_scope;
@@ -3338,60 +3585,61 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
server_scope = utsname()->nodename;
server_scope_sz = strlen(server_scope);
- RESERVE_SPACE(
+ p = xdr_reserve_space(xdr,
8 /* eir_clientid */ +
4 /* eir_sequenceid */ +
4 /* eir_flags */ +
4 /* spr_how */);
+ if (!p)
+ return nfserr_resource;
- WRITEMEM(&exid->clientid, 8);
- WRITE32(exid->seqid);
- WRITE32(exid->flags);
+ p = xdr_encode_opaque_fixed(p, &exid->clientid, 8);
+ *p++ = cpu_to_be32(exid->seqid);
+ *p++ = cpu_to_be32(exid->flags);
- WRITE32(exid->spa_how);
- ADJUST_ARGS();
+ *p++ = cpu_to_be32(exid->spa_how);
switch (exid->spa_how) {
case SP4_NONE:
break;
case SP4_MACH_CRED:
/* spo_must_enforce, spo_must_allow */
- RESERVE_SPACE(16);
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
+ return nfserr_resource;
/* spo_must_enforce bitmap: */
- WRITE32(2);
- WRITE32(nfs4_minimal_spo_must_enforce[0]);
- WRITE32(nfs4_minimal_spo_must_enforce[1]);
+ *p++ = cpu_to_be32(2);
+ *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[0]);
+ *p++ = cpu_to_be32(nfs4_minimal_spo_must_enforce[1]);
/* empty spo_must_allow bitmap: */
- WRITE32(0);
+ *p++ = cpu_to_be32(0);
- ADJUST_ARGS();
break;
default:
WARN_ON_ONCE(1);
}
- RESERVE_SPACE(
+ p = xdr_reserve_space(xdr,
8 /* so_minor_id */ +
4 /* so_major_id.len */ +
(XDR_QUADLEN(major_id_sz) * 4) +
4 /* eir_server_scope.len */ +
(XDR_QUADLEN(server_scope_sz) * 4) +
4 /* eir_server_impl_id.count (0) */);
+ if (!p)
+ return nfserr_resource;
/* The server_owner struct */
- WRITE64(minor_id); /* Minor id */
+ p = xdr_encode_hyper(p, minor_id); /* Minor id */
/* major id */
- WRITE32(major_id_sz);
- WRITEMEM(major_id, major_id_sz);
+ p = xdr_encode_opaque(p, major_id, major_id_sz);
/* Server scope */
- WRITE32(server_scope_sz);
- WRITEMEM(server_scope, server_scope_sz);
+ p = xdr_encode_opaque(p, server_scope, server_scope_sz);
/* Implementation id */
- WRITE32(0); /* zero length nfs_impl_id4 array */
- ADJUST_ARGS();
+ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */
return 0;
}
@@ -3399,47 +3647,54 @@ static __be32
nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_create_session *sess)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (nfserr)
return nfserr;
- RESERVE_SPACE(24);
- WRITEMEM(sess->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(sess->seqid);
- WRITE32(sess->flags);
- ADJUST_ARGS();
-
- RESERVE_SPACE(28);
- WRITE32(0); /* headerpadsz */
- WRITE32(sess->fore_channel.maxreq_sz);
- WRITE32(sess->fore_channel.maxresp_sz);
- WRITE32(sess->fore_channel.maxresp_cached);
- WRITE32(sess->fore_channel.maxops);
- WRITE32(sess->fore_channel.maxreqs);
- WRITE32(sess->fore_channel.nr_rdma_attrs);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 24);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, sess->sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(sess->seqid);
+ *p++ = cpu_to_be32(sess->flags);
+
+ p = xdr_reserve_space(xdr, 28);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(0); /* headerpadsz */
+ *p++ = cpu_to_be32(sess->fore_channel.maxreq_sz);
+ *p++ = cpu_to_be32(sess->fore_channel.maxresp_sz);
+ *p++ = cpu_to_be32(sess->fore_channel.maxresp_cached);
+ *p++ = cpu_to_be32(sess->fore_channel.maxops);
+ *p++ = cpu_to_be32(sess->fore_channel.maxreqs);
+ *p++ = cpu_to_be32(sess->fore_channel.nr_rdma_attrs);
if (sess->fore_channel.nr_rdma_attrs) {
- RESERVE_SPACE(4);
- WRITE32(sess->fore_channel.rdma_attrs);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(sess->fore_channel.rdma_attrs);
}
- RESERVE_SPACE(28);
- WRITE32(0); /* headerpadsz */
- WRITE32(sess->back_channel.maxreq_sz);
- WRITE32(sess->back_channel.maxresp_sz);
- WRITE32(sess->back_channel.maxresp_cached);
- WRITE32(sess->back_channel.maxops);
- WRITE32(sess->back_channel.maxreqs);
- WRITE32(sess->back_channel.nr_rdma_attrs);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 28);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(0); /* headerpadsz */
+ *p++ = cpu_to_be32(sess->back_channel.maxreq_sz);
+ *p++ = cpu_to_be32(sess->back_channel.maxresp_sz);
+ *p++ = cpu_to_be32(sess->back_channel.maxresp_cached);
+ *p++ = cpu_to_be32(sess->back_channel.maxops);
+ *p++ = cpu_to_be32(sess->back_channel.maxreqs);
+ *p++ = cpu_to_be32(sess->back_channel.nr_rdma_attrs);
if (sess->back_channel.nr_rdma_attrs) {
- RESERVE_SPACE(4);
- WRITE32(sess->back_channel.rdma_attrs);
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 4);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(sess->back_channel.rdma_attrs);
}
return 0;
}
@@ -3448,22 +3703,25 @@ static __be32
nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_sequence *seq)
{
+ struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
if (nfserr)
return nfserr;
- RESERVE_SPACE(NFS4_MAX_SESSIONID_LEN + 20);
- WRITEMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- WRITE32(seq->seqid);
- WRITE32(seq->slotid);
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, seq->sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(seq->seqid);
+ *p++ = cpu_to_be32(seq->slotid);
/* Note slotid's are numbered from zero: */
- WRITE32(seq->maxslots - 1); /* sr_highest_slotid */
- WRITE32(seq->maxslots - 1); /* sr_target_highest_slotid */
- WRITE32(seq->status_flags);
+ *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_highest_slotid */
+ *p++ = cpu_to_be32(seq->maxslots - 1); /* sr_target_highest_slotid */
+ *p++ = cpu_to_be32(seq->status_flags);
- ADJUST_ARGS();
- resp->cstate.datap = p; /* DRC cache data pointer */
+ resp->cstate.data_offset = xdr->buf->len; /* DRC cache data offset */
return 0;
}
@@ -3471,20 +3729,22 @@ static __be32
nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_test_stateid *test_stateid)
{
+ struct xdr_stream *xdr = &resp->xdr;
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
if (nfserr)
return nfserr;
- RESERVE_SPACE(4 + (4 * test_stateid->ts_num_ids));
+ p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids));
+ if (!p)
+ return nfserr_resource;
*p++ = htonl(test_stateid->ts_num_ids);
list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) {
*p++ = stateid->ts_id_status;
}
- ADJUST_ARGS();
return nfserr;
}
@@ -3563,81 +3823,99 @@ static nfsd4_enc nfsd4_enc_ops[] = {
};
/*
- * Calculate the total amount of memory that the compound response has taken
- * after encoding the current operation with pad.
- *
- * pad: if operation is non-idempotent, pad was calculate by op_rsize_bop()
- * which was specified at nfsd4_operation, else pad is zero.
- *
- * Compare this length to the session se_fmaxresp_sz and se_fmaxresp_cached.
+ * Calculate whether we still have space to encode respsize bytes.
+ * There are two considerations:
+ * - For NFS versions >=4.1, the size of the reply must stay within
+ * session limits
+ * - For all NFS versions, we must stay within limited preallocated
+ * buffer space.
*
- * Our se_fmaxresp_cached will always be a multiple of PAGE_SIZE, and so
- * will be at least a page and will therefore hold the xdr_buf head.
+ * This is called before the operation is processed, so can only provide
+ * an upper estimate. For some idempotent operations (such as
+ * getattr), it's not necessarily a problem if that estimate is wrong,
+ * as we can fail it after processing without significant side effects.
*/
-__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
+__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 respsize)
{
- struct xdr_buf *xb = &resp->rqstp->rq_res;
- struct nfsd4_session *session = NULL;
+ struct xdr_buf *buf = &resp->rqstp->rq_res;
struct nfsd4_slot *slot = resp->cstate.slot;
- u32 length, tlen = 0;
+ if (buf->len + respsize <= buf->buflen)
+ return nfs_ok;
if (!nfsd4_has_session(&resp->cstate))
- return 0;
-
- session = resp->cstate.session;
-
- if (xb->page_len == 0) {
- length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
- } else {
- if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
- tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;
-
- length = xb->head[0].iov_len + xb->page_len + tlen + pad;
- }
- dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
- length, xb->page_len, tlen, pad);
-
- if (length > session->se_fchannel.maxresp_sz)
- return nfserr_rep_too_big;
-
- if ((slot->sl_flags & NFSD4_SLOT_CACHETHIS) &&
- length > session->se_fchannel.maxresp_cached)
+ return nfserr_resource;
+ if (slot->sl_flags & NFSD4_SLOT_CACHETHIS) {
+ WARN_ON_ONCE(1);
return nfserr_rep_too_big_to_cache;
-
- return 0;
+ }
+ return nfserr_rep_too_big;
}
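
The rewritten check is a small decision table: fits in the buffer, ok; over the buffer without a session, resource; over the buffer with a session, rep_too_big, or rep_too_big_to_cache when the slot asked for caching. A sketch of that logic with stand-in enum values (not the real nfsd error codes):

enum reply_check {
	REPLY_OK,
	REPLY_RESOURCE,		/* nfserr_resource */
	REPLY_TOO_BIG,		/* nfserr_rep_too_big */
	REPLY_TOO_BIG_TO_CACHE,	/* nfserr_rep_too_big_to_cache */
};

static enum reply_check check_resp_size(unsigned int buf_len, unsigned int buflen,
					unsigned int respsize, int has_session,
					int slot_cachethis)
{
	if (buf_len + respsize <= buflen)
		return REPLY_OK;		/* still fits in the encode buffer */
	if (!has_session)
		return REPLY_RESOURCE;		/* NFSv4.0: plain resource error */
	if (slot_cachethis)
		return REPLY_TOO_BIG_TO_CACHE;	/* reply was meant to be cached */
	return REPLY_TOO_BIG;
}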
void
nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
{
+ struct xdr_stream *xdr = &resp->xdr;
struct nfs4_stateowner *so = resp->cstate.replay_owner;
- __be32 *statp;
+ struct svc_rqst *rqstp = resp->rqstp;
+ int post_err_offset;
+ nfsd4_enc encoder;
__be32 *p;
- RESERVE_SPACE(8);
- WRITE32(op->opnum);
- statp = p++; /* to be backfilled at the end */
- ADJUST_ARGS();
+ p = xdr_reserve_space(xdr, 8);
+ if (!p) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ *p++ = cpu_to_be32(op->opnum);
+ post_err_offset = xdr->buf->len;
if (op->opnum == OP_ILLEGAL)
goto status;
BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
- op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
+ encoder = nfsd4_enc_ops[op->opnum];
+ op->status = encoder(resp, op->status, &op->u);
+ xdr_commit_encode(xdr);
+
/* nfsd4_check_resp_size guarantees enough room for error status */
- if (!op->status)
- op->status = nfsd4_check_resp_size(resp, 0);
+ if (!op->status) {
+ int space_needed = 0;
+ if (!nfsd4_last_compound_op(rqstp))
+ space_needed = COMPOUND_ERR_SLACK_SPACE;
+ op->status = nfsd4_check_resp_size(resp, space_needed);
+ }
+ if (op->status == nfserr_resource && nfsd4_has_session(&resp->cstate)) {
+ struct nfsd4_slot *slot = resp->cstate.slot;
+
+ if (slot->sl_flags & NFSD4_SLOT_CACHETHIS)
+ op->status = nfserr_rep_too_big_to_cache;
+ else
+ op->status = nfserr_rep_too_big;
+ }
+ if (op->status == nfserr_resource ||
+ op->status == nfserr_rep_too_big ||
+ op->status == nfserr_rep_too_big_to_cache) {
+ /*
+ * The operation may have already been encoded or
+ * partially encoded. No op returns anything additional
+ * in the case of one of these three errors, so we can
+ * just truncate back to after the status. But it's a
+ * bug if we had to do this on a non-idempotent op:
+ */
+ warn_on_nonidempotent_op(op);
+ xdr_truncate_encode(xdr, post_err_offset);
+ }
if (so) {
+ int len = xdr->buf->len - post_err_offset;
+
so->so_replay.rp_status = op->status;
- so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
- memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen);
+ so->so_replay.rp_buflen = len;
+ read_bytes_from_xdr_buf(xdr->buf, post_err_offset,
+ so->so_replay.rp_buf, len);
}
status:
- /*
- * Note: We write the status directly, instead of using WRITE32(),
- * since it is already in network byte order.
- */
- *statp = op->status;
+ /* Note that op->status is already in network byte order: */
+ write_bytes_to_xdr_buf(xdr->buf, post_err_offset - 4, &op->status, 4);
}
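
The status handling above follows the usual "reserve now, backfill later" XDR pattern: the opnum goes out immediately, a 4-byte slot is left for the status, and once the op body is encoded (or truncated) the status is written back at post_err_offset - 4. A runnable user-space sketch of that pattern (buffer handling simplified, not part of the patch):

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

int main(void)
{
	unsigned char buf[64];
	size_t len = 0, status_offset;
	uint32_t opnum = htonl(9), status;	/* 9 is just an example opnum */

	memcpy(buf + len, &opnum, 4);		/* opnum is known up front */
	len += 4;
	status_offset = len;			/* remember where the status lives */
	len += 4;				/* reserve it; value unknown so far */

	memset(buf + len, 0, 8);		/* ... op-specific body ... */
	len += 8;

	status = htonl(0);			/* backfill the final status */
	memcpy(buf + status_offset, &status, 4);
	printf("encoded %zu bytes\n", len);
	return 0;
}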
/*
@@ -3649,21 +3927,22 @@ status:
* called with nfs4_lock_state() held
*/
void
-nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
{
__be32 *p;
struct nfs4_replay *rp = op->replay;
BUG_ON(!rp);
- RESERVE_SPACE(8);
- WRITE32(op->opnum);
+ p = xdr_reserve_space(xdr, 8 + rp->rp_buflen);
+ if (!p) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ *p++ = cpu_to_be32(op->opnum);
*p++ = rp->rp_status; /* already xdr'ed */
- ADJUST_ARGS();
- RESERVE_SPACE(rp->rp_buflen);
- WRITEMEM(rp->rp_buf, rp->rp_buflen);
- ADJUST_ARGS();
+ p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
}
int
@@ -3720,19 +3999,19 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
* All that remains is to write the tag and operation count...
*/
struct nfsd4_compound_state *cs = &resp->cstate;
- struct kvec *iov;
+ struct xdr_buf *buf = resp->xdr.buf;
+
+ WARN_ON_ONCE(buf->len != buf->head[0].iov_len + buf->page_len +
+ buf->tail[0].iov_len);
+
+ rqstp->rq_next_page = resp->xdr.page_ptr + 1;
+
p = resp->tagp;
*p++ = htonl(resp->taglen);
memcpy(p, resp->tag, resp->taglen);
p += XDR_QUADLEN(resp->taglen);
*p++ = htonl(resp->opcnt);
- if (rqstp->rq_res.page_len)
- iov = &rqstp->rq_res.tail[0];
- else
- iov = &rqstp->rq_res.head[0];
- iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
- BUG_ON(iov->iov_len > PAGE_SIZE);
if (nfsd4_has_session(cs)) {
struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
struct nfs4_client *clp = cs->session->se_client;
diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c
index f8f060ffbf4f..6040da8830ff 100644
--- a/fs/nfsd/nfscache.c
+++ b/fs/nfsd/nfscache.c
@@ -224,13 +224,6 @@ hash_refile(struct svc_cacherep *rp)
hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
}
-static inline bool
-nfsd_cache_entry_expired(struct svc_cacherep *rp)
-{
- return rp->c_state != RC_INPROG &&
- time_after(jiffies, rp->c_timestamp + RC_EXPIRE);
-}
-
/*
* Walk the LRU list and prune off entries that are older than RC_EXPIRE.
* Also prune the oldest ones when the total exceeds the max number of entries.
@@ -242,8 +235,14 @@ prune_cache_entries(void)
long freed = 0;
list_for_each_entry_safe(rp, tmp, &lru_head, c_lru) {
- if (!nfsd_cache_entry_expired(rp) &&
- num_drc_entries <= max_drc_entries)
+ /*
+ * Don't free entries attached to calls that are still
+ * in-progress, but do keep scanning the list.
+ */
+ if (rp->c_state == RC_INPROG)
+ continue;
+ if (num_drc_entries <= max_drc_entries &&
+ time_before(jiffies, rp->c_timestamp + RC_EXPIRE))
break;
nfsd_reply_cache_free_locked(rp);
freed++;
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index f34d9de802ab..51844048937f 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -1179,7 +1179,6 @@ static int __init init_nfsd(void)
retval = nfsd4_init_slabs();
if (retval)
goto out_unregister_pernet;
- nfs4_state_init();
retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
if (retval)
goto out_free_slabs;
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 479eb681c27c..847daf37e566 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -15,11 +15,20 @@
#include <linux/nfs2.h>
#include <linux/nfs3.h>
#include <linux/nfs4.h>
+#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/msg_prot.h>
-#include <linux/nfsd/debug.h>
-#include <linux/nfsd/export.h>
-#include <linux/nfsd/stats.h>
+#include <uapi/linux/nfsd/debug.h>
+
+#include "stats.h"
+#include "export.h"
+
+#undef ifdebug
+#ifdef NFSD_DEBUG
+# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
+#else
+# define ifdebug(flag) if (0)
+#endif
/*
* nfsd version
@@ -106,7 +115,6 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
*/
#ifdef CONFIG_NFSD_V4
extern unsigned long max_delegations;
-void nfs4_state_init(void);
int nfsd4_init_slabs(void);
void nfsd4_free_slabs(void);
int nfs4_state_start(void);
@@ -117,7 +125,6 @@ void nfs4_reset_lease(time_t leasetime);
int nfs4_reset_recoverydir(char *recdir);
char * nfs4_recoverydir(void);
#else
-static inline void nfs4_state_init(void) { }
static inline int nfsd4_init_slabs(void) { return 0; }
static inline void nfsd4_free_slabs(void) { }
static inline int nfs4_state_start(void) { return 0; }
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 3c37b160dcad..ec8393418154 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -88,9 +88,8 @@ static __be32 nfsd_setuser_and_check_port(struct svc_rqst *rqstp,
/* Check if the request originated from a secure port. */
if (!rqstp->rq_secure && !(flags & NFSEXP_INSECURE_PORT)) {
RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
- dprintk(KERN_WARNING
- "nfsd: request from insecure port %s!\n",
- svc_print_addr(rqstp, buf, sizeof(buf)));
+ dprintk("nfsd: request from insecure port %s!\n",
+ svc_print_addr(rqstp, buf, sizeof(buf)));
return nfserr_perm;
}
@@ -169,8 +168,8 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
data_left -= len;
if (data_left < 0)
return error;
- exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_auth);
- fid = (struct fid *)(fh->fh_auth + len);
+ exp = rqst_exp_find(rqstp, fh->fh_fsid_type, fh->fh_fsid);
+ fid = (struct fid *)(fh->fh_fsid + len);
} else {
__u32 tfh[2];
dev_t xdev;
@@ -385,7 +384,7 @@ static void _fh_update(struct svc_fh *fhp, struct svc_export *exp,
{
if (dentry != exp->ex_path.dentry) {
struct fid *fid = (struct fid *)
- (fhp->fh_handle.fh_auth + fhp->fh_handle.fh_size/4 - 1);
+ (fhp->fh_handle.fh_fsid + fhp->fh_handle.fh_size/4 - 1);
int maxsize = (fhp->fh_maxsize - fhp->fh_handle.fh_size)/4;
int subtreecheck = !(exp->ex_flags & NFSEXP_NOSUBTREECHECK);
@@ -513,7 +512,6 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/
struct inode * inode = dentry->d_inode;
- __u32 *datap;
dev_t ex_dev = exp_sb(exp)->s_dev;
dprintk("nfsd: fh_compose(exp %02x:%02x/%ld %pd2, ino=%ld)\n",
@@ -557,17 +555,16 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
if (inode)
_fh_update_old(dentry, exp, &fhp->fh_handle);
} else {
- int len;
+ fhp->fh_handle.fh_size =
+ key_len(fhp->fh_handle.fh_fsid_type) + 4;
fhp->fh_handle.fh_auth_type = 0;
- datap = fhp->fh_handle.fh_auth+0;
- mk_fsid(fhp->fh_handle.fh_fsid_type, datap, ex_dev,
+
+ mk_fsid(fhp->fh_handle.fh_fsid_type,
+ fhp->fh_handle.fh_fsid,
+ ex_dev,
exp->ex_path.dentry->d_inode->i_ino,
exp->ex_fsid, exp->ex_uuid);
- len = key_len(fhp->fh_handle.fh_fsid_type);
- datap += len/4;
- fhp->fh_handle.fh_size = 4 + len;
-
if (inode)
_fh_update(fhp, exp, dentry);
if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) {
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index ad67964d0bb1..2e89e70ac15c 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -1,9 +1,58 @@
-/* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de> */
+/*
+ * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
+ *
+ * This file describes the layout of the file handles as passed
+ * over the wire.
+ */
+#ifndef _LINUX_NFSD_NFSFH_H
+#define _LINUX_NFSD_NFSFH_H
+
+#include <linux/sunrpc/svc.h>
+#include <uapi/linux/nfsd/nfsfh.h>
+
+static inline __u32 ino_t_to_u32(ino_t ino)
+{
+ return (__u32) ino;
+}
+
+static inline ino_t u32_to_ino_t(__u32 uino)
+{
+ return (ino_t) uino;
+}
-#ifndef _LINUX_NFSD_FH_INT_H
-#define _LINUX_NFSD_FH_INT_H
+/*
+ * This is the internal representation of an NFS handle used in knfsd.
+ * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
+ */
+typedef struct svc_fh {
+ struct knfsd_fh fh_handle; /* FH data */
+ struct dentry * fh_dentry; /* validated dentry */
+ struct svc_export * fh_export; /* export pointer */
+ int fh_maxsize; /* max size for fh_handle */
+
+ unsigned char fh_locked; /* inode locked by us */
+ unsigned char fh_want_write; /* remount protection taken */
+
+#ifdef CONFIG_NFSD_V3
+ unsigned char fh_post_saved; /* post-op attrs saved */
+ unsigned char fh_pre_saved; /* pre-op attrs saved */
+
+ /* Pre-op attributes saved during fh_lock */
+ __u64 fh_pre_size; /* size before operation */
+ struct timespec fh_pre_mtime; /* mtime before oper */
+ struct timespec fh_pre_ctime; /* ctime before oper */
+ /*
+ * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
+ * to find out if it is valid.
+ */
+ u64 fh_pre_change;
+
+ /* Post-op attributes saved in fh_unlock */
+ struct kstat fh_post_attr; /* full attrs after operation */
+ u64 fh_post_change; /* nfsv4 change; see above */
+#endif /* CONFIG_NFSD_V3 */
-#include <linux/nfsd/nfsfh.h>
+} svc_fh;
enum nfsd_fsid {
FSID_DEV = 0,
@@ -215,4 +264,4 @@ fh_unlock(struct svc_fh *fhp)
}
}
-#endif /* _LINUX_NFSD_FH_INT_H */
+#endif /* _LINUX_NFSD_NFSFH_H */
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 9a4a5f9e7468..1879e43f2868 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -591,12 +591,6 @@ nfsd(void *vrqstp)
nfsdstats.th_cnt++;
mutex_unlock(&nfsd_mutex);
- /*
- * We want less throttling in balance_dirty_pages() so that nfs to
- * localhost doesn't cause nfsd to lock up due to all the client's
- * dirty pages.
- */
- current->flags |= PF_LESS_THROTTLE;
set_freezable();
/*
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 9c769a47ac5a..1ac306b769df 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -214,7 +214,8 @@ nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy)
int
nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
return xdr_argsize_check(rqstp, p);
}
@@ -248,7 +249,8 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
{
unsigned int len;
int v;
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->offset = ntohl(*p++);
@@ -281,7 +283,8 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p,
unsigned int len, hdr, dlen;
int v;
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
p++; /* beginoffset */
@@ -355,7 +358,8 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p,
int
nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->buffer = page_address(*(rqstp->rq_next_page++));
@@ -391,7 +395,8 @@ int
nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p,
struct nfsd_readdirargs *args)
{
- if (!(p = decode_fh(p, &args->fh)))
+ p = decode_fh(p, &args->fh);
+ if (!p)
return 0;
args->cookie = ntohl(*p++);
args->count = ntohl(*p++);
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 424d8f5f2317..374c66283ac5 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -37,7 +37,6 @@
#include <linux/idr.h>
#include <linux/sunrpc/svc_xprt.h>
-#include <linux/nfsd/nfsfh.h>
#include "nfsfh.h"
typedef struct {
@@ -123,7 +122,7 @@ static inline struct nfs4_delegation *delegstateid(struct nfs4_stid *s)
/* Maximum number of operations per session compound */
#define NFSD_MAX_OPS_PER_COMPOUND 16
/* Maximum session per slot cache size */
-#define NFSD_SLOT_CACHE_SIZE 1024
+#define NFSD_SLOT_CACHE_SIZE 2048
/* Maximum number of NFSD_SLOT_CACHE_SIZE slots per session */
#define NFSD_CACHE_SIZE_SLOTS_PER_SESSION 32
#define NFSD_MAX_MEM_PER_SESSION \
@@ -464,8 +463,6 @@ extern void nfs4_release_reclaim(struct nfsd_net *);
extern struct nfs4_client_reclaim *nfsd4_find_reclaim_client(const char *recdir,
struct nfsd_net *nn);
extern __be32 nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn);
-extern void nfs4_free_openowner(struct nfs4_openowner *);
-extern void nfs4_free_lockowner(struct nfs4_lockowner *);
extern int set_callback_cred(void);
extern void nfsd4_init_callback(struct nfsd4_callback *);
extern void nfsd4_probe_callback(struct nfs4_client *clp);
diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c
index 6d4521feb6e3..cd90878a76aa 100644
--- a/fs/nfsd/stats.c
+++ b/fs/nfsd/stats.c
@@ -24,7 +24,6 @@
#include <linux/seq_file.h>
#include <linux/module.h>
#include <linux/sunrpc/stats.h>
-#include <linux/nfsd/stats.h>
#include <net/net_namespace.h>
#include "nfsd.h"
diff --git a/include/linux/nfsd/stats.h b/fs/nfsd/stats.h
index e75b2544ff12..a5c944b771c6 100644
--- a/include/linux/nfsd/stats.h
+++ b/fs/nfsd/stats.h
@@ -1,12 +1,10 @@
/*
- * linux/include/linux/nfsd/stats.h
- *
* Statistics for NFS server.
*
* Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de>
*/
-#ifndef LINUX_NFSD_STATS_H
-#define LINUX_NFSD_STATS_H
+#ifndef _NFSD_STATS_H
+#define _NFSD_STATS_H
#include <uapi/linux/nfsd/stats.h>
@@ -42,4 +40,4 @@ extern struct svc_stat nfsd_svcstats;
void nfsd_stat_init(void);
void nfsd_stat_shutdown(void);
-#endif /* LINUX_NFSD_STATS_H */
+#endif /* _NFSD_STATS_H */
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 16f0673a423c..140c496f612c 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -820,55 +820,54 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
}
-static __be32
-nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+__be32 nfsd_finish_read(struct file *file, unsigned long *count, int host_err)
{
- mm_segment_t oldfs;
- __be32 err;
- int host_err;
-
- err = nfserr_perm;
-
- if (file->f_op->splice_read && rqstp->rq_splice_ok) {
- struct splice_desc sd = {
- .len = 0,
- .total_len = *count,
- .pos = offset,
- .u.data = rqstp,
- };
-
- rqstp->rq_next_page = rqstp->rq_respages + 1;
- host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
- } else {
- oldfs = get_fs();
- set_fs(KERNEL_DS);
- host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
- set_fs(oldfs);
- }
-
if (host_err >= 0) {
nfsdstats.io_read += host_err;
*count = host_err;
- err = 0;
fsnotify_access(file);
+ return 0;
} else
- err = nfserrno(host_err);
- return err;
+ return nfserrno(host_err);
+}
+
+int nfsd_splice_read(struct svc_rqst *rqstp,
+ struct file *file, loff_t offset, unsigned long *count)
+{
+ struct splice_desc sd = {
+ .len = 0,
+ .total_len = *count,
+ .pos = offset,
+ .u.data = rqstp,
+ };
+ int host_err;
+
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+ host_err = splice_direct_to_actor(file, &sd, nfsd_direct_splice_actor);
+ return nfsd_finish_read(file, count, host_err);
}
-static void kill_suid(struct dentry *dentry)
+int nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
+ unsigned long *count)
{
- struct iattr ia;
- ia.ia_valid = ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
+ mm_segment_t oldfs;
+ int host_err;
- mutex_lock(&dentry->d_inode->i_mutex);
- /*
- * Note we call this on write, so notify_change will not
- * encounter any conflicting delegations:
- */
- notify_change(dentry, &ia, NULL);
- mutex_unlock(&dentry->d_inode->i_mutex);
+ oldfs = get_fs();
+ set_fs(KERNEL_DS);
+ host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+ set_fs(oldfs);
+ return nfsd_finish_read(file, count, host_err);
+}
+
+static __be32
+nfsd_vfs_read(struct svc_rqst *rqstp, struct file *file,
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+ if (file->f_op->splice_read && rqstp->rq_splice_ok)
+ return nfsd_splice_read(rqstp, file, offset, count);
+ else
+ return nfsd_readv(file, offset, vec, vlen, count);
}
/*
@@ -922,6 +921,16 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
int stable = *stablep;
int use_wgather;
loff_t pos = offset;
+ unsigned int pflags = current->flags;
+
+ if (rqstp->rq_local)
+ /*
+ * We want less throttling in balance_dirty_pages()
+ * and shrink_inactive_list() so that nfs to
+ * localhost doesn't cause nfsd to lock up due to all
+ * the client's dirty pages or its congested queue.
+ */
+ current->flags |= PF_LESS_THROTTLE;
dentry = file->f_path.dentry;
inode = dentry->d_inode;
@@ -942,10 +951,6 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
nfsdstats.io_write += host_err;
fsnotify_modify(file);
- /* clear setuid/setgid flag after write */
- if (inode->i_mode & (S_ISUID | S_ISGID))
- kill_suid(dentry);
-
if (stable) {
if (use_wgather)
host_err = wait_for_concurrent_writes(file);
@@ -959,36 +964,33 @@ out_nfserr:
err = 0;
else
err = nfserrno(host_err);
+ if (rqstp->rq_local)
+ tsk_restore_flags(current, pflags, PF_LESS_THROTTLE);
return err;
}
-/*
- * Read data from a file. count must contain the requested read count
- * on entry. On return, *count contains the number of bytes actually read.
- * N.B. After this call fhp needs an fh_put
- */
-__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+__be32 nfsd_get_tmp_read_open(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ struct file **file, struct raparms **ra)
{
- struct file *file;
struct inode *inode;
- struct raparms *ra;
__be32 err;
- err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+ err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, file);
if (err)
return err;
- inode = file_inode(file);
+ inode = file_inode(*file);
/* Get readahead parameters */
- ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+ *ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
- if (ra && ra->p_set)
- file->f_ra = ra->p_ra;
-
- err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+ if (*ra && (*ra)->p_set)
+ (*file)->f_ra = (*ra)->p_ra;
+ return nfs_ok;
+}
+void nfsd_put_tmp_read_open(struct file *file, struct raparms *ra)
+{
/* Write back readahead params */
if (ra) {
struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
@@ -998,28 +1000,29 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
ra->p_count--;
spin_unlock(&rab->pb_lock);
}
-
nfsd_close(file);
- return err;
}
-/* As above, but use the provided file descriptor. */
-__be32
-nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
- loff_t offset, struct kvec *vec, int vlen,
- unsigned long *count)
+/*
+ * Read data from a file. count must contain the requested read count
+ * on entry. On return, *count contains the number of bytes actually read.
+ * N.B. After this call fhp needs an fh_put
+ */
+__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
{
- __be32 err;
+ struct file *file;
+ struct raparms *ra;
+ __be32 err;
+
+ err = nfsd_get_tmp_read_open(rqstp, fhp, &file, &ra);
+ if (err)
+ return err;
+
+ err = nfsd_vfs_read(rqstp, file, offset, vec, vlen, count);
+
+ nfsd_put_tmp_read_open(file, ra);
- if (file) {
- err = nfsd_permission(rqstp, fhp->fh_export, fhp->fh_dentry,
- NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
- if (err)
- goto out;
- err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
- } else /* Note file may still be NULL in NFSv4 special stateid case: */
- err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
-out:
return err;
}
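
The refactored nfsd_read() is now a strict open/use/release bracket around the new helpers, with the release running even when the read itself fails. A small runnable sketch of that shape, with simplified stand-in types instead of (struct file *, struct raparms *):

#include <stdio.h>

struct tmp_open { int opened; };

static int get_tmp_read_open(struct tmp_open *t)
{
	t->opened = 1;			/* nfsd_open() + readahead lookup */
	return 0;
}

static void put_tmp_read_open(struct tmp_open *t)
{
	t->opened = 0;			/* save readahead state, nfsd_close() */
}

int main(void)
{
	struct tmp_open t;
	int err = get_tmp_read_open(&t);

	if (!err) {
		/* nfsd_vfs_read(): splice_read or readv */
		put_tmp_read_open(&t);	/* released even if the read failed */
	}
	printf("err=%d\n", err);
	return 0;
}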
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index fbe90bdb2214..91b6ae3f658b 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -70,10 +70,16 @@ __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *,
__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
void nfsd_close(struct file *);
+struct raparms;
+__be32 nfsd_get_tmp_read_open(struct svc_rqst *, struct svc_fh *,
+ struct file **, struct raparms **);
+void nfsd_put_tmp_read_open(struct file *, struct raparms *);
+int nfsd_splice_read(struct svc_rqst *,
+ struct file *, loff_t, unsigned long *);
+int nfsd_readv(struct file *, loff_t, struct kvec *, int,
+ unsigned long *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
loff_t, struct kvec *, int, unsigned long *);
-__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
- loff_t, struct kvec *, int, unsigned long *);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 5ea7df305083..18cbb6d9c8a9 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -58,7 +58,7 @@ struct nfsd4_compound_state {
/* For sessions DRC */
struct nfsd4_session *session;
struct nfsd4_slot *slot;
- __be32 *datap;
+ int data_offset;
size_t iovlen;
u32 minorversion;
__be32 status;
@@ -287,9 +287,8 @@ struct nfsd4_readdir {
struct svc_fh * rd_fhp; /* response */
struct readdir_cd common;
- __be32 * buffer;
- int buflen;
- __be32 * offset;
+ struct xdr_stream *xdr;
+ int cookie_offset;
};
struct nfsd4_release_lockowner {
@@ -506,9 +505,7 @@ struct nfsd4_compoundargs {
struct nfsd4_compoundres {
/* scratch variables for XDR encode */
- __be32 * p;
- __be32 * end;
- struct xdr_buf * xbuf;
+ struct xdr_stream xdr;
struct svc_rqst * rqstp;
u32 taglen;
@@ -538,6 +535,9 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt;
}
+int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
+void warn_on_nonidempotent_op(struct nfsd4_op *op);
+
#define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs)
static inline void
@@ -563,10 +563,11 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *,
struct nfsd4_compoundres *);
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *);
-void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
-__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
- struct dentry *dentry, __be32 **buffer, int countp,
- u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
+void nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op);
+__be32 nfsd4_encode_fattr_to_buf(__be32 **p, int words,
+ struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry,
+ u32 *bmval, struct svc_rqst *, int ignore_crossmnt);
extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *,
struct nfsd4_setclientid *setclid);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 0b18776b075e..6152cbe353e8 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1215,7 +1215,7 @@ xfs_ioctl_setattr(
* cleared upon successful return from chown()
*/
if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
- !inode_capable(VFS_I(ip), CAP_FSETID))
+ !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
/*
diff --git a/include/linux/capability.h b/include/linux/capability.h
index a6ee1f9a5018..84b13ad67c1c 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -210,7 +210,7 @@ extern bool has_ns_capability_noaudit(struct task_struct *t,
struct user_namespace *ns, int cap);
extern bool capable(int cap);
extern bool ns_capable(struct user_namespace *ns, int cap);
-extern bool inode_capable(const struct inode *inode, int cap);
+extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
/* audit system wants to get cap info from files as well */
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index dcaad79f54ed..219d79627c05 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -17,13 +17,13 @@
#include <linux/fs.h>
#include <linux/kref.h>
#include <linux/utsname.h>
-#include <linux/nfsd/nfsfh.h>
#include <linux/lockd/bind.h>
#include <linux/lockd/xdr.h>
#ifdef CONFIG_LOCKD_V4
#include <linux/lockd/xdr4.h>
#endif
#include <linux/lockd/debug.h>
+#include <linux/sunrpc/svc.h>
/*
* Version string
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index b73027298b3a..d424b9de3aff 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -63,12 +63,12 @@ struct mmc_ext_csd {
unsigned int power_off_longtime; /* Units: ms */
u8 power_off_notification; /* state */
unsigned int hs_max_dtr;
+ unsigned int hs200_max_dtr;
#define MMC_HIGH_26_MAX_DTR 26000000
#define MMC_HIGH_52_MAX_DTR 52000000
#define MMC_HIGH_DDR_MAX_DTR 52000000
#define MMC_HS200_MAX_DTR 200000000
unsigned int sectors;
- unsigned int card_type;
unsigned int hc_erase_size; /* In sectors */
unsigned int hc_erase_timeout; /* In milliseconds */
unsigned int sec_trim_mult; /* Secure trim multiplier */
@@ -110,6 +110,7 @@ struct mmc_ext_csd {
u8 raw_pwr_cl_200_360; /* 237 */
u8 raw_pwr_cl_ddr_52_195; /* 238 */
u8 raw_pwr_cl_ddr_52_360; /* 239 */
+ u8 raw_pwr_cl_ddr_200_360; /* 253 */
u8 raw_bkops_status; /* 246 */
u8 raw_sectors[4]; /* 212 - 4 bytes */
@@ -194,6 +195,7 @@ struct sdio_cis {
};
struct mmc_host;
+struct mmc_ios;
struct sdio_func;
struct sdio_func_tuple;
@@ -250,15 +252,11 @@ struct mmc_card {
unsigned int state; /* (our) card state */
#define MMC_STATE_PRESENT (1<<0) /* present in sysfs */
#define MMC_STATE_READONLY (1<<1) /* card is read-only */
-#define MMC_STATE_HIGHSPEED (1<<2) /* card is in high speed mode */
-#define MMC_STATE_BLOCKADDR (1<<3) /* card uses block-addressing */
-#define MMC_STATE_HIGHSPEED_DDR (1<<4) /* card is in high speed mode */
-#define MMC_STATE_ULTRAHIGHSPEED (1<<5) /* card is in ultra high speed mode */
-#define MMC_CARD_SDXC (1<<6) /* card is SDXC */
-#define MMC_CARD_REMOVED (1<<7) /* card has been removed */
-#define MMC_STATE_HIGHSPEED_200 (1<<8) /* card is in HS200 mode */
-#define MMC_STATE_DOING_BKOPS (1<<10) /* card is doing BKOPS */
-#define MMC_STATE_SUSPENDED (1<<11) /* card is suspended */
+#define MMC_STATE_BLOCKADDR (1<<2) /* card uses block-addressing */
+#define MMC_CARD_SDXC (1<<3) /* card is SDXC */
+#define MMC_CARD_REMOVED (1<<4) /* card has been removed */
+#define MMC_STATE_DOING_BKOPS (1<<5) /* card is doing BKOPS */
+#define MMC_STATE_SUSPENDED (1<<6) /* card is suspended */
unsigned int quirks; /* card quirks */
#define MMC_QUIRK_LENIENT_FN0 (1<<0) /* allow SDIO FN0 writes outside of the VS CCCR range */
#define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1) /* use func->cur_blksize */
@@ -301,6 +299,7 @@ struct mmc_card {
struct sdio_func_tuple *tuples; /* unknown common tuples */
unsigned int sd_bus_speed; /* Bus Speed Mode set for the card */
+ unsigned int mmc_avail_type; /* device type supported by both host and card */
struct dentry *debugfs_root;
struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
@@ -353,7 +352,7 @@ struct mmc_fixup {
#define CID_OEMID_ANY ((unsigned short) -1)
#define CID_NAME_ANY (NULL)
-#define END_FIXUP { 0 }
+#define END_FIXUP { NULL }
#define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end, \
_cis_vendor, _cis_device, \
@@ -418,11 +417,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
#define mmc_card_present(c) ((c)->state & MMC_STATE_PRESENT)
#define mmc_card_readonly(c) ((c)->state & MMC_STATE_READONLY)
-#define mmc_card_highspeed(c) ((c)->state & MMC_STATE_HIGHSPEED)
-#define mmc_card_hs200(c) ((c)->state & MMC_STATE_HIGHSPEED_200)
#define mmc_card_blockaddr(c) ((c)->state & MMC_STATE_BLOCKADDR)
-#define mmc_card_ddr_mode(c) ((c)->state & MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_uhs(c) ((c)->state & MMC_STATE_ULTRAHIGHSPEED)
#define mmc_card_ext_capacity(c) ((c)->state & MMC_CARD_SDXC)
#define mmc_card_removed(c) ((c) && ((c)->state & MMC_CARD_REMOVED))
#define mmc_card_doing_bkops(c) ((c)->state & MMC_STATE_DOING_BKOPS)
@@ -430,11 +425,7 @@ static inline void __maybe_unused remove_quirk(struct mmc_card *card, int data)
#define mmc_card_set_present(c) ((c)->state |= MMC_STATE_PRESENT)
#define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
-#define mmc_card_set_highspeed(c) ((c)->state |= MMC_STATE_HIGHSPEED)
-#define mmc_card_set_hs200(c) ((c)->state |= MMC_STATE_HIGHSPEED_200)
#define mmc_card_set_blockaddr(c) ((c)->state |= MMC_STATE_BLOCKADDR)
-#define mmc_card_set_ddr_mode(c) ((c)->state |= MMC_STATE_HIGHSPEED_DDR)
-#define mmc_card_set_uhs(c) ((c)->state |= MMC_STATE_ULTRAHIGHSPEED)
#define mmc_card_set_ext_capacity(c) ((c)->state |= MMC_CARD_SDXC)
#define mmc_card_set_removed(c) ((c)->state |= MMC_CARD_REMOVED)
#define mmc_card_set_doing_bkops(c) ((c)->state |= MMC_STATE_DOING_BKOPS)
diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h
index 6ce7d2cd3c7a..babaea93bca6 100644
--- a/include/linux/mmc/dw_mmc.h
+++ b/include/linux/mmc/dw_mmc.h
@@ -248,20 +248,6 @@ struct dw_mci_board {
/* delay in mS before detecting cards after interrupt */
u32 detect_delay_ms;
- int (*init)(u32 slot_id, irq_handler_t , void *);
- int (*get_ro)(u32 slot_id);
- int (*get_cd)(u32 slot_id);
- int (*get_ocr)(u32 slot_id);
- int (*get_bus_wd)(u32 slot_id);
- /*
- * Enable power to selected slot and set voltage to desired level.
- * Voltage levels are specified using MMC_VDD_xxx defines defined
- * in linux/mmc/host.h file.
- */
- void (*setpower)(u32 slot_id, u32 volt);
- void (*exit)(u32 slot_id);
- void (*select_slot)(u32 slot_id);
-
struct dw_mci_dma_ops *dma_ops;
struct dma_pdata *data;
struct block_settings *blk_settings;
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index cb61ea4d6945..7960424d0bc0 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -17,6 +17,7 @@
#include <linux/fault-inject.h>
#include <linux/mmc/core.h>
+#include <linux/mmc/card.h>
#include <linux/mmc/pm.h>
struct mmc_ios {
@@ -58,13 +59,9 @@ struct mmc_ios {
#define MMC_TIMING_UHS_SDR50 5
#define MMC_TIMING_UHS_SDR104 6
#define MMC_TIMING_UHS_DDR50 7
-#define MMC_TIMING_MMC_HS200 8
-
-#define MMC_SDR_MODE 0
-#define MMC_1_2V_DDR_MODE 1
-#define MMC_1_8V_DDR_MODE 2
-#define MMC_1_2V_SDR_MODE 3
-#define MMC_1_8V_SDR_MODE 4
+#define MMC_TIMING_MMC_DDR52 8
+#define MMC_TIMING_MMC_HS200 9
+#define MMC_TIMING_MMC_HS400 10
unsigned char signal_voltage; /* signalling voltage (1.8V or 3.3V) */
@@ -136,6 +133,9 @@ struct mmc_host_ops {
/* The tuning command opcode value is different for SD and eMMC cards */
int (*execute_tuning)(struct mmc_host *host, u32 opcode);
+
+ /* Prepare HS400 target operating frequency depending host driver */
+ int (*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv);
void (*hw_reset)(struct mmc_host *host);
void (*card_event)(struct mmc_host *host);
@@ -278,6 +278,11 @@ struct mmc_host {
#define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \
MMC_CAP2_PACKED_WR)
#define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */
+#define MMC_CAP2_HS400_1_8V (1 << 15) /* Can support HS400 1.8V */
+#define MMC_CAP2_HS400_1_2V (1 << 16) /* Can support HS400 1.2V */
+#define MMC_CAP2_HS400 (MMC_CAP2_HS400_1_8V | \
+ MMC_CAP2_HS400_1_2V)
+#define MMC_CAP2_SDIO_IRQ_NOTHREAD (1 << 17)
mmc_pm_flag_t pm_caps; /* supported pm features */
@@ -318,6 +323,8 @@ struct mmc_host {
int rescan_disable; /* disable card detection */
int rescan_entered; /* used with nonremovable devices */
+ bool trigger_card_event; /* card_event necessary */
+
struct mmc_card *card; /* device attached to this host */
wait_queue_head_t wq;
@@ -391,12 +398,13 @@ static inline void mmc_signal_sdio_irq(struct mmc_host *host)
wake_up_process(host->sdio_irq_thread);
}
+void sdio_run_irqs(struct mmc_host *host);
+
#ifdef CONFIG_REGULATOR
int mmc_regulator_get_ocrmask(struct regulator *supply);
int mmc_regulator_set_ocr(struct mmc_host *mmc,
struct regulator *supply,
unsigned short vdd_bit);
-int mmc_regulator_get_supply(struct mmc_host *mmc);
#else
static inline int mmc_regulator_get_ocrmask(struct regulator *supply)
{
@@ -409,13 +417,10 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc,
{
return 0;
}
-
-static inline int mmc_regulator_get_supply(struct mmc_host *mmc)
-{
- return 0;
-}
#endif
+int mmc_regulator_get_supply(struct mmc_host *mmc);
+
int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *);
static inline int mmc_card_is_removable(struct mmc_host *host)
@@ -475,4 +480,32 @@ static inline unsigned int mmc_host_clk_rate(struct mmc_host *host)
return host->ios.clock;
}
#endif
+
+static inline int mmc_card_hs(struct mmc_card *card)
+{
+ return card->host->ios.timing == MMC_TIMING_SD_HS ||
+ card->host->ios.timing == MMC_TIMING_MMC_HS;
+}
+
+static inline int mmc_card_uhs(struct mmc_card *card)
+{
+ return card->host->ios.timing >= MMC_TIMING_UHS_SDR12 &&
+ card->host->ios.timing <= MMC_TIMING_UHS_DDR50;
+}
+
+static inline bool mmc_card_hs200(struct mmc_card *card)
+{
+ return card->host->ios.timing == MMC_TIMING_MMC_HS200;
+}
+
+static inline bool mmc_card_ddr52(struct mmc_card *card)
+{
+ return card->host->ios.timing == MMC_TIMING_MMC_DDR52;
+}
+
+static inline bool mmc_card_hs400(struct mmc_card *card)
+{
+ return card->host->ios.timing == MMC_TIMING_MMC_HS400;
+}
+
#endif /* LINUX_MMC_HOST_H */
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 50bcde3677ca..64ec963ed347 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -325,6 +325,7 @@ struct _mmc_csd {
#define EXT_CSD_POWER_OFF_LONG_TIME 247 /* RO */
#define EXT_CSD_GENERIC_CMD6_TIME 248 /* RO */
#define EXT_CSD_CACHE_SIZE 249 /* RO, 4 bytes */
+#define EXT_CSD_PWR_CL_DDR_200_360 253 /* RO */
#define EXT_CSD_TAG_UNIT_SIZE 498 /* RO */
#define EXT_CSD_DATA_TAG_SUPPORT 499 /* RO */
#define EXT_CSD_MAX_PACKED_WRITES 500 /* RO */
@@ -354,18 +355,25 @@ struct _mmc_csd {
#define EXT_CSD_CMD_SET_SECURE (1<<1)
#define EXT_CSD_CMD_SET_CPSECURE (1<<2)
-#define EXT_CSD_CARD_TYPE_26 (1<<0) /* Card can run at 26MHz */
-#define EXT_CSD_CARD_TYPE_52 (1<<1) /* Card can run at 52MHz */
-#define EXT_CSD_CARD_TYPE_MASK 0x3F /* Mask out reserved bits */
+#define EXT_CSD_CARD_TYPE_HS_26 (1<<0) /* Card can run at 26MHz */
+#define EXT_CSD_CARD_TYPE_HS_52 (1<<1) /* Card can run at 52MHz */
+#define EXT_CSD_CARD_TYPE_HS (EXT_CSD_CARD_TYPE_HS_26 | \
+ EXT_CSD_CARD_TYPE_HS_52)
#define EXT_CSD_CARD_TYPE_DDR_1_8V (1<<2) /* Card can run at 52MHz */
/* DDR mode @1.8V or 3V I/O */
#define EXT_CSD_CARD_TYPE_DDR_1_2V (1<<3) /* Card can run at 52MHz */
/* DDR mode @1.2V I/O */
#define EXT_CSD_CARD_TYPE_DDR_52 (EXT_CSD_CARD_TYPE_DDR_1_8V \
| EXT_CSD_CARD_TYPE_DDR_1_2V)
-#define EXT_CSD_CARD_TYPE_SDR_1_8V (1<<4) /* Card can run at 200MHz */
-#define EXT_CSD_CARD_TYPE_SDR_1_2V (1<<5) /* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_8V (1<<4) /* Card can run at 200MHz */
+#define EXT_CSD_CARD_TYPE_HS200_1_2V (1<<5) /* Card can run at 200MHz */
/* SDR mode @1.2V I/O */
+#define EXT_CSD_CARD_TYPE_HS200 (EXT_CSD_CARD_TYPE_HS200_1_8V | \
+ EXT_CSD_CARD_TYPE_HS200_1_2V)
+#define EXT_CSD_CARD_TYPE_HS400_1_8V (1<<6) /* Card can run at 200MHz DDR, 1.8V */
+#define EXT_CSD_CARD_TYPE_HS400_1_2V (1<<7) /* Card can run at 200MHz DDR, 1.2V */
+#define EXT_CSD_CARD_TYPE_HS400 (EXT_CSD_CARD_TYPE_HS400_1_8V | \
+ EXT_CSD_CARD_TYPE_HS400_1_2V)
#define EXT_CSD_BUS_WIDTH_1 0 /* Card is in 1 bit mode */
#define EXT_CSD_BUS_WIDTH_4 1 /* Card is in 4 bit mode */
@@ -373,6 +381,11 @@ struct _mmc_csd {
#define EXT_CSD_DDR_BUS_WIDTH_4 5 /* Card is in 4 bit DDR mode */
#define EXT_CSD_DDR_BUS_WIDTH_8 6 /* Card is in 8 bit DDR mode */
+#define EXT_CSD_TIMING_BC 0 /* Backwards compatibility */
+#define EXT_CSD_TIMING_HS 1 /* High speed */
+#define EXT_CSD_TIMING_HS200 2 /* HS200 */
+#define EXT_CSD_TIMING_HS400 3 /* HS400 */
+
#define EXT_CSD_SEC_ER_EN BIT(0)
#define EXT_CSD_SEC_BD_BLK_EN BIT(2)
#define EXT_CSD_SEC_GB_CL_EN BIT(4)
diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h
index 7be12b883485..08abe9941884 100644
--- a/include/linux/mmc/sdhci.h
+++ b/include/linux/mmc/sdhci.h
@@ -57,12 +57,8 @@ struct sdhci_host {
#define SDHCI_QUIRK_BROKEN_CARD_DETECTION (1<<15)
/* Controller reports inverted write-protect state */
#define SDHCI_QUIRK_INVERTED_WRITE_PROTECT (1<<16)
-/* Controller has nonstandard clock management */
-#define SDHCI_QUIRK_NONSTANDARD_CLOCK (1<<17)
/* Controller does not like fast PIO transfers */
#define SDHCI_QUIRK_PIO_NEEDS_DELAY (1<<18)
-/* Controller losing signal/interrupt enable states after reset */
-#define SDHCI_QUIRK_RESTORE_IRQS_AFTER_RESET (1<<19)
/* Controller has to be forced to use block size of 2048 bytes */
#define SDHCI_QUIRK_FORCE_BLK_SZ_2048 (1<<20)
/* Controller cannot do multi-block transfers */
@@ -147,6 +143,7 @@ struct sdhci_host {
bool runtime_suspended; /* Host is runtime suspended */
bool bus_on; /* Bus power prevents runtime suspend */
+ bool preset_enabled; /* Preset is enabled */
struct mmc_request *mrq; /* Current request */
struct mmc_command *cmd; /* Current command */
@@ -164,8 +161,7 @@ struct sdhci_host {
dma_addr_t adma_addr; /* Mapped ADMA descr. table */
dma_addr_t align_addr; /* Mapped bounce buffer */
- struct tasklet_struct card_tasklet; /* Tasklet structures */
- struct tasklet_struct finish_tasklet;
+ struct tasklet_struct finish_tasklet; /* Tasklet structures */
struct timer_list timer; /* Timer for timeouts */
@@ -177,6 +173,13 @@ struct sdhci_host {
unsigned int ocr_avail_mmc;
u32 ocr_mask; /* available voltages */
+ unsigned timing; /* Current timing */
+
+ u32 thread_isr;
+
+ /* cached registers */
+ u32 ier;
+
wait_queue_head_t buf_ready_int; /* Waitqueue for Buffer Read Ready interrupt */
unsigned int tuning_done; /* Condition flag set when CMD19 succeeds */
diff --git a/include/linux/nfs.h b/include/linux/nfs.h
index 3e794c12e90a..610af5155ef2 100644
--- a/include/linux/nfs.h
+++ b/include/linux/nfs.h
@@ -46,6 +46,9 @@ static inline void nfs_copy_fh(struct nfs_fh *target, const struct nfs_fh *sourc
enum nfs3_stable_how {
NFS_UNSTABLE = 0,
NFS_DATA_SYNC = 1,
- NFS_FILE_SYNC = 2
+ NFS_FILE_SYNC = 2,
+
+ /* used by direct.c to mark verf as invalid */
+ NFS_INVALID_STABLE_HOW = -1
};
#endif /* _LINUX_NFS_H */
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 12c2cb947df5..a1e3064a8d99 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -399,8 +399,6 @@ enum lock_type4 {
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
-#define FATTR4_WORD2_CHANGE_SECURITY_LABEL \
- (1UL << 17)
/* MDS threshold bitmap bits */
#define THRESHOLD_RD (1UL << 0)
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index fa6918b0f829..919576b8e2cf 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -520,7 +520,6 @@ extern int nfs_writepage(struct page *page, struct writeback_control *wbc);
extern int nfs_writepages(struct address_space *, struct writeback_control *);
extern int nfs_flush_incompatible(struct file *file, struct page *page);
extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
-extern void nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
/*
* Try to write back everything synchronously (but check the
@@ -553,7 +552,6 @@ nfs_have_writebacks(struct inode *inode)
extern int nfs_readpage(struct file *, struct page *);
extern int nfs_readpages(struct file *, struct address_space *,
struct list_head *, unsigned);
-extern int nfs_readpage_result(struct rpc_task *, struct nfs_read_data *);
extern int nfs_readpage_async(struct nfs_open_context *, struct inode *,
struct page *);
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 92ce5783b707..7d9096d95d4a 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -22,12 +22,17 @@
* Valid flags for a dirty buffer
*/
enum {
- PG_BUSY = 0,
- PG_MAPPED,
- PG_CLEAN,
- PG_NEED_COMMIT,
- PG_NEED_RESCHED,
- PG_COMMIT_TO_DS,
+ PG_BUSY = 0, /* nfs_{un}lock_request */
+ PG_MAPPED, /* page private set for buffered io */
+ PG_CLEAN, /* write succeeded */
+ PG_COMMIT_TO_DS, /* used by pnfs layouts */
+ PG_INODE_REF, /* extra ref held by inode (head req only) */
+ PG_HEADLOCK, /* page group lock of wb_head */
+ PG_TEARDOWN, /* page group sync for destroy */
+ PG_UNLOCKPAGE, /* page group sync bit in read path */
+ PG_UPTODATE, /* page group sync bit in read path */
+ PG_WB_END, /* page group sync bit in write path */
+ PG_REMOVE, /* page group sync bit in write path */
};
struct nfs_inode;
@@ -43,15 +48,29 @@ struct nfs_page {
struct kref wb_kref; /* reference count */
unsigned long wb_flags;
struct nfs_write_verifier wb_verf; /* Commit cookie */
+ struct nfs_page *wb_this_page; /* list of reqs for this page */
+ struct nfs_page *wb_head; /* head pointer for req list */
};
struct nfs_pageio_descriptor;
struct nfs_pageio_ops {
void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
- bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
+ size_t (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *,
+ struct nfs_page *);
int (*pg_doio)(struct nfs_pageio_descriptor *);
};
+struct nfs_rw_ops {
+ const fmode_t rw_mode;
+ struct nfs_rw_header *(*rw_alloc_header)(void);
+ void (*rw_free_header)(struct nfs_rw_header *);
+ void (*rw_release)(struct nfs_pgio_data *);
+ int (*rw_done)(struct rpc_task *, struct nfs_pgio_data *, struct inode *);
+ void (*rw_result)(struct rpc_task *, struct nfs_pgio_data *);
+ void (*rw_initiate)(struct nfs_pgio_data *, struct rpc_message *,
+ struct rpc_task_setup *, int);
+};
+
struct nfs_pageio_descriptor {
struct list_head pg_list;
unsigned long pg_bytes_written;
@@ -63,6 +82,7 @@ struct nfs_pageio_descriptor {
struct inode *pg_inode;
const struct nfs_pageio_ops *pg_ops;
+ const struct nfs_rw_ops *pg_rw_ops;
int pg_ioflags;
int pg_error;
const struct rpc_call_ops *pg_rpc_callops;
@@ -75,29 +95,33 @@ struct nfs_pageio_descriptor {
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
- struct inode *inode,
struct page *page,
+ struct nfs_page *last,
unsigned int offset,
unsigned int count);
-extern void nfs_release_request(struct nfs_page *req);
+extern void nfs_release_request(struct nfs_page *);
extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
struct inode *inode,
const struct nfs_pageio_ops *pg_ops,
const struct nfs_pgio_completion_ops *compl_ops,
+ const struct nfs_rw_ops *rw_ops,
size_t bsize,
int how);
extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
struct nfs_page *);
extern void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
extern void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
-extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
struct nfs_page *prev,
struct nfs_page *req);
extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
-extern void nfs_unlock_and_release_request(struct nfs_page *req);
+extern void nfs_unlock_and_release_request(struct nfs_page *);
+extern void nfs_page_group_lock(struct nfs_page *);
+extern void nfs_page_group_unlock(struct nfs_page *);
+extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
/*
* Lock the page of an asynchronous request
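
A minimal sketch of how the new page-group primitives declared above might be used on a writeback completion path; the calling context and locking here are assumptions for illustration, not taken from this patch:

/* Hypothetical: record that this sub-request's writeback is complete and
 * end page writeback only once every request in the group has done so. */
static void example_end_page_writeback(struct nfs_page *req)
{
	if (nfs_page_group_sync_on_bit(req, PG_WB_END))
		end_page_writeback(req->wb_page);
}
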
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 6fb5b2335b59..9a1396e70310 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -489,31 +489,21 @@ struct nfs4_delegreturnres {
};
/*
- * Arguments to the read call.
+ * Arguments to the write call.
*/
-struct nfs_readargs {
- struct nfs4_sequence_args seq_args;
- struct nfs_fh * fh;
- struct nfs_open_context *context;
- struct nfs_lock_context *lock_context;
- nfs4_stateid stateid;
- __u64 offset;
- __u32 count;
- unsigned int pgbase;
- struct page ** pages;
+struct nfs_write_verifier {
+ char data[8];
};
-struct nfs_readres {
- struct nfs4_sequence_res seq_res;
- struct nfs_fattr * fattr;
- __u32 count;
- int eof;
+struct nfs_writeverf {
+ struct nfs_write_verifier verifier;
+ enum nfs3_stable_how committed;
};
/*
- * Arguments to the write call.
+ * Arguments shared by the read and write call.
*/
-struct nfs_writeargs {
+struct nfs_pgio_args {
struct nfs4_sequence_args seq_args;
struct nfs_fh * fh;
struct nfs_open_context *context;
@@ -521,27 +511,20 @@ struct nfs_writeargs {
nfs4_stateid stateid;
__u64 offset;
__u32 count;
- enum nfs3_stable_how stable;
unsigned int pgbase;
struct page ** pages;
- const u32 * bitmask;
-};
-
-struct nfs_write_verifier {
- char data[8];
+ const u32 * bitmask; /* used by write */
+ enum nfs3_stable_how stable; /* used by write */
};
-struct nfs_writeverf {
- struct nfs_write_verifier verifier;
- enum nfs3_stable_how committed;
-};
-
-struct nfs_writeres {
+struct nfs_pgio_res {
struct nfs4_sequence_res seq_res;
struct nfs_fattr * fattr;
- struct nfs_writeverf * verf;
__u32 count;
- const struct nfs_server *server;
+ int eof; /* used by read */
+ struct nfs_writeverf * verf; /* used by write */
+ const struct nfs_server *server; /* used by write */
+
};
/*
@@ -1129,6 +1112,7 @@ struct pnfs_commit_bucket {
struct list_head committing;
struct pnfs_layout_segment *wlseg;
struct pnfs_layout_segment *clseg;
+ struct nfs_writeverf direct_verf;
};
struct pnfs_ds_commit_info {
@@ -1264,20 +1248,6 @@ struct nfs_page_array {
struct page *page_array[NFS_PAGEVEC_SIZE];
};
-struct nfs_read_data {
- struct nfs_pgio_header *header;
- struct list_head list;
- struct rpc_task task;
- struct nfs_fattr fattr; /* fattr storage */
- struct nfs_readargs args;
- struct nfs_readres res;
- unsigned long timestamp; /* For lease renewal */
- int (*read_done_cb) (struct rpc_task *task, struct nfs_read_data *data);
- __u64 mds_offset;
- struct nfs_page_array pages;
- struct nfs_client *ds_clp; /* pNFS data server */
-};
-
/* used as flag bits in nfs_pgio_header */
enum {
NFS_IOHDR_ERROR = 0,
@@ -1287,19 +1257,22 @@ enum {
NFS_IOHDR_NEED_RESCHED,
};
+struct nfs_pgio_data;
+
struct nfs_pgio_header {
struct inode *inode;
struct rpc_cred *cred;
struct list_head pages;
- struct list_head rpc_list;
+ struct nfs_pgio_data *data;
atomic_t refcnt;
struct nfs_page *req;
- struct nfs_writeverf *verf;
+ struct nfs_writeverf verf; /* Used for writes */
struct pnfs_layout_segment *lseg;
loff_t io_start;
const struct rpc_call_ops *mds_ops;
void (*release) (struct nfs_pgio_header *hdr);
const struct nfs_pgio_completion_ops *completion_ops;
+ const struct nfs_rw_ops *rw_ops;
struct nfs_direct_req *dreq;
void *layout_private;
spinlock_t lock;
@@ -1310,30 +1283,24 @@ struct nfs_pgio_header {
unsigned long flags;
};
-struct nfs_read_header {
- struct nfs_pgio_header header;
- struct nfs_read_data rpc_data;
-};
-
-struct nfs_write_data {
+struct nfs_pgio_data {
struct nfs_pgio_header *header;
- struct list_head list;
struct rpc_task task;
struct nfs_fattr fattr;
- struct nfs_writeverf verf;
- struct nfs_writeargs args; /* argument struct */
- struct nfs_writeres res; /* result struct */
+ struct nfs_writeverf verf; /* Used for writes */
+ struct nfs_pgio_args args; /* argument struct */
+ struct nfs_pgio_res res; /* result struct */
unsigned long timestamp; /* For lease renewal */
- int (*write_done_cb) (struct rpc_task *task, struct nfs_write_data *data);
+ int (*pgio_done_cb) (struct rpc_task *task, struct nfs_pgio_data *data);
__u64 mds_offset; /* Filelayout dense stripe */
struct nfs_page_array pages;
struct nfs_client *ds_clp; /* pNFS data server */
+ int ds_idx; /* ds index if ds_clp is set */
};
-struct nfs_write_header {
+struct nfs_rw_header {
struct nfs_pgio_header header;
- struct nfs_write_data rpc_data;
- struct nfs_writeverf verf;
+ struct nfs_pgio_data rpc_data;
};
struct nfs_mds_commit_info {
@@ -1465,16 +1432,11 @@ struct nfs_rpc_ops {
struct nfs_pathconf *);
int (*set_capabilities)(struct nfs_server *, struct nfs_fh *);
int (*decode_dirent)(struct xdr_stream *, struct nfs_entry *, int);
- void (*read_setup) (struct nfs_read_data *, struct rpc_message *);
- void (*read_pageio_init)(struct nfs_pageio_descriptor *, struct inode *,
- const struct nfs_pgio_completion_ops *);
- int (*read_rpc_prepare)(struct rpc_task *, struct nfs_read_data *);
- int (*read_done) (struct rpc_task *, struct nfs_read_data *);
- void (*write_setup) (struct nfs_write_data *, struct rpc_message *);
- void (*write_pageio_init)(struct nfs_pageio_descriptor *, struct inode *, int,
- const struct nfs_pgio_completion_ops *);
- int (*write_rpc_prepare)(struct rpc_task *, struct nfs_write_data *);
- int (*write_done) (struct rpc_task *, struct nfs_write_data *);
+ int (*pgio_rpc_prepare)(struct rpc_task *, struct nfs_pgio_data *);
+ void (*read_setup) (struct nfs_pgio_data *, struct rpc_message *);
+ int (*read_done) (struct rpc_task *, struct nfs_pgio_data *);
+ void (*write_setup) (struct nfs_pgio_data *, struct rpc_message *);
+ int (*write_done) (struct rpc_task *, struct nfs_pgio_data *);
void (*commit_setup) (struct nfs_commit_data *, struct rpc_message *);
void (*commit_rpc_prepare)(struct rpc_task *, struct nfs_commit_data *);
int (*commit_done) (struct rpc_task *, struct nfs_commit_data *);
diff --git a/include/linux/nfsd/debug.h b/include/linux/nfsd/debug.h
deleted file mode 100644
index 19ef8375b577..000000000000
--- a/include/linux/nfsd/debug.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/*
- * linux/include/linux/nfsd/debug.h
- *
- * Debugging-related stuff for nfsd
- *
- * Copyright (C) 1995 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef LINUX_NFSD_DEBUG_H
-#define LINUX_NFSD_DEBUG_H
-
-#include <uapi/linux/nfsd/debug.h>
-
-# undef ifdebug
-# ifdef NFSD_DEBUG
-# define ifdebug(flag) if (nfsd_debug & NFSDDBG_##flag)
-# else
-# define ifdebug(flag) if (0)
-# endif
-#endif /* LINUX_NFSD_DEBUG_H */
diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
deleted file mode 100644
index a93593f1fa4e..000000000000
--- a/include/linux/nfsd/nfsfh.h
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * include/linux/nfsd/nfsfh.h
- *
- * This file describes the layout of the file handles as passed
- * over the wire.
- *
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
- * Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
- */
-#ifndef _LINUX_NFSD_FH_H
-#define _LINUX_NFSD_FH_H
-
-# include <linux/sunrpc/svc.h>
-#include <uapi/linux/nfsd/nfsfh.h>
-
-static inline __u32 ino_t_to_u32(ino_t ino)
-{
- return (__u32) ino;
-}
-
-static inline ino_t u32_to_ino_t(__u32 uino)
-{
- return (ino_t) uino;
-}
-
-/*
- * This is the internal representation of an NFS handle used in knfsd.
- * pre_mtime/post_version will be used to support wcc_attr's in NFSv3.
- */
-typedef struct svc_fh {
- struct knfsd_fh fh_handle; /* FH data */
- struct dentry * fh_dentry; /* validated dentry */
- struct svc_export * fh_export; /* export pointer */
- int fh_maxsize; /* max size for fh_handle */
-
- unsigned char fh_locked; /* inode locked by us */
- unsigned char fh_want_write; /* remount protection taken */
-
-#ifdef CONFIG_NFSD_V3
- unsigned char fh_post_saved; /* post-op attrs saved */
- unsigned char fh_pre_saved; /* pre-op attrs saved */
-
- /* Pre-op attributes saved during fh_lock */
- __u64 fh_pre_size; /* size before operation */
- struct timespec fh_pre_mtime; /* mtime before oper */
- struct timespec fh_pre_ctime; /* ctime before oper */
- /*
- * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
- * to find out if it is valid.
- */
- u64 fh_pre_change;
-
- /* Post-op attributes saved in fh_unlock */
- struct kstat fh_post_attr; /* full attrs after operation */
- u64 fh_post_change; /* nfsv4 change; see above */
-#endif /* CONFIG_NFSD_V3 */
-
-} svc_fh;
-
-#endif /* _LINUX_NFSD_FH_H */
diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h
index c29a6dee6bec..88e6ea4a5d36 100644
--- a/include/linux/omap-dma.h
+++ b/include/linux/omap-dma.h
@@ -1,23 +1,6 @@
-/*
- * OMAP DMA Engine support
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
#ifndef __LINUX_OMAP_DMA_H
#define __LINUX_OMAP_DMA_H
-
-struct dma_chan;
-
-#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
-bool omap_dma_filter_fn(struct dma_chan *, void *);
-#else
-static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
-{
- return false;
-}
-#endif
+#include <linux/omap-dmaengine.h>
/*
* Legacy OMAP DMA handling defines and functions
diff --git a/include/linux/omap-dmaengine.h b/include/linux/omap-dmaengine.h
new file mode 100644
index 000000000000..8e6906c72e90
--- /dev/null
+++ b/include/linux/omap-dmaengine.h
@@ -0,0 +1,21 @@
+/*
+ * OMAP DMA Engine support
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#ifndef __LINUX_OMAP_DMAENGINE_H
+#define __LINUX_OMAP_DMAENGINE_H
+
+struct dma_chan;
+
+#if defined(CONFIG_DMA_OMAP) || (defined(CONFIG_DMA_OMAP_MODULE) && defined(MODULE))
+bool omap_dma_filter_fn(struct dma_chan *, void *);
+#else
+static inline bool omap_dma_filter_fn(struct dma_chan *c, void *d)
+{
+ return false;
+}
+#endif
+#endif /* __LINUX_OMAP_DMAENGINE_H */
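
For illustration, a client driver that only needs the dmaengine filter can include the new header directly; a hedged sketch (the device, request line, and channel name below are made up for the example):

#include <linux/dmaengine.h>
#include <linux/omap-dmaengine.h>

/* Hypothetical: request an OMAP DMA channel, falling back to the filter
 * function when the device has no DT channel description. */
static struct dma_chan *example_request_rx_chan(struct device *dev,
						unsigned int sig)
{
	dma_cap_mask_t mask;

	dma_cap_zero(mask);
	dma_cap_set(DMA_SLAVE, mask);
	return dma_request_slave_channel_compat(mask, omap_dma_filter_fn,
						&sig, dev, "rx");
}
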
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 04e763221246..1bc7cd05b22e 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -244,6 +244,7 @@ struct svc_rqst {
struct page * rq_pages[RPCSVC_MAXPAGES];
struct page * *rq_respages; /* points into rq_pages */
struct page * *rq_next_page; /* next reply page to use */
+ struct page * *rq_page_end; /* one past the last page */
struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */
@@ -254,11 +255,15 @@ struct svc_rqst {
u32 rq_prot; /* IP protocol */
unsigned short
rq_secure : 1; /* secure port */
+ unsigned short rq_local : 1; /* local request */
void * rq_argp; /* decoded arguments */
void * rq_resp; /* xdr'd results */
void * rq_auth_data; /* flavor-specific data */
-
+ int rq_auth_slack; /* extra space xdr code
+ * should leave in head
+ * for krb5i, krb5p.
+ */
int rq_reserved; /* space on socket outq
* reserved for this request
*/
@@ -454,11 +459,7 @@ char * svc_print_addr(struct svc_rqst *, char *, size_t);
*/
static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
{
- int added_space = 0;
-
- if (rqstp->rq_authop->flavour)
- added_space = RPC_MAX_AUTH_SIZE;
- svc_reserve(rqstp, space + added_space);
+ svc_reserve(rqstp, space + rqstp->rq_auth_slack);
}
#endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 0b8e3e6bdacf..5cf99a016368 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -115,14 +115,13 @@ struct svc_rdma_fastreg_mr {
struct list_head frmr_list;
};
struct svc_rdma_req_map {
- struct svc_rdma_fastreg_mr *frmr;
unsigned long count;
union {
struct kvec sge[RPCSVC_MAXPAGES];
struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES];
+ unsigned long lkey[RPCSVC_MAXPAGES];
};
};
-#define RDMACTXT_F_FAST_UNREG 1
#define RDMACTXT_F_LAST_CTXT 2
#define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index b05963f09ebf..7235040a19b2 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -24,6 +24,7 @@ struct svc_xprt_ops {
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
+ int (*xpo_secure_port)(struct svc_rqst *);
};
struct svc_xprt_class {
@@ -63,6 +64,7 @@ struct svc_xprt {
#define XPT_DETACHED 10 /* detached from tempsocks list */
#define XPT_LISTENER 11 /* listening endpoint */
#define XPT_CACHE_AUTH 12 /* cache auth info */
+#define XPT_LOCAL 13 /* connection from loopback interface */
struct svc_serv *xpt_server; /* service for transport */
atomic_t xpt_reserved; /* space on outq that is rsvd */
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 15f9204ee70b..70c6b92e15a7 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -215,6 +215,9 @@ typedef int (*kxdrdproc_t)(void *rqstp, struct xdr_stream *xdr, void *obj);
extern void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p);
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
+extern void xdr_commit_encode(struct xdr_stream *xdr);
+extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
+extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
unsigned int base, unsigned int len);
extern unsigned int xdr_stream_pos(const struct xdr_stream *xdr);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 3876f0f1dfd3..fcbfe8783243 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -24,6 +24,12 @@
#define RPC_MAX_SLOT_TABLE_LIMIT (65536U)
#define RPC_MAX_SLOT_TABLE RPC_MAX_SLOT_TABLE_LIMIT
+#define RPC_CWNDSHIFT (8U)
+#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
+#define RPC_INITCWND RPC_CWNDSCALE
+#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
+
/*
* This describes a timeout strategy
*/
diff --git a/include/uapi/linux/nfsd/nfsfh.h b/include/uapi/linux/nfsd/nfsfh.h
index 616e3b396476..20391235d088 100644
--- a/include/uapi/linux/nfsd/nfsfh.h
+++ b/include/uapi/linux/nfsd/nfsfh.h
@@ -1,13 +1,7 @@
/*
- * include/linux/nfsd/nfsfh.h
- *
* This file describes the layout of the file handles as passed
* over the wire.
*
- * Earlier versions of knfsd used to sign file handles using keyed MD5
- * or SHA. I've removed this code, because it doesn't give you more
- * security than blocking external access to port 2049 on your firewall.
- *
* Copyright (C) 1995, 1996, 1997 Olaf Kirch <okir@monad.swb.de>
*/
@@ -37,7 +31,7 @@ struct nfs_fhbase_old {
};
/*
- * This is the new flexible, extensible style NFSv2/v3 file handle.
+ * This is the new flexible, extensible style NFSv2/v3/v4 file handle.
* by Neil Brown <neilb@cse.unsw.edu.au> - March 2000
*
* The file handle starts with a sequence of four-byte words.
@@ -47,14 +41,7 @@ struct nfs_fhbase_old {
*
* All four-byte values are in host-byte-order.
*
- * The auth_type field specifies how the filehandle can be authenticated
- * This might allow a file to be confirmed to be in a writable part of a
- * filetree without checking the path from it up to the root.
- * Current values:
- * 0 - No authentication. fb_auth is 0 bytes long
- * Possible future values:
- * 1 - 4 bytes taken from MD5 hash of the remainer of the file handle
- * prefixed by a secret and with the important export flags.
+ * The auth_type field is deprecated and must be set to 0.
*
* The fsid_type identifies how the filesystem (or export point) is
* encoded.
@@ -71,14 +58,9 @@ struct nfs_fhbase_old {
* 7 - 8 byte inode number and 16 byte uuid
*
* The fileid_type identified how the file within the filesystem is encoded.
- * This is (will be) passed to, and set by, the underlying filesystem if it supports
- * filehandle operations. The filesystem must not use the value '0' or '0xff' and may
- * only use the values 1 and 2 as defined below:
- * Current values:
- * 0 - The root, or export point, of the filesystem. fb_fileid is 0 bytes.
- * 1 - 32bit inode number, 32 bit generation number.
- * 2 - 32bit inode number, 32 bit generation number, 32 bit parent directory inode number.
- *
+ * The values for this field are filesystem specific, except that
+ * filesystems must not use the values '0' or '0xff'. See enum fid_type
+ * in include/linux/exportfs.h for currently registered values.
*/
struct nfs_fhbase_new {
__u8 fb_version; /* == 1, even => nfs_fhbase_old */
@@ -114,9 +96,9 @@ struct knfsd_fh {
#define fh_fsid_type fh_base.fh_new.fb_fsid_type
#define fh_auth_type fh_base.fh_new.fb_auth_type
#define fh_fileid_type fh_base.fh_new.fb_fileid_type
-#define fh_auth fh_base.fh_new.fb_auth
#define fh_fsid fh_base.fh_new.fb_auth
-
+/* Do not use, provided for userspace compatibility. */
+#define fh_auth fh_base.fh_new.fb_auth
#endif /* _UAPI_LINUX_NFSD_FH_H */
diff --git a/kernel/capability.c b/kernel/capability.c
index 84b2bbf443e7..a5cf13c018ce 100644
--- a/kernel/capability.c
+++ b/kernel/capability.c
@@ -424,23 +424,19 @@ bool capable(int cap)
EXPORT_SYMBOL(capable);
/**
- * inode_capable - Check superior capability over inode
+ * capable_wrt_inode_uidgid - Check capability with regard to the inode's uid and gid mapping
* @inode: The inode in question
* @cap: The capability in question
*
- * Return true if the current task has the given superior capability
- * targeted at it's own user namespace and that the given inode is owned
- * by the current user namespace or a child namespace.
- *
- * Currently we check to see if an inode is owned by the current
- * user namespace by seeing if the inode's owner maps into the
- * current user namespace.
- *
+ * Return true if the current task has the given capability targeted at
+ * its own user namespace and that the given inode's uid and gid are
+ * mapped into the current user namespace.
*/
-bool inode_capable(const struct inode *inode, int cap)
+bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
{
struct user_namespace *ns = current_user_ns();
- return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid);
+ return ns_capable(ns, cap) && kuid_has_mapping(ns, inode->i_uid) &&
+ kgid_has_mapping(ns, inode->i_gid);
}
-EXPORT_SYMBOL(inode_capable);
+EXPORT_SYMBOL(capable_wrt_inode_uidgid);
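
A hedged sketch of the intended caller pattern (the helper below is illustrative, in the style of inode_owner_or_capable(), and is not part of this patch): ownership checks pass the inode so that both its uid and gid mappings are verified:

/* Hypothetical ownership check using the renamed helper. */
static bool example_owner_or_capable(const struct inode *inode)
{
	return uid_eq(current_fsuid(), inode->i_uid) ||
	       capable_wrt_inode_uidgid(inode, CAP_FOWNER);
}
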
diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c
index 27ce26240932..92d5ab99fbf3 100644
--- a/net/sunrpc/auth_gss/gss_mech_switch.c
+++ b/net/sunrpc/auth_gss/gss_mech_switch.c
@@ -218,10 +218,8 @@ static struct gss_api_mech *_gss_mech_get_by_pseudoflavor(u32 pseudoflavor)
spin_lock(&registered_mechs_lock);
list_for_each_entry(pos, &registered_mechs, gm_list) {
- if (!mech_supports_pseudoflavor(pos, pseudoflavor)) {
- module_put(pos->gm_owner);
+ if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
- }
if (try_module_get(pos->gm_owner))
gm = pos;
break;
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 0f73f4507746..4ce5eccec1f6 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1503,6 +1503,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
break;
case RPC_GSS_SVC_PRIVACY:
/* placeholders for length and seq. number: */
@@ -1511,6 +1512,7 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
gc->gc_seq, rsci->mechctx))
goto garbage_args;
+ rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
break;
default:
goto auth_err;
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index ae333c1845bb..066362141133 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -374,7 +374,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
}
return;
out:
- printk(KERN_ERR "nfsd: failed to unregister %s cache\n", cd->name);
+ printk(KERN_ERR "RPC: failed to unregister %s cache\n", cd->name);
}
EXPORT_SYMBOL_GPL(sunrpc_destroy_cache_detail);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 25578afe1548..c0365c14b858 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -832,7 +832,8 @@ static void rpc_async_schedule(struct work_struct *work)
* @size: requested byte size
*
* To prevent rpciod from hanging, this allocator never sleeps,
- * returning NULL if the request cannot be serviced immediately.
+ * returning NULL and suppressing the warning if the request cannot be
+ * serviced immediately.
* The caller can arrange to sleep in a way that is safe for rpciod.
*
* Most requests are 'small' (under 2KiB) and can be serviced from a
@@ -845,7 +846,7 @@ static void rpc_async_schedule(struct work_struct *work)
void *rpc_malloc(struct rpc_task *task, size_t size)
{
struct rpc_buffer *buf;
- gfp_t gfp = GFP_NOWAIT;
+ gfp_t gfp = GFP_NOWAIT | __GFP_NOWARN;
if (RPC_IS_SWAPPER(task))
gfp |= __GFP_MEMALLOC;
diff --git a/net/sunrpc/sunrpc.h b/net/sunrpc/sunrpc.h
index 14c9f6d1c5ff..f2b7cb540e61 100644
--- a/net/sunrpc/sunrpc.h
+++ b/net/sunrpc/sunrpc.h
@@ -43,6 +43,19 @@ static inline int rpc_reply_expected(struct rpc_task *task)
(task->tk_msg.rpc_proc->p_decode != NULL);
}
+static inline int sock_is_loopback(struct sock *sk)
+{
+ struct dst_entry *dst;
+ int loopback = 0;
+ rcu_read_lock();
+ dst = rcu_dereference(sk->sk_dst_cache);
+ if (dst && dst->dev &&
+ (dst->dev->features & NETIF_F_LOOPBACK))
+ loopback = 1;
+ rcu_read_unlock();
+ return loopback;
+}
+
int svc_send_common(struct socket *sock, struct xdr_buf *xdr,
struct page *headpage, unsigned long headoffset,
struct page *tailpage, unsigned long tailoffset);
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 06c6ff0cb911..b4737fbdec13 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -597,6 +597,7 @@ static int svc_alloc_arg(struct svc_rqst *rqstp)
}
rqstp->rq_pages[i] = p;
}
+ rqstp->rq_page_end = &rqstp->rq_pages[i];
rqstp->rq_pages[i++] = NULL; /* this might be seen in nfs_read_actor */
/* Make arg->head point to first page and arg->pages point to rest */
@@ -730,6 +731,8 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
newxpt = xprt->xpt_ops->xpo_accept(xprt);
if (newxpt)
svc_add_new_temp_xprt(serv, newxpt);
+ else
+ module_put(xprt->xpt_class->xcl_owner);
} else if (xprt->xpt_ops->xpo_has_wspace(xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
@@ -793,7 +796,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- rqstp->rq_secure = svc_port_is_privileged(svc_addr(rqstp));
+ rqstp->rq_secure = xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
if (serv->sv_stats)
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index 2af7b0cba43a..79c0f3459b5c 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -54,6 +54,8 @@ svc_authenticate(struct svc_rqst *rqstp, __be32 *authp)
}
spin_unlock(&authtab_lock);
+ rqstp->rq_auth_slack = 0;
+
rqstp->rq_authop = aops;
return aops->accept(rqstp, authp);
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 43bcb4699d69..b507cd327d9b 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -400,6 +400,12 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
release_sock(sock->sk);
#endif
}
+
+static int svc_sock_secure_port(struct svc_rqst *rqstp)
+{
+ return svc_port_is_privileged(svc_addr(rqstp));
+}
+
/*
* INET callback when data has been received on the socket.
*/
@@ -678,6 +684,7 @@ static struct svc_xprt_ops svc_udp_ops = {
.xpo_prep_reply_hdr = svc_udp_prep_reply_hdr,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_udp_class = {
@@ -842,8 +849,7 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
* tell us anything. For now just warn about unpriv connections.
*/
if (!svc_port_is_privileged(sin)) {
- dprintk(KERN_WARNING
- "%s: connect from unprivileged port: %s\n",
+ dprintk("%s: connect from unprivileged port: %s\n",
serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
}
@@ -867,6 +873,10 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
}
svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen);
+ if (sock_is_loopback(newsock->sk))
+ set_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
+ else
+ clear_bit(XPT_LOCAL, &newsvsk->sk_xprt.xpt_flags);
if (serv->sv_stats)
serv->sv_stats->nettcpconn++;
@@ -1112,6 +1122,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
+ rqstp->rq_local = !!test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1234,6 +1245,7 @@ static struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_bc_class = {
@@ -1272,6 +1284,7 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_prep_reply_hdr = svc_tcp_prep_reply_hdr,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
+ .xpo_secure_port = svc_sock_secure_port,
};
static struct svc_xprt_class svc_tcp_class = {
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index dd97ba3c4456..23fb4e75e245 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -462,6 +462,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
+ xdr_set_scratch_buffer(xdr, NULL, 0);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -482,6 +483,73 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
+ * xdr_commit_encode - Ensure all data is written to buffer
+ * @xdr: pointer to xdr_stream
+ *
+ * We handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place; xdr_commit_encode does that copying.
+ *
+ * Normally the caller doesn't need to call this directly, as the
+ * following xdr_reserve_space will do it. But an explicit call may be
+ * required at the end of encoding, or any other time when the xdr_buf
+ * data might be read.
+ */
+void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ int shift = xdr->scratch.iov_len;
+ void *page;
+
+ if (shift == 0)
+ return;
+ page = page_address(*xdr->page_ptr);
+ memcpy(xdr->scratch.iov_base, page, shift);
+ memmove(page, page + shift, (void *)xdr->p - page);
+ xdr->scratch.iov_len = 0;
+}
+EXPORT_SYMBOL_GPL(xdr_commit_encode);
+
+__be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr, size_t nbytes)
+{
+ __be32 *p;
+ int space_left;
+ int frag1bytes, frag2bytes;
+
+ if (nbytes > PAGE_SIZE)
+ return NULL; /* Bigger buffers require special handling */
+ if (xdr->buf->len + nbytes > xdr->buf->buflen)
+ return NULL; /* Sorry, we're totally out of space */
+ frag1bytes = (xdr->end - xdr->p) << 2;
+ frag2bytes = nbytes - frag1bytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += frag1bytes;
+ else
+ xdr->buf->page_len += frag1bytes;
+ xdr->page_ptr++;
+ xdr->iov = NULL;
+ /*
+ * If the last encode didn't end exactly on a page boundary, the
+ * next one will straddle boundaries. Encode into the next
+ * page, then copy it back later in xdr_commit_encode. We use
+ * the "scratch" iov to track any temporarily unused fragment of
+ * space at the end of the previous buffer:
+ */
+ xdr->scratch.iov_base = xdr->p;
+ xdr->scratch.iov_len = frag1bytes;
+ p = page_address(*xdr->page_ptr);
+ /*
+ * Note this is where the next encode will start after we've
+ * shifted this one back:
+ */
+ xdr->p = (void *)p + frag2bytes;
+ space_left = xdr->buf->buflen - xdr->buf->len;
+ xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ xdr->buf->page_len += frag2bytes;
+ xdr->buf->len += nbytes;
+ return p;
+}
+
+/**
* xdr_reserve_space - Reserve buffer space for sending
* @xdr: pointer to xdr_stream
* @nbytes: number of bytes to reserve
@@ -495,20 +563,122 @@ __be32 * xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes)
__be32 *p = xdr->p;
__be32 *q;
+ xdr_commit_encode(xdr);
/* align nbytes on the next 32-bit boundary */
nbytes += 3;
nbytes &= ~3;
q = p + (nbytes >> 2);
if (unlikely(q > xdr->end || q < p))
- return NULL;
+ return xdr_get_next_encode_buffer(xdr, nbytes);
xdr->p = q;
- xdr->iov->iov_len += nbytes;
+ if (xdr->iov)
+ xdr->iov->iov_len += nbytes;
+ else
+ xdr->buf->page_len += nbytes;
xdr->buf->len += nbytes;
return p;
}
EXPORT_SYMBOL_GPL(xdr_reserve_space);
/**
+ * xdr_truncate_encode - truncate an encode buffer
+ * @xdr: pointer to xdr_stream
+ * @len: new length of buffer
+ *
+ * Truncates the xdr stream, so that xdr->buf->len == len,
+ * and xdr->p points at offset len from the start of the buffer, and
+ * head, tail, and page lengths are adjusted to correspond.
+ *
+ * If this means moving xdr->p to a different buffer, we assume that
+ * the end pointer should be set to the end of the current page,
+ * except in the case of the head buffer when we assume the head
+ * buffer's current length represents the end of the available buffer.
+ *
+ * This is *not* safe to use on a buffer that already has inlined page
+ * cache pages (as in a zero-copy server read reply), except for the
+ * simple case of truncating from one position in the tail to another.
+ *
+ */
+void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
+{
+ struct xdr_buf *buf = xdr->buf;
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ int fraglen;
+ int new, old;
+
+ if (len > buf->len) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+ xdr_commit_encode(xdr);
+
+ fraglen = min_t(int, buf->len - len, tail->iov_len);
+ tail->iov_len -= fraglen;
+ buf->len -= fraglen;
+ if (tail->iov_len && buf->len == len) {
+ xdr->p = tail->iov_base + tail->iov_len;
+ /* xdr->end, xdr->iov should be set already */
+ return;
+ }
+ WARN_ON_ONCE(fraglen);
+ fraglen = min_t(int, buf->len - len, buf->page_len);
+ buf->page_len -= fraglen;
+ buf->len -= fraglen;
+
+ new = buf->page_base + buf->page_len;
+ old = new + fraglen;
+ xdr->page_ptr -= (old >> PAGE_SHIFT) - (new >> PAGE_SHIFT);
+
+ if (buf->page_len && buf->len == len) {
+ xdr->p = page_address(*xdr->page_ptr);
+ xdr->end = (void *)xdr->p + PAGE_SIZE;
+ xdr->p = (void *)xdr->p + (new % PAGE_SIZE);
+ /* xdr->iov should already be NULL */
+ return;
+ }
+ if (fraglen) {
+ xdr->end = head->iov_base + head->iov_len;
+ xdr->page_ptr--;
+ }
+ /* (otherwise assume xdr->end is already set) */
+ head->iov_len = len;
+ buf->len = len;
+ xdr->p = head->iov_base + head->iov_len;
+ xdr->iov = buf->head;
+}
+EXPORT_SYMBOL(xdr_truncate_encode);
+
+/**
+ * xdr_restrict_buflen - decrease available buffer space
+ * @xdr: pointer to xdr_stream
+ * @newbuflen: new maximum number of bytes available
+ *
+ * Adjust our idea of how much space is available in the buffer.
+ * If we've already used too much space in the buffer, returns -1.
+ * If the available space is already smaller than newbuflen, returns 0
+ * and does nothing. Otherwise, adjusts xdr->buf->buflen to newbuflen
+ * and ensures xdr->end is set at most offset newbuflen from the start
+ * of the buffer.
+ */
+int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen)
+{
+ struct xdr_buf *buf = xdr->buf;
+ int left_in_this_buf = (void *)xdr->end - (void *)xdr->p;
+ int end_offset = buf->len + left_in_this_buf;
+
+ if (newbuflen < 0 || newbuflen < buf->len)
+ return -1;
+ if (newbuflen > buf->buflen)
+ return 0;
+ if (newbuflen < end_offset)
+ xdr->end = (void *)xdr->end + newbuflen - end_offset;
+ buf->buflen = newbuflen;
+ return 0;
+}
+EXPORT_SYMBOL(xdr_restrict_buflen);
+
+/**
* xdr_write_pages - Insert a list of pages into an XDR buffer for sending
* @xdr: pointer to xdr_stream
* @pages: list of pages
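
As a rough illustration of the encode-side API added above (the helper, its error code, and its caller are assumptions, not part of this patch), a server encoder can reserve space that may now straddle a page boundary and roll back cleanly on failure:

/* Hypothetical server-side encoder using the page-crossing
 * xdr_reserve_space() and xdr_truncate_encode() to undo a partial op. */
static int example_encode_opaque(struct xdr_stream *xdr,
				 const void *data, u32 len)
{
	unsigned int start = xdr->buf->len;
	__be32 *p;

	/* 4 bytes of length plus the data, padded to a 4-byte boundary */
	p = xdr_reserve_space(xdr, 4 + ((len + 3) & ~3));
	if (!p) {
		xdr_truncate_encode(xdr, start);	/* undo partial encode */
		return -ENOSPC;
	}
	xdr_encode_opaque(p, data, len);
	return 0;
}
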
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 89d051de6b3e..c3b2b3369e52 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -71,24 +71,6 @@ static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
static LIST_HEAD(xprt_list);
-/*
- * The transport code maintains an estimate on the maximum number of out-
- * standing RPC requests, using a smoothed version of the congestion
- * avoidance implemented in 44BSD. This is basically the Van Jacobson
- * congestion algorithm: If a retransmit occurs, the congestion window is
- * halved; otherwise, it is incremented by 1/cwnd when
- *
- * - a reply is received and
- * - a full number of requests are outstanding and
- * - the congestion window hasn't been updated recently.
- */
-#define RPC_CWNDSHIFT (8U)
-#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
-#define RPC_INITCWND RPC_CWNDSCALE
-#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
-
-#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
-
/**
* xprt_register_transport - register a transport implementation
* @transport: transport to register
@@ -446,7 +428,15 @@ EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
* @task: recently completed RPC request used to adjust window
* @result: result code of completed RPC request
*
- * We use a time-smoothed congestion estimator to avoid heavy oscillation.
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 44BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ * - a reply is received and
+ * - a full number of requests are outstanding and
+ * - the congestion window hasn't been updated recently.
*/
void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result)
{
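
For reference, a minimal sketch of the 44BSD-style window update the relocated comment describes (an illustrative restatement, not the exact body of xprt_adjust_cwnd):

/* Illustrative: halve the window on retransmit timeout, otherwise grow it
 * by roughly RPC_CWNDSCALE^2/cwnd per reply, capped at RPC_MAXCWND(). */
static void example_adjust_cwnd(struct rpc_xprt *xprt, int result)
{
	unsigned long cwnd = xprt->cwnd;

	if (result >= 0 && cwnd <= xprt->cong) {
		/* additive increase; (cwnd >> 1) rounds the division */
		cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
		if (cwnd > RPC_MAXCWND(xprt))
			cwnd = RPC_MAXCWND(xprt);
	} else if (result == -ETIMEDOUT) {
		/* multiplicative decrease, never below one request */
		cwnd >>= 1;
		if (cwnd < RPC_CWNDSCALE)
			cwnd = RPC_CWNDSCALE;
	}
	xprt->cwnd = cwnd;
}
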
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 96ead526b125..693966d3f33b 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -78,8 +78,7 @@ static const char transfertypes[][12] = {
* elements. Segments are then coalesced when registered, if possible
* within the selected memreg mode.
*
- * Note, this routine is never called if the connection's memory
- * registration strategy is 0 (bounce buffers).
+ * Returns positive number of segments converted, or a negative errno.
*/
static int
@@ -102,10 +101,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
page_base = xdrbuf->page_base & ~PAGE_MASK;
p = 0;
while (len && n < nsegs) {
+ if (!ppages[p]) {
+ /* alloc the pagelist for receiving buffer */
+ ppages[p] = alloc_page(GFP_ATOMIC);
+ if (!ppages[p])
+ return -ENOMEM;
+ }
seg[n].mr_page = ppages[p];
seg[n].mr_offset = (void *)(unsigned long) page_base;
seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len);
- BUG_ON(seg[n].mr_len > PAGE_SIZE);
+ if (seg[n].mr_len > PAGE_SIZE)
+ return -EIO;
len -= seg[n].mr_len;
++n;
++p;
@@ -114,7 +120,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
/* Message overflows the seg array */
if (len && n == nsegs)
- return 0;
+ return -EIO;
if (xdrbuf->tail[0].iov_len) {
/* the rpcrdma protocol allows us to omit any trailing
@@ -123,7 +129,7 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
return n;
if (n == nsegs)
/* Tail remains, but we're out of segments */
- return 0;
+ return -EIO;
seg[n].mr_page = NULL;
seg[n].mr_offset = xdrbuf->tail[0].iov_base;
seg[n].mr_len = xdrbuf->tail[0].iov_len;
@@ -164,15 +170,17 @@ rpcrdma_convert_iovs(struct xdr_buf *xdrbuf, unsigned int pos,
* Reply chunk (a counted array):
* N elements:
* 1 - N - HLOO - HLOO - ... - HLOO
+ *
+ * Returns positive RPC/RDMA header size, or negative errno.
*/
-static unsigned int
+static ssize_t
rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
struct rpcrdma_msg *headerp, enum rpcrdma_chunktype type)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt);
- int nsegs, nchunks = 0;
+ int n, nsegs, nchunks = 0;
unsigned int pos;
struct rpcrdma_mr_seg *seg = req->rl_segments;
struct rpcrdma_read_chunk *cur_rchunk = NULL;
@@ -198,12 +206,11 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
pos = target->head[0].iov_len;
nsegs = rpcrdma_convert_iovs(target, pos, type, seg, RPCRDMA_MAX_SEGS);
- if (nsegs == 0)
- return 0;
+ if (nsegs < 0)
+ return nsegs;
do {
- /* bind/register the memory, then build chunk from result. */
- int n = rpcrdma_register_external(seg, nsegs,
+ n = rpcrdma_register_external(seg, nsegs,
cur_wchunk != NULL, r_xprt);
if (n <= 0)
goto out;
@@ -248,10 +255,6 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
/* success. all failures return above */
req->rl_nchunks = nchunks;
- BUG_ON(nchunks == 0);
- BUG_ON((r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_FRMR)
- && (nchunks > 3));
-
/*
* finish off header. If write, marshal discrim and nchunks.
*/
@@ -278,8 +281,8 @@ rpcrdma_create_chunks(struct rpc_rqst *rqst, struct xdr_buf *target,
out:
for (pos = 0; nchunks--;)
pos += rpcrdma_deregister_external(
- &req->rl_segments[pos], r_xprt, NULL);
- return 0;
+ &req->rl_segments[pos], r_xprt);
+ return n;
}
/*
@@ -361,6 +364,8 @@ rpcrdma_inline_pullup(struct rpc_rqst *rqst, int pad)
* [1] -- the RPC header/data, marshaled by RPC and the NFS protocol.
* [2] -- optional padding.
* [3] -- if padded, header only in [1] and data here.
+ *
+ * Returns zero on success, otherwise a negative errno.
*/
int
@@ -370,7 +375,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
char *base;
- size_t hdrlen, rpclen, padlen;
+ size_t rpclen, padlen;
+ ssize_t hdrlen;
enum rpcrdma_chunktype rtype, wtype;
struct rpcrdma_msg *headerp;
@@ -441,14 +447,10 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
/* The following simplification is not true forever */
if (rtype != rpcrdma_noch && wtype == rpcrdma_replych)
wtype = rpcrdma_noch;
- BUG_ON(rtype != rpcrdma_noch && wtype != rpcrdma_noch);
-
- if (r_xprt->rx_ia.ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS &&
- (rtype != rpcrdma_noch || wtype != rpcrdma_noch)) {
- /* forced to "pure inline"? */
- dprintk("RPC: %s: too much data (%d/%d) for inline\n",
- __func__, rqst->rq_rcv_buf.len, rqst->rq_snd_buf.len);
- return -1;
+ if (rtype != rpcrdma_noch && wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: cannot marshal multiple chunk lists\n",
+ __func__);
+ return -EIO;
}
hdrlen = 28; /*sizeof *headerp;*/
@@ -474,8 +476,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
headerp->rm_body.rm_padded.rm_pempty[1] = xdr_zero;
headerp->rm_body.rm_padded.rm_pempty[2] = xdr_zero;
hdrlen += 2 * sizeof(u32); /* extra words in padhdr */
- BUG_ON(wtype != rpcrdma_noch);
-
+ if (wtype != rpcrdma_noch) {
+ dprintk("RPC: %s: invalid chunk list\n",
+ __func__);
+ return -EIO;
+ }
} else {
headerp->rm_body.rm_nochunks.rm_empty[0] = xdr_zero;
headerp->rm_body.rm_nochunks.rm_empty[1] = xdr_zero;
@@ -492,8 +497,7 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
* on receive. Therefore, we request a reply chunk
* for non-writes wherever feasible and efficient.
*/
- if (wtype == rpcrdma_noch &&
- r_xprt->rx_ia.ri_memreg_strategy > RPCRDMA_REGISTER)
+ if (wtype == rpcrdma_noch)
wtype = rpcrdma_replych;
}
}
@@ -511,9 +515,8 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
hdrlen = rpcrdma_create_chunks(rqst,
&rqst->rq_rcv_buf, headerp, wtype);
}
-
- if (hdrlen == 0)
- return -1;
+ if (hdrlen < 0)
+ return hdrlen;
dprintk("RPC: %s: %s: hdrlen %zd rpclen %zd padlen %zd"
" headerp 0x%p base 0x%p lkey 0x%x\n",
@@ -680,15 +683,11 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
rqst->rq_private_buf = rqst->rq_rcv_buf;
}
-/*
- * This function is called when an async event is posted to
- * the connection which changes the connection state. All it
- * does at this point is mark the connection up/down, the rpc
- * timers do the rest.
- */
void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
+rpcrdma_connect_worker(struct work_struct *work)
{
+ struct rpcrdma_ep *ep =
+ container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
struct rpc_xprt *xprt = ep->rep_xprt;
spin_lock_bh(&xprt->transport_lock);
@@ -705,13 +704,15 @@ rpcrdma_conn_func(struct rpcrdma_ep *ep)
}
/*
- * This function is called when memory window unbind which we are waiting
- * for completes. Just use rr_func (zeroed by upcall) to signal completion.
+ * This function is called when an async event is posted to
+ * the connection which changes the connection state. All it
+ * does at this point is mark the connection up/down, the rpc
+ * timers do the rest.
*/
-static void
-rpcrdma_unbind_func(struct rpcrdma_rep *rep)
+void
+rpcrdma_conn_func(struct rpcrdma_ep *ep)
{
- wake_up(&rep->rr_unbind);
+ schedule_delayed_work(&ep->rep_connect_worker, 0);
}
/*
@@ -728,7 +729,8 @@ rpcrdma_reply_handler(struct rpcrdma_rep *rep)
struct rpc_xprt *xprt = rep->rr_xprt;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
__be32 *iptr;
- int i, rdmalen, status;
+ int rdmalen, status;
+ unsigned long cwnd;
/* Check status. If bad, signal disconnect and return rep to pool */
if (rep->rr_len == ~0U) {
@@ -783,6 +785,7 @@ repost:
/* from here on, the reply is no longer an orphan */
req->rl_reply = rep;
+ xprt->reestablish_timeout = 0;
/* check for expected message types */
/* The order of some of these tests is important. */
@@ -857,26 +860,10 @@ badheader:
break;
}
- /* If using mw bind, start the deregister process now. */
- /* (Note: if mr_free(), cannot perform it here, in tasklet context) */
- if (req->rl_nchunks) switch (r_xprt->rx_ia.ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS:
- for (i = 0; req->rl_nchunks-- > 1;)
- i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- /* Optionally wait (not here) for unbinds to complete */
- rep->rr_func = rpcrdma_unbind_func;
- (void) rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, rep);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- for (i = 0; req->rl_nchunks--;)
- i += rpcrdma_deregister_external(&req->rl_segments[i],
- r_xprt, NULL);
- break;
- default:
- break;
- }
+ cwnd = xprt->cwnd;
+ xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT;
+ if (xprt->cwnd > cwnd)
+ xprt_release_rqst_cong(rqst->rq_task);
dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n",
__func__, xprt, rqst, status);
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 8d904e4eef15..8f92a61ee2df 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -69,7 +70,8 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
/* Set up the XDR head */
rqstp->rq_arg.head[0].iov_base = page_address(page);
- rqstp->rq_arg.head[0].iov_len = min(byte_count, ctxt->sge[0].length);
+ rqstp->rq_arg.head[0].iov_len =
+ min_t(size_t, byte_count, ctxt->sge[0].length);
rqstp->rq_arg.len = byte_count;
rqstp->rq_arg.buflen = byte_count;
@@ -85,7 +87,7 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
page = ctxt->pages[sge_no];
put_page(rqstp->rq_pages[sge_no]);
rqstp->rq_pages[sge_no] = page;
- bc -= min(bc, ctxt->sge[sge_no].length);
+ bc -= min_t(u32, bc, ctxt->sge[sge_no].length);
rqstp->rq_arg.buflen += ctxt->sge[sge_no].length;
sge_no++;
}
@@ -113,291 +115,265 @@ static void rdma_build_arg_xdr(struct svc_rqst *rqstp,
rqstp->rq_arg.tail[0].iov_len = 0;
}
-/* Encode a read-chunk-list as an array of IB SGE
- *
- * Assumptions:
- * - chunk[0]->position points to pages[0] at an offset of 0
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- *
- */
-static int map_read_chunks(struct svcxprt_rdma *xprt,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
{
- int sge_no;
- int sge_bytes;
- int page_off;
- int page_no;
- int ch_bytes;
- int ch_no;
- struct rpcrdma_read_chunk *ch;
+ if (rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
+ RDMA_TRANSPORT_IWARP)
+ return 1;
+ else
+ return min_t(int, sge_count, xprt->sc_max_sge);
+}
- sge_no = 0;
- page_no = 0;
- page_off = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch_no = 0;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = ch_bytes;
- head->arg.len = rqstp->rq_arg.len + ch_bytes;
- head->arg.buflen = rqstp->rq_arg.buflen + ch_bytes;
- head->count++;
- chl_map->ch[0].start = 0;
- while (byte_count) {
- rpl_map->sge[sge_no].iov_base =
- page_address(rqstp->rq_arg.pages[page_no]) + page_off;
- sge_bytes = min_t(int, PAGE_SIZE-page_off, ch_bytes);
- rpl_map->sge[sge_no].iov_len = sge_bytes;
- /*
- * Don't bump head->count here because the same page
- * may be used by multiple SGE.
- */
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no+1];
+typedef int (*rdma_reader_fn)(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last);
+
+/* Issue an RDMA_READ using the local lkey to map the data sink */
+static int rdma_read_chunk_lcl(struct svcxprt_rdma *xprt,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head,
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
+{
+ struct ib_send_wr read_wr;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
+
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->read_hdr = head;
+ pages_needed =
+ min_t(int, pages_needed, rdma_read_max_sge(xprt, pages_needed));
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
rqstp->rq_next_page = rqstp->rq_respages + 1;
+ ctxt->sge[pno].addr =
+ ib_dma_map_page(xprt->sc_cm_id->device,
+ head->arg.pages[pg_no], pg_off,
+ PAGE_SIZE - pg_off,
+ DMA_FROM_DEVICE);
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ ctxt->sge[pno].addr);
+ if (ret)
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
- byte_count -= sge_bytes;
- ch_bytes -= sge_bytes;
- sge_no++;
- /*
- * If all bytes for this chunk have been mapped to an
- * SGE, move to the next SGE
- */
- if (ch_bytes == 0) {
- chl_map->ch[ch_no].count =
- sge_no - chl_map->ch[ch_no].start;
- ch_no++;
- ch++;
- chl_map->ch[ch_no].start = sge_no;
- ch_bytes = ntohl(ch->rc_target.rs_length);
- /* If bytes remaining account for next chunk */
- if (byte_count) {
- head->arg.page_len += ch_bytes;
- head->arg.len += ch_bytes;
- head->arg.buflen += ch_bytes;
- }
+ /* The lkey here is either a local dma lkey or a dma_mr lkey */
+ ctxt->sge[pno].lkey = xprt->sc_dma_lkey;
+ ctxt->sge[pno].length = len;
+ ctxt->count++;
+
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
}
- /*
- * If this SGE consumed all of the page, move to the
- * next page
- */
- if ((sge_bytes + page_off) == PAGE_SIZE) {
- page_no++;
- page_off = 0;
- /*
- * If there are still bytes left to map, bump
- * the page count
- */
- if (byte_count)
- head->count++;
- } else
- page_off += sge_bytes;
+ rs_length -= len;
}
- BUG_ON(byte_count != 0);
- return sge_no;
+
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.opcode = IB_WR_RDMA_READ;
+ ctxt->wr_op = read_wr.opcode;
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = pages_needed;
+
+ ret = svc_rdma_send(xprt, &read_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
+ }
+
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ return ret;
}
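
The loop in rdma_read_chunk_lcl() above carves one read-chunk segment (rs_length bytes, starting *page_offset bytes into the current sink page) into per-page SGEs: each piece is clamped to whatever is left of the current page, and the offset wraps to the next page when it reaches PAGE_SIZE. A minimal, self-contained sketch of that arithmetic follows; it is plain user-space C, and PAGE_SIZE plus the example lengths are illustrative values, not the kernel's.

#include <stdio.h>

#define PAGE_SIZE 4096u    /* illustrative; the real value is arch-defined */

/* Split 'length' bytes starting at byte 'off' of page 'page' into
 * page-bounded pieces, the way rdma_read_chunk_lcl() builds its SGEs. */
static void split_into_pages(unsigned int page, unsigned int off,
                             unsigned int length)
{
    while (length > 0) {
        unsigned int len = length < PAGE_SIZE - off ? length
                                                    : PAGE_SIZE - off;

        printf("sge: page %u, offset %u, length %u\n", page, off, len);

        off += len;
        if (off == PAGE_SIZE) {    /* wrap to the next sink page */
            off = 0;
            page++;
        }
        length -= len;
    }
    printf("next free position: page %u, offset %u\n", page, off);
}

int main(void)
{
    /* e.g. a 10000-byte chunk landing 1000 bytes into page 3 */
    split_into_pages(3, 1000, 10000);
    return 0;
}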
-/* Map a read-chunk-list to an XDR and fast register the page-list.
- *
- * Assumptions:
- * - chunk[0] position points to pages[0] at an offset of 0
- * - pages[] will be made physically contiguous by creating a one-off memory
- * region using the fastreg verb.
- * - byte_count is # of bytes in read-chunk-list
- * - ch_count is # of chunks in read-chunk-list
- *
- * Output:
- * - sge array pointing into pages[] array.
- * - chunk_sge array specifying sge index and count for each
- * chunk in the read list
- */
-static int fast_reg_read_chunks(struct svcxprt_rdma *xprt,
+/* Issue an RDMA_READ using an FRMR to map the data sink */
+static int rdma_read_chunk_frmr(struct svcxprt_rdma *xprt,
struct svc_rqst *rqstp,
struct svc_rdma_op_ctxt *head,
- struct rpcrdma_msg *rmsgp,
- struct svc_rdma_req_map *rpl_map,
- struct svc_rdma_req_map *chl_map,
- int ch_count,
- int byte_count)
+ int *page_no,
+ u32 *page_offset,
+ u32 rs_handle,
+ u32 rs_length,
+ u64 rs_offset,
+ int last)
{
- int page_no;
- int ch_no;
- u32 offset;
- struct rpcrdma_read_chunk *ch;
- struct svc_rdma_fastreg_mr *frmr;
- int ret = 0;
+ struct ib_send_wr read_wr;
+ struct ib_send_wr inv_wr;
+ struct ib_send_wr fastreg_wr;
+ u8 key;
+ int pages_needed = PAGE_ALIGN(*page_offset + rs_length) >> PAGE_SHIFT;
+ struct svc_rdma_op_ctxt *ctxt = svc_rdma_get_context(xprt);
+ struct svc_rdma_fastreg_mr *frmr = svc_rdma_get_frmr(xprt);
+ int ret, read, pno;
+ u32 pg_off = *page_offset;
+ u32 pg_no = *page_no;
- frmr = svc_rdma_get_frmr(xprt);
if (IS_ERR(frmr))
return -ENOMEM;
- head->frmr = frmr;
- head->arg.head[0] = rqstp->rq_arg.head[0];
- head->arg.tail[0] = rqstp->rq_arg.tail[0];
- head->arg.pages = &head->pages[head->count];
- head->hdr_count = head->count; /* save count of hdr pages */
- head->arg.page_base = 0;
- head->arg.page_len = byte_count;
- head->arg.len = rqstp->rq_arg.len + byte_count;
- head->arg.buflen = rqstp->rq_arg.buflen + byte_count;
+ ctxt->direction = DMA_FROM_DEVICE;
+ ctxt->frmr = frmr;
+ pages_needed = min_t(int, pages_needed, xprt->sc_frmr_pg_list_len);
+ read = min_t(int, pages_needed << PAGE_SHIFT, rs_length);
- /* Fast register the page list */
- frmr->kva = page_address(rqstp->rq_arg.pages[0]);
+ frmr->kva = page_address(rqstp->rq_arg.pages[pg_no]);
frmr->direction = DMA_FROM_DEVICE;
frmr->access_flags = (IB_ACCESS_LOCAL_WRITE|IB_ACCESS_REMOTE_WRITE);
- frmr->map_len = byte_count;
- frmr->page_list_len = PAGE_ALIGN(byte_count) >> PAGE_SHIFT;
- for (page_no = 0; page_no < frmr->page_list_len; page_no++) {
- frmr->page_list->page_list[page_no] =
+ frmr->map_len = pages_needed << PAGE_SHIFT;
+ frmr->page_list_len = pages_needed;
+
+ for (pno = 0; pno < pages_needed; pno++) {
+ int len = min_t(int, rs_length, PAGE_SIZE - pg_off);
+
+ head->arg.pages[pg_no] = rqstp->rq_arg.pages[pg_no];
+ head->arg.page_len += len;
+ head->arg.len += len;
+ if (!pg_off)
+ head->count++;
+ rqstp->rq_respages = &rqstp->rq_arg.pages[pg_no+1];
+ rqstp->rq_next_page = rqstp->rq_respages + 1;
+ frmr->page_list->page_list[pno] =
ib_dma_map_page(xprt->sc_cm_id->device,
- rqstp->rq_arg.pages[page_no], 0,
+ head->arg.pages[pg_no], 0,
PAGE_SIZE, DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
+ ret = ib_dma_mapping_error(xprt->sc_cm_id->device,
+ frmr->page_list->page_list[pno]);
+ if (ret)
+ goto err;
atomic_inc(&xprt->sc_dma_used);
- head->arg.pages[page_no] = rqstp->rq_arg.pages[page_no];
- }
- head->count += page_no;
-
- /* rq_respages points one past arg pages */
- rqstp->rq_respages = &rqstp->rq_arg.pages[page_no];
- rqstp->rq_next_page = rqstp->rq_respages + 1;
- /* Create the reply and chunk maps */
- offset = 0;
- ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- for (ch_no = 0; ch_no < ch_count; ch_no++) {
- int len = ntohl(ch->rc_target.rs_length);
- rpl_map->sge[ch_no].iov_base = frmr->kva + offset;
- rpl_map->sge[ch_no].iov_len = len;
- chl_map->ch[ch_no].count = 1;
- chl_map->ch[ch_no].start = ch_no;
- offset += len;
- ch++;
+ /* adjust offset and wrap to next page if needed */
+ pg_off += len;
+ if (pg_off == PAGE_SIZE) {
+ pg_off = 0;
+ pg_no++;
+ }
+ rs_length -= len;
}
- ret = svc_rdma_fastreg(xprt, frmr);
- if (ret)
- goto fatal_err;
-
- return ch_no;
-
- fatal_err:
- printk("svcrdma: error fast registering xdr for xprt %p", xprt);
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
-static int rdma_set_ctxt_sge(struct svcxprt_rdma *xprt,
- struct svc_rdma_op_ctxt *ctxt,
- struct svc_rdma_fastreg_mr *frmr,
- struct kvec *vec,
- u64 *sgl_offset,
- int count)
-{
- int i;
- unsigned long off;
+ if (last && rs_length == 0)
+ set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
+ else
+ clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- ctxt->count = count;
- ctxt->direction = DMA_FROM_DEVICE;
- for (i = 0; i < count; i++) {
- ctxt->sge[i].length = 0; /* in case map fails */
- if (!frmr) {
- BUG_ON(!virt_to_page(vec[i].iov_base));
- off = (unsigned long)vec[i].iov_base & ~PAGE_MASK;
- ctxt->sge[i].addr =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(vec[i].iov_base),
- off,
- vec[i].iov_len,
- DMA_FROM_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- ctxt->sge[i].addr))
- return -EINVAL;
- ctxt->sge[i].lkey = xprt->sc_dma_lkey;
- atomic_inc(&xprt->sc_dma_used);
- } else {
- ctxt->sge[i].addr = (unsigned long)vec[i].iov_base;
- ctxt->sge[i].lkey = frmr->mr->lkey;
- }
- ctxt->sge[i].length = vec[i].iov_len;
- *sgl_offset = *sgl_offset + vec[i].iov_len;
+ /* Bump the key */
+ key = (u8)(frmr->mr->lkey & 0x000000FF);
+ ib_update_fast_reg_key(frmr->mr, ++key);
+
+ ctxt->sge[0].addr = (unsigned long)frmr->kva + *page_offset;
+ ctxt->sge[0].lkey = frmr->mr->lkey;
+ ctxt->sge[0].length = read;
+ ctxt->count = 1;
+ ctxt->read_hdr = head;
+
+ /* Prepare FASTREG WR */
+ memset(&fastreg_wr, 0, sizeof(fastreg_wr));
+ fastreg_wr.opcode = IB_WR_FAST_REG_MR;
+ fastreg_wr.send_flags = IB_SEND_SIGNALED;
+ fastreg_wr.wr.fast_reg.iova_start = (unsigned long)frmr->kva;
+ fastreg_wr.wr.fast_reg.page_list = frmr->page_list;
+ fastreg_wr.wr.fast_reg.page_list_len = frmr->page_list_len;
+ fastreg_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
+ fastreg_wr.wr.fast_reg.length = frmr->map_len;
+ fastreg_wr.wr.fast_reg.access_flags = frmr->access_flags;
+ fastreg_wr.wr.fast_reg.rkey = frmr->mr->lkey;
+ fastreg_wr.next = &read_wr;
+
+ /* Prepare RDMA_READ */
+ memset(&read_wr, 0, sizeof(read_wr));
+ read_wr.send_flags = IB_SEND_SIGNALED;
+ read_wr.wr.rdma.rkey = rs_handle;
+ read_wr.wr.rdma.remote_addr = rs_offset;
+ read_wr.sg_list = ctxt->sge;
+ read_wr.num_sge = 1;
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_READ_W_INV) {
+ read_wr.opcode = IB_WR_RDMA_READ_WITH_INV;
+ read_wr.wr_id = (unsigned long)ctxt;
+ read_wr.ex.invalidate_rkey = ctxt->frmr->mr->lkey;
+ } else {
+ read_wr.opcode = IB_WR_RDMA_READ;
+ read_wr.next = &inv_wr;
+ /* Prepare invalidate */
+ memset(&inv_wr, 0, sizeof(inv_wr));
+ inv_wr.wr_id = (unsigned long)ctxt;
+ inv_wr.opcode = IB_WR_LOCAL_INV;
+ inv_wr.send_flags = IB_SEND_SIGNALED | IB_SEND_FENCE;
+ inv_wr.ex.invalidate_rkey = frmr->mr->lkey;
+ }
+ ctxt->wr_op = read_wr.opcode;
+
+ /* Post the chain */
+ ret = svc_rdma_send(xprt, &fastreg_wr);
+ if (ret) {
+ pr_err("svcrdma: Error %d posting RDMA_READ\n", ret);
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ goto err;
}
- return 0;
-}
-static int rdma_read_max_sge(struct svcxprt_rdma *xprt, int sge_count)
-{
- if ((rdma_node_get_transport(xprt->sc_cm_id->device->node_type) ==
- RDMA_TRANSPORT_IWARP) &&
- sge_count > 1)
- return 1;
- else
- return min_t(int, sge_count, xprt->sc_max_sge);
+ /* return current location in page array */
+ *page_no = pg_no;
+ *page_offset = pg_off;
+ ret = read;
+ atomic_inc(&rdma_stat_read);
+ return ret;
+ err:
+ svc_rdma_unmap_dma(ctxt);
+ svc_rdma_put_context(ctxt, 0);
+ svc_rdma_put_frmr(xprt, frmr);
+ return ret;
}
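
Before each fast registration, rdma_read_chunk_frmr() above bumps the low eight bits of the MR key (the consumer-owned portion of the lkey/rkey), so a stale reference carrying the previous key can no longer hit the re-registered region. A stand-alone sketch of the bit manipulation that ib_update_fast_reg_key() performs on the key, assuming nothing beyond standard C:

#include <stdint.h>
#include <stdio.h>

/* Return 'lkey' with its low-order key byte incremented, mirroring
 * key = lkey & 0xFF; ib_update_fast_reg_key(mr, ++key); */
static uint32_t bump_fast_reg_key(uint32_t lkey)
{
    uint8_t key = (uint8_t)(lkey & 0x000000FF);

    key++;                                   /* wraps naturally at 0xFF */
    return (lkey & 0xFFFFFF00) | key;
}

int main(void)
{
    printf("0x%08x -> 0x%08x\n", 0x12345678u, bump_fast_reg_key(0x12345678u));
    printf("0x%08x -> 0x%08x\n", 0x123456FFu, bump_fast_reg_key(0x123456FFu));
    return 0;
}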
-/*
- * Use RDMA_READ to read data from the advertised client buffer into the
- * XDR stream starting at rq_arg.head[0].iov_base.
- * Each chunk in the array
- * contains the following fields:
- * discrim - '1', This isn't used for data placement
- * position - The xdr stream offset (the same for every chunk)
- * handle - RMR for client memory region
- * length - data transfer length
- * offset - 64 bit tagged offset in remote memory region
- *
- * On our side, we need to read into a pagelist. The first page immediately
- * follows the RPC header.
- *
- * This function returns:
- * 0 - No error and no read-list found.
- *
- * 1 - Successful read-list processing. The data is not yet in
- * the pagelist and therefore the RPC request must be deferred. The
- * I/O completion will enqueue the transport again and
- * svc_rdma_recvfrom will complete the request.
- *
- * <0 - Error processing/posting read-list.
- *
- * NOTE: The ctxt must not be touched after the last WR has been posted
- * because the I/O completion processing may occur on another
- * processor and free / modify the context. Ne touche pas!
- */
-static int rdma_read_xdr(struct svcxprt_rdma *xprt,
- struct rpcrdma_msg *rmsgp,
- struct svc_rqst *rqstp,
- struct svc_rdma_op_ctxt *hdr_ctxt)
+static int rdma_read_chunks(struct svcxprt_rdma *xprt,
+ struct rpcrdma_msg *rmsgp,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_op_ctxt *head)
{
- struct ib_send_wr read_wr;
- struct ib_send_wr inv_wr;
- int err = 0;
- int ch_no;
- int ch_count;
- int byte_count;
- int sge_count;
- u64 sgl_offset;
+ int page_no, ch_count, ret;
struct rpcrdma_read_chunk *ch;
- struct svc_rdma_op_ctxt *ctxt = NULL;
- struct svc_rdma_req_map *rpl_map;
- struct svc_rdma_req_map *chl_map;
+ u32 page_offset, byte_count;
+ u64 rs_offset;
+ rdma_reader_fn reader;
/* If no read list is present, return 0 */
ch = svc_rdma_get_read_chunk(rmsgp);
@@ -408,122 +384,55 @@ static int rdma_read_xdr(struct svcxprt_rdma *xprt,
if (ch_count > RPCSVC_MAXPAGES)
return -EINVAL;
- /* Allocate temporary reply and chunk maps */
- rpl_map = svc_rdma_get_req_map();
- chl_map = svc_rdma_get_req_map();
+ /* The request is completed when the RDMA_READs complete. The
+ * head context keeps all the pages that comprise the
+ * request.
+ */
+ head->arg.head[0] = rqstp->rq_arg.head[0];
+ head->arg.tail[0] = rqstp->rq_arg.tail[0];
+ head->arg.pages = &head->pages[head->count];
+ head->hdr_count = head->count;
+ head->arg.page_base = 0;
+ head->arg.page_len = 0;
+ head->arg.len = rqstp->rq_arg.len;
+ head->arg.buflen = rqstp->rq_arg.buflen;
- if (!xprt->sc_frmr_pg_list_len)
- sge_count = map_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
+ /* Use FRMR if supported */
+ if (xprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)
+ reader = rdma_read_chunk_frmr;
else
- sge_count = fast_reg_read_chunks(xprt, rqstp, hdr_ctxt, rmsgp,
- rpl_map, chl_map, ch_count,
- byte_count);
- if (sge_count < 0) {
- err = -EIO;
- goto out;
- }
-
- sgl_offset = 0;
- ch_no = 0;
+ reader = rdma_read_chunk_lcl;
+ page_no = 0; page_offset = 0;
for (ch = (struct rpcrdma_read_chunk *)&rmsgp->rm_body.rm_chunks[0];
- ch->rc_discrim != 0; ch++, ch_no++) {
- u64 rs_offset;
-next_sge:
- ctxt = svc_rdma_get_context(xprt);
- ctxt->direction = DMA_FROM_DEVICE;
- ctxt->frmr = hdr_ctxt->frmr;
- ctxt->read_hdr = NULL;
- clear_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
+ ch->rc_discrim != 0; ch++) {
- /* Prepare READ WR */
- memset(&read_wr, 0, sizeof read_wr);
- read_wr.wr_id = (unsigned long)ctxt;
- read_wr.opcode = IB_WR_RDMA_READ;
- ctxt->wr_op = read_wr.opcode;
- read_wr.send_flags = IB_SEND_SIGNALED;
- read_wr.wr.rdma.rkey = ntohl(ch->rc_target.rs_handle);
xdr_decode_hyper((__be32 *)&ch->rc_target.rs_offset,
&rs_offset);
- read_wr.wr.rdma.remote_addr = rs_offset + sgl_offset;
- read_wr.sg_list = ctxt->sge;
- read_wr.num_sge =
- rdma_read_max_sge(xprt, chl_map->ch[ch_no].count);
- err = rdma_set_ctxt_sge(xprt, ctxt, hdr_ctxt->frmr,
- &rpl_map->sge[chl_map->ch[ch_no].start],
- &sgl_offset,
- read_wr.num_sge);
- if (err) {
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
- }
- if (((ch+1)->rc_discrim == 0) &&
- (read_wr.num_sge == chl_map->ch[ch_no].count)) {
- /*
- * Mark the last RDMA_READ with a bit to
- * indicate all RPC data has been fetched from
- * the client and the RPC needs to be enqueued.
- */
- set_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags);
- if (hdr_ctxt->frmr) {
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- /*
- * Invalidate the local MR used to map the data
- * sink.
- */
- if (xprt->sc_dev_caps &
- SVCRDMA_DEVCAP_READ_W_INV) {
- read_wr.opcode =
- IB_WR_RDMA_READ_WITH_INV;
- ctxt->wr_op = read_wr.opcode;
- read_wr.ex.invalidate_rkey =
- ctxt->frmr->mr->lkey;
- } else {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- hdr_ctxt->frmr->mr->lkey;
- read_wr.next = &inv_wr;
- }
- }
- ctxt->read_hdr = hdr_ctxt;
- }
- /* Post the read */
- err = svc_rdma_send(xprt, &read_wr);
- if (err) {
- printk(KERN_ERR "svcrdma: Error %d posting RDMA_READ\n",
- err);
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
- svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_context(ctxt, 0);
- goto out;
+ byte_count = ntohl(ch->rc_target.rs_length);
+
+ while (byte_count > 0) {
+ ret = reader(xprt, rqstp, head,
+ &page_no, &page_offset,
+ ntohl(ch->rc_target.rs_handle),
+ byte_count, rs_offset,
+ ((ch+1)->rc_discrim == 0) /* last */
+ );
+ if (ret < 0)
+ goto err;
+ byte_count -= ret;
+ rs_offset += ret;
+ head->arg.buflen += ret;
}
- atomic_inc(&rdma_stat_read);
-
- if (read_wr.num_sge < chl_map->ch[ch_no].count) {
- chl_map->ch[ch_no].count -= read_wr.num_sge;
- chl_map->ch[ch_no].start += read_wr.num_sge;
- goto next_sge;
- }
- sgl_offset = 0;
- err = 1;
}
-
- out:
- svc_rdma_put_req_map(rpl_map);
- svc_rdma_put_req_map(chl_map);
-
+ ret = 1;
+ err:
/* Detach arg pages. svc_recv will replenish them */
- for (ch_no = 0; &rqstp->rq_pages[ch_no] < rqstp->rq_respages; ch_no++)
- rqstp->rq_pages[ch_no] = NULL;
+ for (page_no = 0;
+ &rqstp->rq_pages[page_no] < rqstp->rq_respages; page_no++)
+ rqstp->rq_pages[page_no] = NULL;
- return err;
+ return ret;
}
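
rdma_read_chunks() above drives one "reader" callback (the rdma_reader_fn typedef) per chunk: the callback posts as large an RDMA_READ as it can handle, returns the byte count, and the caller advances the remote offset and shrinks the remaining byte count until the chunk is exhausted; the page cursor (page_no/page_offset) simply carries over into the next chunk. A self-contained sketch of that calling convention for a single chunk, with a toy reader that can only move MAX_PER_CALL bytes per invocation (the constant is invented for the example):

#include <stdint.h>
#include <stdio.h>

#define MAX_PER_CALL 4096u   /* stand-in for the per-WR page/SGE limit */

/* Toy reader: pretend to read up to MAX_PER_CALL bytes and report it. */
static int toy_reader(uint32_t length, uint64_t remote_offset)
{
    uint32_t done = length < MAX_PER_CALL ? length : MAX_PER_CALL;

    printf("RDMA_READ %u bytes at remote offset %llu\n",
           done, (unsigned long long)remote_offset);
    return (int)done;
}

int main(void)
{
    uint32_t byte_count = 10000;    /* rs_length of one read chunk */
    uint64_t rs_offset  = 0x1000;   /* rs_offset of that chunk */

    while (byte_count > 0) {
        int ret = toy_reader(byte_count, rs_offset);

        if (ret < 0)
            return 1;
        byte_count -= (uint32_t)ret;  /* bytes still to pull */
        rs_offset  += (uint32_t)ret;  /* advance into the remote buffer */
    }
    return 0;
}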
static int rdma_read_complete(struct svc_rqst *rqstp,
@@ -595,13 +504,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svc_rdma_op_ctxt,
dto_q);
list_del_init(&ctxt->dto_q);
- }
- if (ctxt) {
spin_unlock_bh(&rdma_xprt->sc_rq_dto_lock);
return rdma_read_complete(rqstp, ctxt);
- }
-
- if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
+ } else if (!list_empty(&rdma_xprt->sc_rq_dto_q)) {
ctxt = list_entry(rdma_xprt->sc_rq_dto_q.next,
struct svc_rdma_op_ctxt,
dto_q);
@@ -621,7 +526,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
if (test_bit(XPT_CLOSE, &xprt->xpt_flags))
goto close_out;
- BUG_ON(ret);
goto out;
}
dprintk("svcrdma: processing ctxt=%p on xprt=%p, rqstp=%p, status=%d\n",
@@ -644,12 +548,11 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
}
/* Read read-list data. */
- ret = rdma_read_xdr(rdma_xprt, rmsgp, rqstp, ctxt);
+ ret = rdma_read_chunks(rdma_xprt, rmsgp, rqstp, ctxt);
if (ret > 0) {
/* read-list posted, defer until data received from client. */
goto defer;
- }
- if (ret < 0) {
+ } else if (ret < 0) {
/* Post of read-list failed, free context. */
svc_rdma_put_context(ctxt, 1);
return 0;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index 7e024a51617e..49fd21a5c215 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2006 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -49,152 +50,6 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
-/* Encode an XDR as an array of IB SGE
- *
- * Assumptions:
- * - head[0] is physically contiguous.
- * - tail[0] is physically contiguous.
- * - pages[] is not physically or virtually contiguous and consists of
- * PAGE_SIZE elements.
- *
- * Output:
- * SGE[0] reserved for RCPRDMA header
- * SGE[1] data from xdr->head[]
- * SGE[2..sge_count-2] data from xdr->pages[]
- * SGE[sge_count-1] data from xdr->tail.
- *
- * The max SGE we need is the length of the XDR / pagesize + one for
- * head + one for tail + one for RPCRDMA header. Since RPCSVC_MAXPAGES
- * reserves a page for both the request and the reply header, and this
- * array is only concerned with the reply we are assured that we have
- * on extra page for the RPCRMDA header.
- */
-static int fast_reg_xdr(struct svcxprt_rdma *xprt,
- struct xdr_buf *xdr,
- struct svc_rdma_req_map *vec)
-{
- int sge_no;
- u32 sge_bytes;
- u32 page_bytes;
- u32 page_off;
- int page_no = 0;
- u8 *frva;
- struct svc_rdma_fastreg_mr *frmr;
-
- frmr = svc_rdma_get_frmr(xprt);
- if (IS_ERR(frmr))
- return -ENOMEM;
- vec->frmr = frmr;
-
- /* Skip the RPCRDMA header */
- sge_no = 1;
-
- /* Map the head. */
- frva = (void *)((unsigned long)(xdr->head[0].iov_base) & PAGE_MASK);
- vec->sge[sge_no].iov_base = xdr->head[0].iov_base;
- vec->sge[sge_no].iov_len = xdr->head[0].iov_len;
- vec->count = 2;
- sge_no++;
-
- /* Map the XDR head */
- frmr->kva = frva;
- frmr->direction = DMA_TO_DEVICE;
- frmr->access_flags = 0;
- frmr->map_len = PAGE_SIZE;
- frmr->page_list_len = 1;
- page_off = (unsigned long)xdr->head[0].iov_base & ~PAGE_MASK;
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- virt_to_page(xdr->head[0].iov_base),
- page_off,
- PAGE_SIZE - page_off,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
-
- /* Map the XDR page list */
- page_off = xdr->page_base;
- page_bytes = xdr->page_len + page_off;
- if (!page_bytes)
- goto encode_tail;
-
- /* Map the pages */
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
- vec->sge[sge_no].iov_len = page_bytes;
- sge_no++;
- while (page_bytes) {
- struct page *page;
-
- page = xdr->pages[page_no++];
- sge_bytes = min_t(u32, page_bytes, (PAGE_SIZE - page_off));
- page_bytes -= sge_bytes;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device,
- page, page_off,
- sge_bytes, DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
-
- atomic_inc(&xprt->sc_dma_used);
- page_off = 0; /* reset for next time through loop */
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
- vec->count++;
-
- encode_tail:
- /* Map tail */
- if (0 == xdr->tail[0].iov_len)
- goto done;
-
- vec->count++;
- vec->sge[sge_no].iov_len = xdr->tail[0].iov_len;
-
- if (((unsigned long)xdr->tail[0].iov_base & PAGE_MASK) ==
- ((unsigned long)xdr->head[0].iov_base & PAGE_MASK)) {
- /*
- * If head and tail use the same page, we don't need
- * to map it again.
- */
- vec->sge[sge_no].iov_base = xdr->tail[0].iov_base;
- } else {
- void *va;
-
- /* Map another page for the tail */
- page_off = (unsigned long)xdr->tail[0].iov_base & ~PAGE_MASK;
- va = (void *)((unsigned long)xdr->tail[0].iov_base & PAGE_MASK);
- vec->sge[sge_no].iov_base = frva + frmr->map_len + page_off;
-
- frmr->page_list->page_list[page_no] =
- ib_dma_map_page(xprt->sc_cm_id->device, virt_to_page(va),
- page_off,
- PAGE_SIZE,
- DMA_TO_DEVICE);
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- frmr->page_list->page_list[page_no]))
- goto fatal_err;
- atomic_inc(&xprt->sc_dma_used);
- frmr->map_len += PAGE_SIZE;
- frmr->page_list_len++;
- }
-
- done:
- if (svc_rdma_fastreg(xprt, frmr))
- goto fatal_err;
-
- return 0;
-
- fatal_err:
- printk("svcrdma: Error fast registering memory for xprt %p\n", xprt);
- vec->frmr = NULL;
- svc_rdma_put_frmr(xprt, frmr);
- return -EIO;
-}
-
static int map_xdr(struct svcxprt_rdma *xprt,
struct xdr_buf *xdr,
struct svc_rdma_req_map *vec)
@@ -208,9 +63,6 @@ static int map_xdr(struct svcxprt_rdma *xprt,
BUG_ON(xdr->len !=
(xdr->head[0].iov_len + xdr->page_len + xdr->tail[0].iov_len));
- if (xprt->sc_frmr_pg_list_len)
- return fast_reg_xdr(xprt, xdr, vec);
-
/* Skip the first sge, this is for the RPCRDMA header */
sge_no = 1;
@@ -282,8 +134,6 @@ static dma_addr_t dma_map_xdr(struct svcxprt_rdma *xprt,
}
/* Assumptions:
- * - We are using FRMR
- * - or -
* - The specified write_len can be represented in sc_max_sge * PAGE_SIZE
*/
static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
@@ -327,23 +177,16 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
sge_bytes = min_t(size_t,
bc, vec->sge[xdr_sge_no].iov_len-sge_off);
sge[sge_no].length = sge_bytes;
- if (!vec->frmr) {
- sge[sge_no].addr =
- dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(xprt->sc_cm_id->device,
- sge[sge_no].addr))
- goto err;
- atomic_inc(&xprt->sc_dma_used);
- sge[sge_no].lkey = xprt->sc_dma_lkey;
- } else {
- sge[sge_no].addr = (unsigned long)
- vec->sge[xdr_sge_no].iov_base + sge_off;
- sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ sge[sge_no].addr =
+ dma_map_xdr(xprt, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(xprt->sc_cm_id->device,
+ sge[sge_no].addr))
+ goto err;
+ atomic_inc(&xprt->sc_dma_used);
+ sge[sge_no].lkey = xprt->sc_dma_lkey;
ctxt->count++;
- ctxt->frmr = vec->frmr;
sge_off = 0;
sge_no++;
xdr_sge_no++;
@@ -369,7 +212,6 @@ static int send_write(struct svcxprt_rdma *xprt, struct svc_rqst *rqstp,
return 0;
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(xprt, vec->frmr);
svc_rdma_put_context(ctxt, 0);
/* Fatal error, close transport */
return -EIO;
@@ -397,10 +239,7 @@ static int send_write_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[1];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* Write chunks start at the pagelist */
for (xdr_off = rqstp->rq_res.head[0].iov_len, chunk_no = 0;
@@ -472,10 +311,7 @@ static int send_reply_chunks(struct svcxprt_rdma *xprt,
res_ary = (struct rpcrdma_write_array *)
&rdma_resp->rm_body.rm_chunks[2];
- if (vec->frmr)
- max_write = vec->frmr->map_len;
- else
- max_write = xprt->sc_max_sge * PAGE_SIZE;
+ max_write = xprt->sc_max_sge * PAGE_SIZE;
/* xdr offset starts at RPC message */
nchunks = ntohl(arg_ary->wc_nchunks);
@@ -545,7 +381,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
int byte_count)
{
struct ib_send_wr send_wr;
- struct ib_send_wr inv_wr;
int sge_no;
int sge_bytes;
int page_no;
@@ -559,7 +394,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
"svcrdma: could not post a receive buffer, err=%d."
"Closing transport %p.\n", ret, rdma);
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 0);
return -ENOTCONN;
}
@@ -567,11 +401,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
/* Prepare the context */
ctxt->pages[0] = page;
ctxt->count = 1;
- ctxt->frmr = vec->frmr;
- if (vec->frmr)
- set_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
- else
- clear_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags);
/* Prepare the SGE for the RPCRDMA Header */
ctxt->sge[0].lkey = rdma->sc_dma_lkey;
@@ -590,21 +419,15 @@ static int send_reply(struct svcxprt_rdma *rdma,
int xdr_off = 0;
sge_bytes = min_t(size_t, vec->sge[sge_no].iov_len, byte_count);
byte_count -= sge_bytes;
- if (!vec->frmr) {
- ctxt->sge[sge_no].addr =
- dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
- sge_bytes, DMA_TO_DEVICE);
- xdr_off += sge_bytes;
- if (ib_dma_mapping_error(rdma->sc_cm_id->device,
- ctxt->sge[sge_no].addr))
- goto err;
- atomic_inc(&rdma->sc_dma_used);
- ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
- } else {
- ctxt->sge[sge_no].addr = (unsigned long)
- vec->sge[sge_no].iov_base;
- ctxt->sge[sge_no].lkey = vec->frmr->mr->lkey;
- }
+ ctxt->sge[sge_no].addr =
+ dma_map_xdr(rdma, &rqstp->rq_res, xdr_off,
+ sge_bytes, DMA_TO_DEVICE);
+ xdr_off += sge_bytes;
+ if (ib_dma_mapping_error(rdma->sc_cm_id->device,
+ ctxt->sge[sge_no].addr))
+ goto err;
+ atomic_inc(&rdma->sc_dma_used);
+ ctxt->sge[sge_no].lkey = rdma->sc_dma_lkey;
ctxt->sge[sge_no].length = sge_bytes;
}
BUG_ON(byte_count != 0);
@@ -627,6 +450,7 @@ static int send_reply(struct svcxprt_rdma *rdma,
ctxt->sge[page_no+1].length = 0;
}
rqstp->rq_next_page = rqstp->rq_respages + 1;
+
BUG_ON(sge_no > rdma->sc_max_sge);
memset(&send_wr, 0, sizeof send_wr);
ctxt->wr_op = IB_WR_SEND;
@@ -635,15 +459,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
send_wr.num_sge = sge_no;
send_wr.opcode = IB_WR_SEND;
send_wr.send_flags = IB_SEND_SIGNALED;
- if (vec->frmr) {
- /* Prepare INVALIDATE WR */
- memset(&inv_wr, 0, sizeof inv_wr);
- inv_wr.opcode = IB_WR_LOCAL_INV;
- inv_wr.send_flags = IB_SEND_SIGNALED;
- inv_wr.ex.invalidate_rkey =
- vec->frmr->mr->lkey;
- send_wr.next = &inv_wr;
- }
ret = svc_rdma_send(rdma, &send_wr);
if (ret)
@@ -653,7 +468,6 @@ static int send_reply(struct svcxprt_rdma *rdma,
err:
svc_rdma_unmap_dma(ctxt);
- svc_rdma_put_frmr(rdma, vec->frmr);
svc_rdma_put_context(ctxt, 1);
return -EIO;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 25688fa2207f..e7323fbbd348 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -1,4 +1,5 @@
/*
+ * Copyright (c) 2014 Open Grid Computing, Inc. All rights reserved.
* Copyright (c) 2005-2007 Network Appliance, Inc. All rights reserved.
*
* This software is available to you under a choice of one of two
@@ -65,6 +66,7 @@ static void dto_tasklet_func(unsigned long data);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
+static int svc_rdma_secure_port(struct svc_rqst *);
static void rq_cq_reap(struct svcxprt_rdma *xprt);
static void sq_cq_reap(struct svcxprt_rdma *xprt);
@@ -82,6 +84,7 @@ static struct svc_xprt_ops svc_rdma_ops = {
.xpo_prep_reply_hdr = svc_rdma_prep_reply_hdr,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
+ .xpo_secure_port = svc_rdma_secure_port,
};
struct svc_xprt_class svc_rdma_class = {
@@ -160,7 +163,6 @@ struct svc_rdma_req_map *svc_rdma_get_req_map(void)
schedule_timeout_uninterruptible(msecs_to_jiffies(500));
}
map->count = 0;
- map->frmr = NULL;
return map;
}
@@ -336,22 +338,21 @@ static void process_context(struct svcxprt_rdma *xprt,
switch (ctxt->wr_op) {
case IB_WR_SEND:
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 1);
break;
case IB_WR_RDMA_WRITE:
+ BUG_ON(ctxt->frmr);
svc_rdma_put_context(ctxt, 0);
break;
case IB_WR_RDMA_READ:
case IB_WR_RDMA_READ_WITH_INV:
+ svc_rdma_put_frmr(xprt, ctxt->frmr);
if (test_bit(RDMACTXT_F_LAST_CTXT, &ctxt->flags)) {
struct svc_rdma_op_ctxt *read_hdr = ctxt->read_hdr;
BUG_ON(!read_hdr);
- if (test_bit(RDMACTXT_F_FAST_UNREG, &ctxt->flags))
- svc_rdma_put_frmr(xprt, ctxt->frmr);
spin_lock_bh(&xprt->sc_rq_dto_lock);
set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags);
list_add_tail(&read_hdr->dto_q,
@@ -363,6 +364,7 @@ static void process_context(struct svcxprt_rdma *xprt,
break;
default:
+ BUG_ON(1);
printk(KERN_ERR "svcrdma: unexpected completion type, "
"opcode=%d\n",
ctxt->wr_op);
@@ -378,29 +380,42 @@ static void process_context(struct svcxprt_rdma *xprt,
static void sq_cq_reap(struct svcxprt_rdma *xprt)
{
struct svc_rdma_op_ctxt *ctxt = NULL;
- struct ib_wc wc;
+ struct ib_wc wc_a[6];
+ struct ib_wc *wc;
struct ib_cq *cq = xprt->sc_sq_cq;
int ret;
+ memset(wc_a, 0, sizeof(wc_a));
+
if (!test_and_clear_bit(RDMAXPRT_SQ_PENDING, &xprt->sc_flags))
return;
ib_req_notify_cq(xprt->sc_sq_cq, IB_CQ_NEXT_COMP);
atomic_inc(&rdma_stat_sq_poll);
- while ((ret = ib_poll_cq(cq, 1, &wc)) > 0) {
- if (wc.status != IB_WC_SUCCESS)
- /* Close the transport */
- set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ while ((ret = ib_poll_cq(cq, ARRAY_SIZE(wc_a), wc_a)) > 0) {
+ int i;
- /* Decrement used SQ WR count */
- atomic_dec(&xprt->sc_sq_count);
- wake_up(&xprt->sc_send_wait);
+ for (i = 0; i < ret; i++) {
+ wc = &wc_a[i];
+ if (wc->status != IB_WC_SUCCESS) {
+ dprintk("svcrdma: sq wc err status %d\n",
+ wc->status);
- ctxt = (struct svc_rdma_op_ctxt *)(unsigned long)wc.wr_id;
- if (ctxt)
- process_context(xprt, ctxt);
+ /* Close the transport */
+ set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags);
+ }
- svc_xprt_put(&xprt->sc_xprt);
+ /* Decrement used SQ WR count */
+ atomic_dec(&xprt->sc_sq_count);
+ wake_up(&xprt->sc_send_wait);
+
+ ctxt = (struct svc_rdma_op_ctxt *)
+ (unsigned long)wc->wr_id;
+ if (ctxt)
+ process_context(xprt, ctxt);
+
+ svc_xprt_put(&xprt->sc_xprt);
+ }
}
if (ctxt)
@@ -993,7 +1008,11 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
need_dma_mr = 0;
break;
case RDMA_TRANSPORT_IB:
- if (!(devattr.device_cap_flags & IB_DEVICE_LOCAL_DMA_LKEY)) {
+ if (!(newxprt->sc_dev_caps & SVCRDMA_DEVCAP_FAST_REG)) {
+ need_dma_mr = 1;
+ dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
+ } else if (!(devattr.device_cap_flags &
+ IB_DEVICE_LOCAL_DMA_LKEY)) {
need_dma_mr = 1;
dma_mr_acc = IB_ACCESS_LOCAL_WRITE;
} else
@@ -1190,14 +1209,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
container_of(xprt, struct svcxprt_rdma, sc_xprt);
/*
- * If there are fewer SQ WR available than required to send a
- * simple response, return false.
- */
- if ((rdma->sc_sq_depth - atomic_read(&rdma->sc_sq_count) < 3))
- return 0;
-
- /*
- * ...or there are already waiters on the SQ,
+ * If there are already waiters on the SQ,
* return false.
*/
if (waitqueue_active(&rdma->sc_send_wait))
@@ -1207,6 +1219,11 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
+static int svc_rdma_secure_port(struct svc_rqst *rqstp)
+{
+ return 1;
+}
+
/*
* Attempt to register the kvec representing the RPC memory with the
* device.
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 1eb9c468d0c9..66f91f0d071a 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -149,6 +149,11 @@ static struct ctl_table sunrpc_table[] = {
#endif
+#define RPCRDMA_BIND_TO (60U * HZ)
+#define RPCRDMA_INIT_REEST_TO (5U * HZ)
+#define RPCRDMA_MAX_REEST_TO (30U * HZ)
+#define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ)
+
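
These constants feed the reconnect backoff used further down in xprt_rdma_connect(): the reestablish timeout doubles on each attempt and is clamped between the initial and maximum values. A schematic, user-space rendering of that clamp; HZ here is an invented tick rate and the helper name is made up for the example:

#include <stdio.h>

#define HZ            1000u          /* illustrative tick rate */
#define INIT_REEST_TO (5u * HZ)
#define MAX_REEST_TO  (30u * HZ)

/* Double the reconnect delay and keep it within [INIT, MAX], like the
 * clamp in xprt_rdma_connect() below. */
static unsigned int next_reestablish_timeout(unsigned int to)
{
    to <<= 1;
    if (to > MAX_REEST_TO)
        to = MAX_REEST_TO;
    else if (to < INIT_REEST_TO)
        to = INIT_REEST_TO;
    return to;
}

int main(void)
{
    unsigned int to = INIT_REEST_TO;
    int i;

    for (i = 0; i < 5; i++) {
        to = next_reestablish_timeout(to);
        printf("attempt %d: %u ticks\n", i + 1, to);
    }
    return 0;                        /* 10s, 20s, 30s, 30s, 30s */
}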
static struct rpc_xprt_ops xprt_rdma_procs; /* forward reference */
static void
@@ -229,7 +234,6 @@ static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
{
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int rc;
dprintk("RPC: %s: called\n", __func__);
@@ -238,10 +242,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt)
xprt_clear_connected(xprt);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
- rc = rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- dprintk("RPC: %s: rpcrdma_ep_destroy returned %i\n",
- __func__, rc);
+ rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_ia_close(&r_xprt->rx_ia);
xprt_rdma_free_addresses(xprt);
@@ -289,9 +290,9 @@ xprt_setup_rdma(struct xprt_create *args)
/* 60 second timeout, no retries */
xprt->timeout = &xprt_rdma_default_timeout;
- xprt->bind_timeout = (60U * HZ);
- xprt->reestablish_timeout = (5U * HZ);
- xprt->idle_timeout = (5U * 60 * HZ);
+ xprt->bind_timeout = RPCRDMA_BIND_TO;
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
+ xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
xprt->resvport = 0; /* privileged port not needed */
xprt->tsh_size = 0; /* RPC-RDMA handles framing */
@@ -391,7 +392,7 @@ out4:
xprt_rdma_free_addresses(xprt);
rc = -EINVAL;
out3:
- (void) rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
+ rpcrdma_ep_destroy(new_ep, &new_xprt->rx_ia);
out2:
rpcrdma_ia_close(&new_xprt->rx_ia);
out1:
@@ -436,10 +437,10 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
schedule_delayed_work(&r_xprt->rdma_connect,
xprt->reestablish_timeout);
xprt->reestablish_timeout <<= 1;
- if (xprt->reestablish_timeout > (30 * HZ))
- xprt->reestablish_timeout = (30 * HZ);
- else if (xprt->reestablish_timeout < (5 * HZ))
- xprt->reestablish_timeout = (5 * HZ);
+ if (xprt->reestablish_timeout > RPCRDMA_MAX_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_MAX_REEST_TO;
+ else if (xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO)
+ xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
} else {
schedule_delayed_work(&r_xprt->rdma_connect, 0);
if (!RPC_IS_ASYNC(task))
@@ -447,23 +448,6 @@ xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
}
}
-static int
-xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
-
- /* == RPC_CWNDSCALE @ init, but *after* setup */
- if (r_xprt->rx_buf.rb_cwndscale == 0UL) {
- r_xprt->rx_buf.rb_cwndscale = xprt->cwnd;
- dprintk("RPC: %s: cwndscale %lu\n", __func__,
- r_xprt->rx_buf.rb_cwndscale);
- BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
- }
- xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
- return xprt_reserve_xprt_cong(xprt, task);
-}
-
/*
* The RDMA allocate/free functions need the task structure as a place
* to hide the struct rpcrdma_req, which is necessary for the actual send/recv
@@ -479,7 +463,8 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
struct rpcrdma_req *req, *nreq;
req = rpcrdma_buffer_get(&rpcx_to_rdmax(xprt)->rx_buf);
- BUG_ON(NULL == req);
+ if (req == NULL)
+ return NULL;
if (size > req->rl_size) {
dprintk("RPC: %s: size %zd too large for buffer[%zd]: "
@@ -503,18 +488,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
* If the allocation or registration fails, the RPC framework
* will (doggedly) retry.
*/
- if (rpcx_to_rdmax(xprt)->rx_ia.ri_memreg_strategy ==
- RPCRDMA_BOUNCEBUFFERS) {
- /* forced to "pure inline" */
- dprintk("RPC: %s: too much data (%zd) for inline "
- "(r/w max %d/%d)\n", __func__, size,
- rpcx_to_rdmad(xprt).inline_rsize,
- rpcx_to_rdmad(xprt).inline_wsize);
- size = req->rl_size;
- rpc_exit(task, -EIO); /* fail the operation */
- rpcx_to_rdmax(xprt)->rx_stats.failed_marshal_count++;
- goto out;
- }
if (task->tk_flags & RPC_TASK_SWAPPER)
nreq = kmalloc(sizeof *req + size, GFP_ATOMIC);
else
@@ -543,7 +516,6 @@ xprt_rdma_allocate(struct rpc_task *task, size_t size)
req = nreq;
}
dprintk("RPC: %s: size %zd, request 0x%p\n", __func__, size, req);
-out:
req->rl_connect_cookie = 0; /* our reserved value */
return req->rl_xdr_buf;
@@ -579,9 +551,7 @@ xprt_rdma_free(void *buffer)
__func__, rep, (rep && rep->rr_func) ? " (with waiter)" : "");
/*
- * Finish the deregistration. When using mw bind, this was
- * begun in rpcrdma_reply_handler(). In all other modes, we
- * do it here, in thread context. The process is considered
+ * Finish the deregistration. The process is considered
* complete when the rr_func vector becomes NULL - this
* was put in place during rpcrdma_reply_handler() - the wait
* call below will not block if the dereg is "done". If
@@ -590,12 +560,7 @@ xprt_rdma_free(void *buffer)
for (i = 0; req->rl_nchunks;) {
--req->rl_nchunks;
i += rpcrdma_deregister_external(
- &req->rl_segments[i], r_xprt, NULL);
- }
-
- if (rep && wait_event_interruptible(rep->rr_unbind, !rep->rr_func)) {
- rep->rr_func = NULL; /* abandon the callback */
- req->rl_reply = NULL;
+ &req->rl_segments[i], r_xprt);
}
if (req->rl_iov.length == 0) { /* see allocate above */
@@ -630,13 +595,12 @@ xprt_rdma_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
+ int rc;
- /* marshal the send itself */
- if (req->rl_niovs == 0 && rpcrdma_marshal_req(rqst) != 0) {
- r_xprt->rx_stats.failed_marshal_count++;
- dprintk("RPC: %s: rpcrdma_marshal_req failed\n",
- __func__);
- return -EIO;
+ if (req->rl_niovs == 0) {
+ rc = rpcrdma_marshal_req(rqst);
+ if (rc < 0)
+ goto failed_marshal;
}
if (req->rl_reply == NULL) /* e.g. reconnection */
@@ -660,6 +624,12 @@ xprt_rdma_send_request(struct rpc_task *task)
rqst->rq_bytes_sent = 0;
return 0;
+failed_marshal:
+ r_xprt->rx_stats.failed_marshal_count++;
+ dprintk("RPC: %s: rpcrdma_marshal_req failed, status %i\n",
+ __func__, rc);
+ if (rc == -EIO)
+ return -EIO;
drop_connection:
xprt_disconnect_done(xprt);
return -ENOTCONN; /* implies disconnect */
@@ -705,7 +675,7 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
*/
static struct rpc_xprt_ops xprt_rdma_procs = {
- .reserve_xprt = xprt_rdma_reserve_xprt,
+ .reserve_xprt = xprt_reserve_xprt_cong,
.release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */
.alloc_slot = xprt_alloc_slot,
.release_request = xprt_release_rqst_cong, /* ditto */
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 93726560eaa8..13dbd1c389ff 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -48,8 +48,8 @@
*/
#include <linux/interrupt.h>
-#include <linux/pci.h> /* for Tavor hack below */
#include <linux/slab.h>
+#include <asm/bitops.h>
#include "xprt_rdma.h"
@@ -142,98 +142,139 @@ rpcrdma_cq_async_error_upcall(struct ib_event *event, void *context)
}
}
-static inline
-void rpcrdma_event_process(struct ib_wc *wc)
+static void
+rpcrdma_sendcq_process_wc(struct ib_wc *wc)
{
- struct rpcrdma_mw *frmr;
- struct rpcrdma_rep *rep =
- (struct rpcrdma_rep *)(unsigned long) wc->wr_id;
+ struct rpcrdma_mw *frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
- dprintk("RPC: %s: event rep %p status %X opcode %X length %u\n",
- __func__, rep, wc->status, wc->opcode, wc->byte_len);
+ dprintk("RPC: %s: frmr %p status %X opcode %d\n",
+ __func__, frmr, wc->status, wc->opcode);
- if (!rep) /* send or bind completion that we don't care about */
+ if (wc->wr_id == 0ULL)
return;
-
- if (IB_WC_SUCCESS != wc->status) {
- dprintk("RPC: %s: WC opcode %d status %X, connection lost\n",
- __func__, wc->opcode, wc->status);
- rep->rr_len = ~0U;
- if (wc->opcode != IB_WC_FAST_REG_MR && wc->opcode != IB_WC_LOCAL_INV)
- rpcrdma_schedule_tasklet(rep);
+ if (wc->status != IB_WC_SUCCESS)
return;
- }
- switch (wc->opcode) {
- case IB_WC_FAST_REG_MR:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ if (wc->opcode == IB_WC_FAST_REG_MR)
frmr->r.frmr.state = FRMR_IS_VALID;
- break;
- case IB_WC_LOCAL_INV:
- frmr = (struct rpcrdma_mw *)(unsigned long)wc->wr_id;
+ else if (wc->opcode == IB_WC_LOCAL_INV)
frmr->r.frmr.state = FRMR_IS_INVALID;
- break;
- case IB_WC_RECV:
- rep->rr_len = wc->byte_len;
- ib_dma_sync_single_for_cpu(
- rdmab_to_ia(rep->rr_buffer)->ri_id->device,
- rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
- /* Keep (only) the most recent credits, after check validity */
- if (rep->rr_len >= 16) {
- struct rpcrdma_msg *p =
- (struct rpcrdma_msg *) rep->rr_base;
- unsigned int credits = ntohl(p->rm_credit);
- if (credits == 0) {
- dprintk("RPC: %s: server"
- " dropped credits to 0!\n", __func__);
- /* don't deadlock */
- credits = 1;
- } else if (credits > rep->rr_buffer->rb_max_requests) {
- dprintk("RPC: %s: server"
- " over-crediting: %d (%d)\n",
- __func__, credits,
- rep->rr_buffer->rb_max_requests);
- credits = rep->rr_buffer->rb_max_requests;
- }
- atomic_set(&rep->rr_buffer->rb_credits, credits);
- }
- /* fall through */
- case IB_WC_BIND_MW:
- rpcrdma_schedule_tasklet(rep);
- break;
- default:
- dprintk("RPC: %s: unexpected WC event %X\n",
- __func__, wc->opcode);
- break;
- }
}
-static inline int
-rpcrdma_cq_poll(struct ib_cq *cq)
+static int
+rpcrdma_sendcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
{
- struct ib_wc wc;
- int rc;
+ struct ib_wc *wcs;
+ int budget, count, rc;
- for (;;) {
- rc = ib_poll_cq(cq, 1, &wc);
- if (rc < 0) {
- dprintk("RPC: %s: ib_poll_cq failed %i\n",
- __func__, rc);
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_send_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
return rc;
- }
- if (rc == 0)
- break;
- rpcrdma_event_process(&wc);
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_sendcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
+ return 0;
+}
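
rpcrdma_sendcq_poll() above drains completions in arrays of RPCRDMA_POLLSIZE and stops after a fixed budget of full batches, so a single upcall cannot monopolize the CPU. A user-space sketch of the same loop shape; the constants and the ib_poll_cq() stand-in are invented for illustration:

#include <stdio.h>

#define POLLSIZE  8      /* completions fetched per poll call (invented) */
#define WC_BUDGET 32     /* max completions handled per upcall (invented) */

static int pending = 45;             /* pretend 45 completions are queued */

/* Stub for ib_poll_cq(): hand out up to 'n' completions. */
static int poll_cq_stub(int n)
{
    int got = pending < n ? pending : n;

    pending -= got;
    return got;
}

static int drain_cq(void)
{
    int budget = WC_BUDGET / POLLSIZE;
    int rc;

    do {
        rc = poll_cq_stub(POLLSIZE);
        if (rc <= 0)
            return rc;               /* empty (0) or error (<0) */
        printf("processed %d completions\n", rc);
    } while (rc == POLLSIZE && --budget);

    return 0;
}

int main(void)
{
    drain_cq();                      /* handles at most WC_BUDGET entries */
    printf("still pending: %d\n", pending);
    return 0;
}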
+
+/*
+ * Handle send, fast_reg_mr, and local_inv completions.
+ *
+ * Send events are typically suppressed and thus do not result
+ * in an upcall. Occasionally one is signaled, however. This
+ * prevents the provider's completion queue from wrapping and
+ * losing a completion.
+ */
+static void
+rpcrdma_sendcq_upcall(struct ib_cq *cq, void *cq_context)
+{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
+ int rc;
+
+ rc = rpcrdma_sendcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
+ return;
}
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ return;
+ }
+
+ rpcrdma_sendcq_poll(cq, ep);
+}
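
The upcall above follows a poll / re-arm / re-poll pattern: drain the CQ, re-arm it with IB_CQ_REPORT_MISSED_EVENTS, and if the verb reports that completions slipped in between the last poll and the re-arm (positive return), poll once more so nothing waits for the next interrupt. A schematic sketch with stubbed verbs; the stub names and their canned behaviour are invented:

#include <stdio.h>

/* Stub modeled on ib_req_notify_cq() with IB_CQ_REPORT_MISSED_EVENTS:
 * 0 = armed cleanly, >0 = completions were missed, <0 = error. */
static int arm_cq_stub(void)
{
    static int calls;

    return ++calls == 1 ? 1 : 0;     /* pretend the first re-arm missed one */
}

static int poll_all_stub(void)
{
    printf("polling completion queue\n");
    return 0;
}

static void cq_upcall(void)
{
    int rc;

    if (poll_all_stub() < 0)
        return;

    rc = arm_cq_stub();
    if (rc == 0)
        return;                      /* armed, nothing slipped through */
    if (rc < 0) {
        fprintf(stderr, "re-arm failed: %d\n", rc);
        return;
    }

    /* Completions arrived after the last poll but before the re-arm:
     * drain them now rather than waiting for the next event. */
    poll_all_stub();
}

int main(void)
{
    cq_upcall();
    return 0;
}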
+
+static void
+rpcrdma_recvcq_process_wc(struct ib_wc *wc)
+{
+ struct rpcrdma_rep *rep =
+ (struct rpcrdma_rep *)(unsigned long)wc->wr_id;
+
+ dprintk("RPC: %s: rep %p status %X opcode %X length %u\n",
+ __func__, rep, wc->status, wc->opcode, wc->byte_len);
+
+ if (wc->status != IB_WC_SUCCESS) {
+ rep->rr_len = ~0U;
+ goto out_schedule;
+ }
+ if (wc->opcode != IB_WC_RECV)
+ return;
+
+ rep->rr_len = wc->byte_len;
+ ib_dma_sync_single_for_cpu(rdmab_to_ia(rep->rr_buffer)->ri_id->device,
+ rep->rr_iov.addr, rep->rr_len, DMA_FROM_DEVICE);
+
+ if (rep->rr_len >= 16) {
+ struct rpcrdma_msg *p = (struct rpcrdma_msg *)rep->rr_base;
+ unsigned int credits = ntohl(p->rm_credit);
+
+ if (credits == 0)
+ credits = 1; /* don't deadlock */
+ else if (credits > rep->rr_buffer->rb_max_requests)
+ credits = rep->rr_buffer->rb_max_requests;
+ atomic_set(&rep->rr_buffer->rb_credits, credits);
+ }
+
+out_schedule:
+ rpcrdma_schedule_tasklet(rep);
+}
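
rpcrdma_recvcq_process_wc() above keeps only the most recent credit grant from the server and clamps it: never below one (so the client cannot deadlock on zero credits) and never above rb_max_requests. The clamp shown stand-alone, with example values:

#include <stdio.h>

/* Clamp a server-granted credit value to [1, max_requests]. */
static unsigned int clamp_credits(unsigned int credits,
                                  unsigned int max_requests)
{
    if (credits == 0)
        credits = 1;                 /* don't deadlock */
    else if (credits > max_requests)
        credits = max_requests;
    return credits;
}

int main(void)
{
    printf("%u %u %u\n",
           clamp_credits(0, 32),     /* -> 1  */
           clamp_credits(8, 32),     /* -> 8  */
           clamp_credits(100, 32));  /* -> 32 */
    return 0;
}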
+
+static int
+rpcrdma_recvcq_poll(struct ib_cq *cq, struct rpcrdma_ep *ep)
+{
+ struct ib_wc *wcs;
+ int budget, count, rc;
+
+ budget = RPCRDMA_WC_BUDGET / RPCRDMA_POLLSIZE;
+ do {
+ wcs = ep->rep_recv_wcs;
+
+ rc = ib_poll_cq(cq, RPCRDMA_POLLSIZE, wcs);
+ if (rc <= 0)
+ return rc;
+
+ count = rc;
+ while (count-- > 0)
+ rpcrdma_recvcq_process_wc(wcs++);
+ } while (rc == RPCRDMA_POLLSIZE && --budget);
return 0;
}
/*
- * rpcrdma_cq_event_upcall
+ * Handle receive completions.
*
- * This upcall handles recv, send, bind and unbind events.
* It is reentrant but processes single events in order to maintain
* ordering of receives to keep server credits.
*
@@ -242,26 +283,31 @@ rpcrdma_cq_poll(struct ib_cq *cq)
* connection shutdown. That is, the structures required for
* the completion of the reply handler must remain intact until
* all memory has been reclaimed.
- *
- * Note that send events are suppressed and do not result in an upcall.
*/
static void
-rpcrdma_cq_event_upcall(struct ib_cq *cq, void *context)
+rpcrdma_recvcq_upcall(struct ib_cq *cq, void *cq_context)
{
+ struct rpcrdma_ep *ep = (struct rpcrdma_ep *)cq_context;
int rc;
- rc = rpcrdma_cq_poll(cq);
- if (rc)
+ rc = rpcrdma_recvcq_poll(cq, ep);
+ if (rc) {
+ dprintk("RPC: %s: ib_poll_cq failed: %i\n",
+ __func__, rc);
return;
+ }
- rc = ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
- if (rc) {
- dprintk("RPC: %s: ib_req_notify_cq failed %i\n",
+ rc = ib_req_notify_cq(cq,
+ IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS);
+ if (rc == 0)
+ return;
+ if (rc < 0) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
return;
}
- rpcrdma_cq_poll(cq);
+ rpcrdma_recvcq_poll(cq, ep);
}
#ifdef RPC_DEBUG
@@ -493,54 +539,32 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
ia->ri_dma_lkey = ia->ri_id->device->local_dma_lkey;
}
- switch (memreg) {
- case RPCRDMA_MEMWINDOWS:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- if (!(devattr.device_cap_flags & IB_DEVICE_MEM_WINDOW)) {
- dprintk("RPC: %s: MEMWINDOWS registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
- }
- break;
- case RPCRDMA_MTHCAFMR:
- if (!ia->ri_id->device->alloc_fmr) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
- memreg = RPCRDMA_ALLPHYSICAL;
-#else
- dprintk("RPC: %s: MTHCAFMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
-#endif
- }
- break;
- case RPCRDMA_FRMR:
+ if (memreg == RPCRDMA_FRMR) {
/* Requires both frmr reg and local dma lkey */
if ((devattr.device_cap_flags &
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) !=
(IB_DEVICE_MEM_MGT_EXTENSIONS|IB_DEVICE_LOCAL_DMA_LKEY)) {
-#if RPCRDMA_PERSISTENT_REGISTRATION
dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using riskier RPCRDMA_ALLPHYSICAL\n",
- __func__);
+ "not supported by HCA\n", __func__);
+ memreg = RPCRDMA_MTHCAFMR;
+ } else {
+ /* Mind the ia limit on FRMR page list depth */
+ ia->ri_max_frmr_depth = min_t(unsigned int,
+ RPCRDMA_MAX_DATA_SEGS,
+ devattr.max_fast_reg_page_list_len);
+ }
+ }
+ if (memreg == RPCRDMA_MTHCAFMR) {
+ if (!ia->ri_id->device->alloc_fmr) {
+ dprintk("RPC: %s: MTHCAFMR registration "
+ "not supported by HCA\n", __func__);
+#if RPCRDMA_PERSISTENT_REGISTRATION
memreg = RPCRDMA_ALLPHYSICAL;
#else
- dprintk("RPC: %s: FRMR registration "
- "specified but not supported by adapter, "
- "using slower RPCRDMA_REGISTER\n",
- __func__);
- memreg = RPCRDMA_REGISTER;
+ rc = -ENOMEM;
+ goto out2;
#endif
}
- break;
}
/*
@@ -552,8 +576,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
* adapter.
*/
switch (memreg) {
- case RPCRDMA_BOUNCEBUFFERS:
- case RPCRDMA_REGISTER:
case RPCRDMA_FRMR:
break;
#if RPCRDMA_PERSISTENT_REGISTRATION
@@ -563,30 +585,26 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
IB_ACCESS_REMOTE_READ;
goto register_setup;
#endif
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- mem_priv = IB_ACCESS_LOCAL_WRITE |
- IB_ACCESS_MW_BIND;
- goto register_setup;
case RPCRDMA_MTHCAFMR:
if (ia->ri_have_dma_lkey)
break;
mem_priv = IB_ACCESS_LOCAL_WRITE;
+#if RPCRDMA_PERSISTENT_REGISTRATION
register_setup:
+#endif
ia->ri_bind_mem = ib_get_dma_mr(ia->ri_pd, mem_priv);
if (IS_ERR(ia->ri_bind_mem)) {
printk(KERN_ALERT "%s: ib_get_dma_mr for "
- "phys register failed with %lX\n\t"
- "Will continue with degraded performance\n",
+ "phys register failed with %lX\n",
__func__, PTR_ERR(ia->ri_bind_mem));
- memreg = RPCRDMA_REGISTER;
- ia->ri_bind_mem = NULL;
+ rc = -ENOMEM;
+ goto out2;
}
break;
default:
- printk(KERN_ERR "%s: invalid memory registration mode %d\n",
- __func__, memreg);
- rc = -EINVAL;
+ printk(KERN_ERR "RPC: Unsupported memory "
+ "registration mode: %d\n", memreg);
+ rc = -ENOMEM;
goto out2;
}
dprintk("RPC: %s: memory registration strategy is %d\n",
@@ -640,6 +658,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
struct rpcrdma_create_data_internal *cdata)
{
struct ib_device_attr devattr;
+ struct ib_cq *sendcq, *recvcq;
int rc, err;
rc = ib_query_device(ia->ri_id->device, &devattr);
@@ -659,32 +678,42 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_wr = cdata->max_requests;
switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
+ case RPCRDMA_FRMR: {
+ int depth = 7;
+
/* Add room for frmr register and invalidate WRs.
* 1. FRMR reg WR for head
* 2. FRMR invalidate WR for head
- * 3. FRMR reg WR for pagelist
- * 4. FRMR invalidate WR for pagelist
+ * 3. N FRMR reg WRs for pagelist
+ * 4. N FRMR invalidate WRs for pagelist
* 5. FRMR reg WR for tail
* 6. FRMR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
- ep->rep_attr.cap.max_send_wr *= 7;
+
+ /* Calculate N if the device max FRMR depth is smaller than
+ * RPCRDMA_MAX_DATA_SEGS.
+ */
+ if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ int delta = RPCRDMA_MAX_DATA_SEGS -
+ ia->ri_max_frmr_depth;
+
+ do {
+ depth += 2; /* FRMR reg + invalidate */
+ delta -= ia->ri_max_frmr_depth;
+ } while (delta > 0);
+
+ }
+ ep->rep_attr.cap.max_send_wr *= depth;
if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr) {
- cdata->max_requests = devattr.max_qp_wr / 7;
+ cdata->max_requests = devattr.max_qp_wr / depth;
if (!cdata->max_requests)
return -EINVAL;
- ep->rep_attr.cap.max_send_wr = cdata->max_requests * 7;
+ ep->rep_attr.cap.max_send_wr = cdata->max_requests *
+ depth;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Add room for mw_binds+unbinds - overkill! */
- ep->rep_attr.cap.max_send_wr++;
- ep->rep_attr.cap.max_send_wr *= (2 * RPCRDMA_MAX_SEGS);
- if (ep->rep_attr.cap.max_send_wr > devattr.max_qp_wr)
- return -EINVAL;
- break;
+ }
default:
break;
}
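
The send-queue sizing in the FRMR case above starts from a depth of 7 WRs per RPC (register + invalidate for the head, one pagelist FRMR and the tail, plus the RDMA_SEND) and adds one register/invalidate pair for every extra FRMR the pagelist needs when the device's fast-register depth is smaller than RPCRDMA_MAX_DATA_SEGS. A stand-alone version of the calculation with a worked example; the segment counts are illustrative, not taken from a real device:

#include <stdio.h>

/* Per-RPC send-queue depth for FRMR, mirroring the loop in
 * rpcrdma_ep_create(): 7 base WRs plus 2 (reg + invalidate) for each
 * additional FRMR the pagelist needs. */
static int frmr_send_depth(int max_data_segs, int max_frmr_depth)
{
    int depth = 7;

    if (max_frmr_depth < max_data_segs) {
        int delta = max_data_segs - max_frmr_depth;

        do {
            depth += 2;              /* one more FRMR reg + invalidate */
            delta -= max_frmr_depth;
        } while (delta > 0);
    }
    return depth;
}

int main(void)
{
    /* e.g. 64 data segments on a device limited to 16 pages per FRMR:
     * three extra FRMRs are needed, so depth = 7 + 3 * 2 = 13 */
    printf("depth = %d\n", frmr_send_depth(64, 16));
    return 0;
}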
@@ -705,46 +734,51 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
ep->rep_attr.cap.max_recv_sge);
/* set trigger for requesting send completion */
- ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 /* - 1*/;
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- ep->rep_cqinit -= RPCRDMA_MAX_SEGS;
- break;
- default:
- break;
- }
+ ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1;
if (ep->rep_cqinit <= 2)
ep->rep_cqinit = 0;
INIT_CQCOUNT(ep);
ep->rep_ia = ia;
init_waitqueue_head(&ep->rep_connect_wait);
+ INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
- /*
- * Create a single cq for receive dto and mw_bind (only ever
- * care about unbind, really). Send completions are suppressed.
- * Use single threaded tasklet upcalls to maintain ordering.
- */
- ep->rep_cq = ib_create_cq(ia->ri_id->device, rpcrdma_cq_event_upcall,
- rpcrdma_cq_async_error_upcall, NULL,
- ep->rep_attr.cap.max_recv_wr +
+ sendcq = ib_create_cq(ia->ri_id->device, rpcrdma_sendcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
ep->rep_attr.cap.max_send_wr + 1, 0);
- if (IS_ERR(ep->rep_cq)) {
- rc = PTR_ERR(ep->rep_cq);
- dprintk("RPC: %s: ib_create_cq failed: %i\n",
+ if (IS_ERR(sendcq)) {
+ rc = PTR_ERR(sendcq);
+ dprintk("RPC: %s: failed to create send CQ: %i\n",
__func__, rc);
goto out1;
}
- rc = ib_req_notify_cq(ep->rep_cq, IB_CQ_NEXT_COMP);
+ rc = ib_req_notify_cq(sendcq, IB_CQ_NEXT_COMP);
+ if (rc) {
+ dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ recvcq = ib_create_cq(ia->ri_id->device, rpcrdma_recvcq_upcall,
+ rpcrdma_cq_async_error_upcall, ep,
+ ep->rep_attr.cap.max_recv_wr + 1, 0);
+ if (IS_ERR(recvcq)) {
+ rc = PTR_ERR(recvcq);
+ dprintk("RPC: %s: failed to create recv CQ: %i\n",
+ __func__, rc);
+ goto out2;
+ }
+
+ rc = ib_req_notify_cq(recvcq, IB_CQ_NEXT_COMP);
if (rc) {
dprintk("RPC: %s: ib_req_notify_cq failed: %i\n",
__func__, rc);
+ ib_destroy_cq(recvcq);
goto out2;
}
- ep->rep_attr.send_cq = ep->rep_cq;
- ep->rep_attr.recv_cq = ep->rep_cq;
+ ep->rep_attr.send_cq = sendcq;
+ ep->rep_attr.recv_cq = recvcq;
/* Initialize cma parameters */
@@ -754,9 +788,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
/* Client offers RDMA Read but does not initiate */
ep->rep_remote_cma.initiator_depth = 0;
- if (ia->ri_memreg_strategy == RPCRDMA_BOUNCEBUFFERS)
- ep->rep_remote_cma.responder_resources = 0;
- else if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
+ if (devattr.max_qp_rd_atom > 32) /* arbitrary but <= 255 */
ep->rep_remote_cma.responder_resources = 32;
else
ep->rep_remote_cma.responder_resources = devattr.max_qp_rd_atom;
@@ -768,7 +800,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia,
return 0;
out2:
- err = ib_destroy_cq(ep->rep_cq);
+ err = ib_destroy_cq(sendcq);
if (err)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, err);
@@ -782,11 +814,8 @@ out1:
* Disconnect and destroy endpoint. After this, the only
* valid operations on the ep are to free it (if dynamically
* allocated) or re-create it.
- *
- * The caller's error handling must be sure to not leak the endpoint
- * if this function fails.
*/
-int
+void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
@@ -794,6 +823,8 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
dprintk("RPC: %s: entering, connected is %d\n",
__func__, ep->rep_connected);
+ cancel_delayed_work_sync(&ep->rep_connect_worker);
+
if (ia->ri_id->qp) {
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc)
@@ -809,13 +840,17 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
ep->rep_pad_mr = NULL;
}
- rpcrdma_clean_cq(ep->rep_cq);
- rc = ib_destroy_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rc = ib_destroy_cq(ep->rep_attr.recv_cq);
if (rc)
dprintk("RPC: %s: ib_destroy_cq returned %i\n",
__func__, rc);
- return rc;
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
+ rc = ib_destroy_cq(ep->rep_attr.send_cq);
+ if (rc)
+ dprintk("RPC: %s: ib_destroy_cq returned %i\n",
+ __func__, rc);
}
/*
@@ -831,17 +866,20 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
if (ep->rep_connected != 0) {
struct rpcrdma_xprt *xprt;
retry:
+ dprintk("RPC: %s: reconnecting...\n", __func__);
rc = rpcrdma_ep_disconnect(ep, ia);
if (rc && rc != -ENOTCONN)
dprintk("RPC: %s: rpcrdma_ep_disconnect"
" status %i\n", __func__, rc);
- rpcrdma_clean_cq(ep->rep_cq);
+
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
xprt = container_of(ia, struct rpcrdma_xprt, rx_ia);
id = rpcrdma_create_id(xprt, ia,
(struct sockaddr *)&xprt->rx_data.addr);
if (IS_ERR(id)) {
- rc = PTR_ERR(id);
+ rc = -EHOSTUNREACH;
goto out;
}
/* TEMP TEMP TEMP - fail if new device:
@@ -855,35 +893,32 @@ retry:
printk("RPC: %s: can't reconnect on "
"different device!\n", __func__);
rdma_destroy_id(id);
- rc = -ENETDOWN;
+ rc = -ENETUNREACH;
goto out;
}
/* END TEMP */
+ rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ rdma_destroy_id(id);
+ rc = -ENETUNREACH;
+ goto out;
+ }
rdma_destroy_qp(ia->ri_id);
rdma_destroy_id(ia->ri_id);
ia->ri_id = id;
+ } else {
+ dprintk("RPC: %s: connecting...\n", __func__);
+ rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
+ if (rc) {
+ dprintk("RPC: %s: rdma_create_qp failed %i\n",
+ __func__, rc);
+ /* do not update ep->rep_connected */
+ return -ENETUNREACH;
+ }
}
- rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
- if (rc) {
- dprintk("RPC: %s: rdma_create_qp failed %i\n",
- __func__, rc);
- goto out;
- }
-
-/* XXX Tavor device performs badly with 2K MTU! */
-if (strnicmp(ia->ri_id->device->dma_device->bus->name, "pci", 3) == 0) {
- struct pci_dev *pcid = to_pci_dev(ia->ri_id->device->dma_device);
- if (pcid->device == PCI_DEVICE_ID_MELLANOX_TAVOR &&
- (pcid->vendor == PCI_VENDOR_ID_MELLANOX ||
- pcid->vendor == PCI_VENDOR_ID_TOPSPIN)) {
- struct ib_qp_attr attr = {
- .path_mtu = IB_MTU_1024
- };
- rc = ib_modify_qp(ia->ri_id->qp, &attr, IB_QP_PATH_MTU);
- }
-}
-
ep->rep_connected = 0;
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
@@ -944,7 +979,8 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
int rc;
- rpcrdma_clean_cq(ep->rep_cq);
+ rpcrdma_clean_cq(ep->rep_attr.recv_cq);
+ rpcrdma_clean_cq(ep->rep_attr.send_cq);
rc = rdma_disconnect(ia->ri_id);
if (!rc) {
/* returns without wait if not connected */
@@ -967,7 +1003,7 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
struct rpcrdma_ia *ia, struct rpcrdma_create_data_internal *cdata)
{
char *p;
- size_t len;
+ size_t len, rlen, wlen;
int i, rc;
struct rpcrdma_mw *r;
@@ -997,11 +1033,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
sizeof(struct rpcrdma_mw);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- len += (buf->rb_max_requests + 1) * RPCRDMA_MAX_SEGS *
- sizeof(struct rpcrdma_mw);
- break;
default:
break;
}
@@ -1032,32 +1063,29 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
}
p += cdata->padding;
- /*
- * Allocate the fmr's, or mw's for mw_bind chunk registration.
- * We "cycle" the mw's in order to minimize rkey reuse,
- * and also reduce unbind-to-bind collision.
- */
INIT_LIST_HEAD(&buf->rb_mws);
r = (struct rpcrdma_mw *)p;
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
for (i = buf->rb_max_requests * RPCRDMA_MAX_SEGS; i; i--) {
r->r.frmr.fr_mr = ib_alloc_fast_reg_mr(ia->ri_pd,
- RPCRDMA_MAX_SEGS);
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_mr)) {
rc = PTR_ERR(r->r.frmr.fr_mr);
dprintk("RPC: %s: ib_alloc_fast_reg_mr"
" failed %i\n", __func__, rc);
goto out;
}
- r->r.frmr.fr_pgl =
- ib_alloc_fast_reg_page_list(ia->ri_id->device,
- RPCRDMA_MAX_SEGS);
+ r->r.frmr.fr_pgl = ib_alloc_fast_reg_page_list(
+ ia->ri_id->device,
+ ia->ri_max_frmr_depth);
if (IS_ERR(r->r.frmr.fr_pgl)) {
rc = PTR_ERR(r->r.frmr.fr_pgl);
dprintk("RPC: %s: "
"ib_alloc_fast_reg_page_list "
"failed %i\n", __func__, rc);
+
+ ib_dereg_mr(r->r.frmr.fr_mr);
goto out;
}
list_add(&r->mw_list, &buf->rb_mws);
@@ -1082,21 +1110,6 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
++r;
}
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- /* Allocate one extra request's worth, for full cycling */
- for (i = (buf->rb_max_requests+1) * RPCRDMA_MAX_SEGS; i; i--) {
- r->r.mw = ib_alloc_mw(ia->ri_pd, IB_MW_TYPE_1);
- if (IS_ERR(r->r.mw)) {
- rc = PTR_ERR(r->r.mw);
- dprintk("RPC: %s: ib_alloc_mw"
- " failed %i\n", __func__, rc);
- goto out;
- }
- list_add(&r->mw_list, &buf->rb_mws);
- ++r;
- }
- break;
default:
break;
}
@@ -1105,16 +1118,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
* Allocate/init the request/reply buffers. Doing this
* using kmalloc for now -- one for each buf.
*/
+ wlen = 1 << fls(cdata->inline_wsize + sizeof(struct rpcrdma_req));
+ rlen = 1 << fls(cdata->inline_rsize + sizeof(struct rpcrdma_rep));
+ dprintk("RPC: %s: wlen = %zu, rlen = %zu\n",
+ __func__, wlen, rlen);
+
for (i = 0; i < buf->rb_max_requests; i++) {
struct rpcrdma_req *req;
struct rpcrdma_rep *rep;
- len = cdata->inline_wsize + sizeof(struct rpcrdma_req);
- /* RPC layer requests *double* size + 1K RPC_SLACK_SPACE! */
- /* Typical ~2400b, so rounding up saves work later */
- if (len < 4096)
- len = 4096;
- req = kmalloc(len, GFP_KERNEL);
+ req = kmalloc(wlen, GFP_KERNEL);
if (req == NULL) {
dprintk("RPC: %s: request buffer %d alloc"
" failed\n", __func__, i);
@@ -1126,16 +1139,16 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
buf->rb_send_bufs[i]->rl_buffer = buf;
rc = rpcrdma_register_internal(ia, req->rl_base,
- len - offsetof(struct rpcrdma_req, rl_base),
+ wlen - offsetof(struct rpcrdma_req, rl_base),
&buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
if (rc)
goto out;
- buf->rb_send_bufs[i]->rl_size = len-sizeof(struct rpcrdma_req);
+ buf->rb_send_bufs[i]->rl_size = wlen -
+ sizeof(struct rpcrdma_req);
- len = cdata->inline_rsize + sizeof(struct rpcrdma_rep);
- rep = kmalloc(len, GFP_KERNEL);
+ rep = kmalloc(rlen, GFP_KERNEL);
if (rep == NULL) {
dprintk("RPC: %s: reply buffer %d alloc failed\n",
__func__, i);
@@ -1145,10 +1158,9 @@ rpcrdma_buffer_create(struct rpcrdma_buffer *buf, struct rpcrdma_ep *ep,
memset(rep, 0, sizeof(struct rpcrdma_rep));
buf->rb_recv_bufs[i] = rep;
buf->rb_recv_bufs[i]->rr_buffer = buf;
- init_waitqueue_head(&rep->rr_unbind);
rc = rpcrdma_register_internal(ia, rep->rr_base,
- len - offsetof(struct rpcrdma_rep, rr_base),
+ rlen - offsetof(struct rpcrdma_rep, rr_base),
&buf->rb_recv_bufs[i]->rr_handle,
&buf->rb_recv_bufs[i]->rr_iov);
if (rc)
@@ -1179,7 +1191,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
/* clean up in reverse order from create
* 1. recv mr memory (mr free, then kfree)
- * 1a. bind mw memory
* 2. send mr memory (mr free, then kfree)
* 3. padding (if any) [moved to rpcrdma_ep_destroy]
* 4. arrays
@@ -1194,41 +1205,6 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
kfree(buf->rb_recv_bufs[i]);
}
if (buf->rb_send_bufs && buf->rb_send_bufs[i]) {
- while (!list_empty(&buf->rb_mws)) {
- r = list_entry(buf->rb_mws.next,
- struct rpcrdma_mw, mw_list);
- list_del(&r->mw_list);
- switch (ia->ri_memreg_strategy) {
- case RPCRDMA_FRMR:
- rc = ib_dereg_mr(r->r.frmr.fr_mr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dereg_mr"
- " failed %i\n",
- __func__, rc);
- ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
- break;
- case RPCRDMA_MTHCAFMR:
- rc = ib_dealloc_fmr(r->r.fmr);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_fmr"
- " failed %i\n",
- __func__, rc);
- break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = ib_dealloc_mw(r->r.mw);
- if (rc)
- dprintk("RPC: %s:"
- " ib_dealloc_mw"
- " failed %i\n",
- __func__, rc);
- break;
- default:
- break;
- }
- }
rpcrdma_deregister_internal(ia,
buf->rb_send_bufs[i]->rl_handle,
&buf->rb_send_bufs[i]->rl_iov);
@@ -1236,6 +1212,33 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
}
}
+ while (!list_empty(&buf->rb_mws)) {
+ r = list_entry(buf->rb_mws.next,
+ struct rpcrdma_mw, mw_list);
+ list_del(&r->mw_list);
+ switch (ia->ri_memreg_strategy) {
+ case RPCRDMA_FRMR:
+ rc = ib_dereg_mr(r->r.frmr.fr_mr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dereg_mr"
+ " failed %i\n",
+ __func__, rc);
+ ib_free_fast_reg_page_list(r->r.frmr.fr_pgl);
+ break;
+ case RPCRDMA_MTHCAFMR:
+ rc = ib_dealloc_fmr(r->r.fmr);
+ if (rc)
+ dprintk("RPC: %s:"
+ " ib_dealloc_fmr"
+ " failed %i\n",
+ __func__, rc);
+ break;
+ default:
+ break;
+ }
+ }
+
kfree(buf->rb_pool);
}
@@ -1299,21 +1302,17 @@ rpcrdma_buffer_put(struct rpcrdma_req *req)
int i;
unsigned long flags;
- BUG_ON(req->rl_nchunks != 0);
spin_lock_irqsave(&buffers->rb_lock, flags);
buffers->rb_send_bufs[--buffers->rb_send_index] = req;
req->rl_niovs = 0;
if (req->rl_reply) {
buffers->rb_recv_bufs[--buffers->rb_recv_index] = req->rl_reply;
- init_waitqueue_head(&req->rl_reply->rr_unbind);
req->rl_reply->rr_func = NULL;
req->rl_reply = NULL;
}
switch (ia->ri_memreg_strategy) {
case RPCRDMA_FRMR:
case RPCRDMA_MTHCAFMR:
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
/*
* Cycle mw's back in reverse order, and "spin" them.
* This delays and scrambles reuse as much as possible.
@@ -1358,8 +1357,7 @@ rpcrdma_recv_buffer_get(struct rpcrdma_req *req)
/*
* Put reply buffers back into pool when not attached to
- * request. This happens in error conditions, and when
- * aborting unbinds. Pre-decrement counter/array index.
+ * request. This happens in error conditions.
*/
void
rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
@@ -1498,8 +1496,8 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
seg1->mr_offset -= pageoff; /* start of page */
seg1->mr_len += pageoff;
len = -pageoff;
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
+ if (*nsegs > ia->ri_max_frmr_depth)
+ *nsegs = ia->ri_max_frmr_depth;
for (page_no = i = 0; i < *nsegs;) {
rpcrdma_map_one(ia, seg, writing);
pa = seg->mr_dma;
@@ -1536,10 +1534,6 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
} else
post_wr = &frmr_wr;
- /* Bump the key */
- key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
-
/* Prepare FRMR WR */
memset(&frmr_wr, 0, sizeof frmr_wr);
frmr_wr.wr_id = (unsigned long)(void *)seg1->mr_chunk.rl_mw;
@@ -1550,7 +1544,16 @@ rpcrdma_register_frmr_external(struct rpcrdma_mr_seg *seg,
frmr_wr.wr.fast_reg.page_list_len = page_no;
frmr_wr.wr.fast_reg.page_shift = PAGE_SHIFT;
frmr_wr.wr.fast_reg.length = page_no << PAGE_SHIFT;
- BUG_ON(frmr_wr.wr.fast_reg.length < len);
+ if (frmr_wr.wr.fast_reg.length < len) {
+ while (seg1->mr_nsegs--)
+ rpcrdma_unmap_one(ia, seg++);
+ return -EIO;
+ }
+
+ /* Bump the key */
+ key = (u8)(seg1->mr_chunk.rl_mw->r.frmr.fr_mr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(seg1->mr_chunk.rl_mw->r.frmr.fr_mr, ++key);
+
frmr_wr.wr.fast_reg.access_flags = (writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ);
@@ -1661,135 +1664,6 @@ rpcrdma_deregister_fmr_external(struct rpcrdma_mr_seg *seg,
return rc;
}
-static int
-rpcrdma_register_memwin_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct ib_mw_bind param;
- int rc;
-
- *nsegs = 1;
- rpcrdma_map_one(ia, seg, writing);
- param.bind_info.mr = ia->ri_bind_mem;
- param.wr_id = 0ULL; /* no send cookie */
- param.bind_info.addr = seg->mr_dma;
- param.bind_info.length = seg->mr_len;
- param.send_flags = 0;
- param.bind_info.mw_access_flags = mem_priv;
-
- DECR_CQCOUNT(&r_xprt->rx_ep);
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- if (rc) {
- dprintk("RPC: %s: failed ib_bind_mw "
- "%u@0x%llx status %i\n",
- __func__, seg->mr_len,
- (unsigned long long)seg->mr_dma, rc);
- rpcrdma_unmap_one(ia, seg);
- } else {
- seg->mr_rkey = seg->mr_chunk.rl_mw->r.mw->rkey;
- seg->mr_base = param.bind_info.addr;
- seg->mr_nsegs = 1;
- }
- return rc;
-}
-
-static int
-rpcrdma_deregister_memwin_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia,
- struct rpcrdma_xprt *r_xprt, void **r)
-{
- struct ib_mw_bind param;
- LIST_HEAD(l);
- int rc;
-
- BUG_ON(seg->mr_nsegs != 1);
- param.bind_info.mr = ia->ri_bind_mem;
- param.bind_info.addr = 0ULL; /* unbind */
- param.bind_info.length = 0;
- param.bind_info.mw_access_flags = 0;
- if (*r) {
- param.wr_id = (u64) (unsigned long) *r;
- param.send_flags = IB_SEND_SIGNALED;
- INIT_CQCOUNT(&r_xprt->rx_ep);
- } else {
- param.wr_id = 0ULL;
- param.send_flags = 0;
- DECR_CQCOUNT(&r_xprt->rx_ep);
- }
- rc = ib_bind_mw(ia->ri_id->qp, seg->mr_chunk.rl_mw->r.mw, &param);
- rpcrdma_unmap_one(ia, seg);
- if (rc)
- dprintk("RPC: %s: failed ib_(un)bind_mw,"
- " status %i\n", __func__, rc);
- else
- *r = NULL; /* will upcall on completion */
- return rc;
-}
-
-static int
-rpcrdma_register_default_external(struct rpcrdma_mr_seg *seg,
- int *nsegs, int writing, struct rpcrdma_ia *ia)
-{
- int mem_priv = (writing ? IB_ACCESS_REMOTE_WRITE :
- IB_ACCESS_REMOTE_READ);
- struct rpcrdma_mr_seg *seg1 = seg;
- struct ib_phys_buf ipb[RPCRDMA_MAX_DATA_SEGS];
- int len, i, rc = 0;
-
- if (*nsegs > RPCRDMA_MAX_DATA_SEGS)
- *nsegs = RPCRDMA_MAX_DATA_SEGS;
- for (len = 0, i = 0; i < *nsegs;) {
- rpcrdma_map_one(ia, seg, writing);
- ipb[i].addr = seg->mr_dma;
- ipb[i].size = seg->mr_len;
- len += seg->mr_len;
- ++seg;
- ++i;
- /* Check for holes */
- if ((i < *nsegs && offset_in_page(seg->mr_offset)) ||
- offset_in_page((seg-1)->mr_offset+(seg-1)->mr_len))
- break;
- }
- seg1->mr_base = seg1->mr_dma;
- seg1->mr_chunk.rl_mr = ib_reg_phys_mr(ia->ri_pd,
- ipb, i, mem_priv, &seg1->mr_base);
- if (IS_ERR(seg1->mr_chunk.rl_mr)) {
- rc = PTR_ERR(seg1->mr_chunk.rl_mr);
- dprintk("RPC: %s: failed ib_reg_phys_mr "
- "%u@0x%llx (%d)... status %i\n",
- __func__, len,
- (unsigned long long)seg1->mr_dma, i, rc);
- while (i--)
- rpcrdma_unmap_one(ia, --seg);
- } else {
- seg1->mr_rkey = seg1->mr_chunk.rl_mr->rkey;
- seg1->mr_nsegs = i;
- seg1->mr_len = len;
- }
- *nsegs = i;
- return rc;
-}
-
-static int
-rpcrdma_deregister_default_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_ia *ia)
-{
- struct rpcrdma_mr_seg *seg1 = seg;
- int rc;
-
- rc = ib_dereg_mr(seg1->mr_chunk.rl_mr);
- seg1->mr_chunk.rl_mr = NULL;
- while (seg1->mr_nsegs--)
- rpcrdma_unmap_one(ia, seg++);
- if (rc)
- dprintk("RPC: %s: failed ib_dereg_mr,"
- " status %i\n", __func__, rc);
- return rc;
-}
-
int
rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int nsegs, int writing, struct rpcrdma_xprt *r_xprt)
@@ -1819,16 +1693,8 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_register_fmr_external(seg, &nsegs, writing, ia);
break;
- /* Registration using memory windows */
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_register_memwin_external(seg, &nsegs, writing, ia, r_xprt);
- break;
-
- /* Default registration each time */
default:
- rc = rpcrdma_register_default_external(seg, &nsegs, writing, ia);
- break;
+ return -1;
}
if (rc)
return -1;
@@ -1838,7 +1704,7 @@ rpcrdma_register_external(struct rpcrdma_mr_seg *seg,
int
rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
- struct rpcrdma_xprt *r_xprt, void *r)
+ struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int nsegs = seg->mr_nsegs, rc;
@@ -1847,9 +1713,7 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
#if RPCRDMA_PERSISTENT_REGISTRATION
case RPCRDMA_ALLPHYSICAL:
- BUG_ON(nsegs != 1);
rpcrdma_unmap_one(ia, seg);
- rc = 0;
break;
#endif
@@ -1861,21 +1725,9 @@ rpcrdma_deregister_external(struct rpcrdma_mr_seg *seg,
rc = rpcrdma_deregister_fmr_external(seg, ia);
break;
- case RPCRDMA_MEMWINDOWS_ASYNC:
- case RPCRDMA_MEMWINDOWS:
- rc = rpcrdma_deregister_memwin_external(seg, ia, r_xprt, &r);
- break;
-
default:
- rc = rpcrdma_deregister_default_external(seg, ia);
break;
}
- if (r) {
- struct rpcrdma_rep *rep = r;
- void (*func)(struct rpcrdma_rep *) = rep->rr_func;
- rep->rr_func = NULL;
- func(rep); /* dereg done, callback now */
- }
return nsegs;
}
@@ -1950,7 +1802,6 @@ rpcrdma_ep_post_recv(struct rpcrdma_ia *ia,
ib_dma_sync_single_for_cpu(ia->ri_id->device,
rep->rr_iov.addr, rep->rr_iov.length, DMA_BIDIRECTIONAL);
- DECR_CQCOUNT(ep);
rc = ib_post_recv(ia->ri_id->qp, &recv_wr, &recv_wr_fail);
if (rc)
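
Note on the buffer-sizing change in rpcrdma_buffer_create() above: the new wlen/rlen values are rounded up to the next power of two with "1 << fls(len)", replacing the old hard-coded 4096-byte minimum. A rough shell sketch of that rounding, with illustrative lengths only (not part of the patch):

    round_up_pow2() {
        # Mirrors "1 << fls(len)": smallest power of two strictly greater than len
        local len=$1 bits=0
        while [ $((1 << bits)) -le "$len" ]; do
            bits=$((bits + 1))
        done
        echo $((1 << bits))
    }
    round_up_pow2 2400   # -> 4096, the "typical ~2400b" case noted in the removed comment
    round_up_pow2 4096   # -> 8192; an exact power of two still rounds up
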
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index cc1445dc1d1a..89e7cd479705 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -43,6 +43,7 @@
#include <linux/wait.h> /* wait_queue_head_t, etc */
#include <linux/spinlock.h> /* spinlock_t, etc */
#include <linux/atomic.h> /* atomic_t, etc */
+#include <linux/workqueue.h> /* struct work_struct */
#include <rdma/rdma_cm.h> /* RDMA connection api */
#include <rdma/ib_verbs.h> /* RDMA verbs api */
@@ -66,18 +67,21 @@ struct rpcrdma_ia {
struct completion ri_done;
int ri_async_rc;
enum rpcrdma_memreg ri_memreg_strategy;
+ unsigned int ri_max_frmr_depth;
};
/*
* RDMA Endpoint -- one per transport instance
*/
+#define RPCRDMA_WC_BUDGET (128)
+#define RPCRDMA_POLLSIZE (16)
+
struct rpcrdma_ep {
atomic_t rep_cqcount;
int rep_cqinit;
int rep_connected;
struct rpcrdma_ia *rep_ia;
- struct ib_cq *rep_cq;
struct ib_qp_init_attr rep_attr;
wait_queue_head_t rep_connect_wait;
struct ib_sge rep_pad; /* holds zeroed pad */
@@ -86,6 +90,9 @@ struct rpcrdma_ep {
struct rpc_xprt *rep_xprt; /* for rep_func */
struct rdma_conn_param rep_remote_cma;
struct sockaddr_storage rep_remote_addr;
+ struct delayed_work rep_connect_worker;
+ struct ib_wc rep_send_wcs[RPCRDMA_POLLSIZE];
+ struct ib_wc rep_recv_wcs[RPCRDMA_POLLSIZE];
};
#define INIT_CQCOUNT(ep) atomic_set(&(ep)->rep_cqcount, (ep)->rep_cqinit)
@@ -124,7 +131,6 @@ struct rpcrdma_rep {
struct rpc_xprt *rr_xprt; /* needed for request/reply matching */
void (*rr_func)(struct rpcrdma_rep *);/* called by tasklet in softint */
struct list_head rr_list; /* tasklet list */
- wait_queue_head_t rr_unbind; /* optional unbind wait */
struct ib_sge rr_iov; /* for posting */
struct ib_mr *rr_handle; /* handle for mem in rr_iov */
char rr_base[MAX_RPCRDMAHDR]; /* minimal inline receive buffer */
@@ -159,7 +165,6 @@ struct rpcrdma_mr_seg { /* chunk descriptors */
struct ib_mr *rl_mr; /* if registered directly */
struct rpcrdma_mw { /* if registered from region */
union {
- struct ib_mw *mw;
struct ib_fmr *fmr;
struct {
struct ib_fast_reg_page_list *fr_pgl;
@@ -207,7 +212,6 @@ struct rpcrdma_req {
struct rpcrdma_buffer {
spinlock_t rb_lock; /* protects indexes */
atomic_t rb_credits; /* most recent server credits */
- unsigned long rb_cwndscale; /* cached framework rpc_cwndscale */
int rb_max_requests;/* client max requests */
struct list_head rb_mws; /* optional memory windows/fmrs/frmrs */
int rb_send_index;
@@ -300,7 +304,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *);
*/
int rpcrdma_ep_create(struct rpcrdma_ep *, struct rpcrdma_ia *,
struct rpcrdma_create_data_internal *);
-int rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
+void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
@@ -330,11 +334,12 @@ int rpcrdma_deregister_internal(struct rpcrdma_ia *,
int rpcrdma_register_external(struct rpcrdma_mr_seg *,
int, int, struct rpcrdma_xprt *);
int rpcrdma_deregister_external(struct rpcrdma_mr_seg *,
- struct rpcrdma_xprt *, void *);
+ struct rpcrdma_xprt *);
/*
* RPC/RDMA connection management calls - xprtrdma/rpc_rdma.c
*/
+void rpcrdma_connect_worker(struct work_struct *);
void rpcrdma_conn_func(struct rpcrdma_ep *);
void rpcrdma_reply_handler(struct rpcrdma_rep *);
diff --git a/scripts/decode_stacktrace.sh b/scripts/decode_stacktrace.sh
new file mode 100755
index 000000000000..515c4c00e957
--- /dev/null
+++ b/scripts/decode_stacktrace.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# (c) 2014, Sasha Levin <sasha.levin@oracle.com>
+#set -x
+
+if [[ $# != 2 ]]; then
+ echo "Usage:"
+ echo " $0 [vmlinux] [base path]"
+ exit 1
+fi
+
+vmlinux=$1
+basepath=$2
+declare -A cache
+
+parse_symbol() {
+ # The structure of the symbol at this point is:
+ # [name]+[offset]/[total length]
+ #
+ # For example:
+ # do_basic_setup+0x9c/0xbf
+
+
+ # Strip the symbol name so that we can look it up
+ local name=${symbol%+*}
+
+ # Use 'nm vmlinux' to figure out the base address of said symbol.
+ # It's actually faster to call it every time than to load it
+ # all into bash.
+ if [[ "${cache[$name]+isset}" == "isset" ]]; then
+ local base_addr=${cache[$name]}
+ else
+ local base_addr=$(nm "$vmlinux" | grep -i ' t ' | awk "/ $name\$/ {print \$1}" | head -n1)
+ cache["$name"]="$base_addr"
+ fi
+ # Let's start doing the math to get the exact address into the
+ # symbol. First, strip out the symbol total length.
+ local expr=${symbol%/*}
+
+ # Now, replace the symbol name with the base address we found
+ # before.
+ expr=${expr/$name/0x$base_addr}
+
+ # Evaluate it to find the actual address
+ expr=$((expr))
+ local address=$(printf "%x\n" "$expr")
+
+ # Pass it to addr2line to get filename and line number
+ # Could get more than one result
+ if [[ "${cache[$address]+isset}" == "isset" ]]; then
+ local code=${cache[$address]}
+ else
+ local code=$(addr2line -i -e "$vmlinux" "$address")
+ cache[$address]=$code
+ fi
+
+ # addr2line doesn't return a proper error code if it fails, so
+ # we detect it using the value it prints so that we can preserve
+ # the offset/size into the function and bail out
+ if [[ $code == "??:0" ]]; then
+ return
+ fi
+
+ # Strip out the base of the path
+ code=${code//$basepath/""}
+
+ # In the case of inlines, move everything to same line
+ code=${code//$'\n'/' '}
+
+ # Replace old address with pretty line numbers
+ symbol="$name ($code)"
+}
+
+decode_code() {
+ local scripts=`dirname "${BASH_SOURCE[0]}"`
+
+ echo "$1" | $scripts/decodecode
+}
+
+handle_line() {
+ local words
+
+ # Tokenize
+ read -a words <<<"$1"
+
+ # Remove hex numbers. Do it ourselves until it happens in the
+ # kernel
+
+ # We need to know the index of the last element before we
+ # remove elements because arrays are sparse
+ local last=$(( ${#words[@]} - 1 ))
+
+ for i in "${!words[@]}"; do
+ # Remove the address
+ if [[ ${words[$i]} =~ \[\<([^]]+)\>\] ]]; then
+ unset words[$i]
+ fi
+
+ # Format timestamps with tabs
+ if [[ ${words[$i]} == \[ && ${words[$i+1]} == *\] ]]; then
+ unset words[$i]
+ words[$i+1]=$(printf "[%13s\n" "${words[$i+1]}")
+ fi
+ done
+
+ # The symbol is the last element, process it
+ symbol=${words[$last]}
+ unset words[$last]
+ parse_symbol # modifies $symbol
+
+ # Print the line with the symbol resolved to file:line
+ echo "${words[@]}" "$symbol"
+}
+
+while read line; do
+ # Let's see if we have an address in the line
+ if [[ $line =~ \[\<([^]]+)\>\] ]]; then
+ # Translate address to line numbers
+ handle_line "$line"
+ # Is it a code line?
+ elif [[ $line == *Code:* ]]; then
+ decode_code "$line"
+ else
+ # Nothing special in this line, show it as is
+ echo "$line"
+ fi
+done
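
Usage sketch for the new decode_stacktrace.sh (the paths below are examples, not taken from the patch): pass a vmlinux built with debug info and the source/build path to strip from the output, and feed a raw stack trace on stdin; addresses of the form [<ffffffff...>] are translated to file:line.

    # Capture an Oops/stack trace and decode it
    dmesg > oops.txt
    ./scripts/decode_stacktrace.sh vmlinux /usr/src/linux/ < oops.txt
    # Lines containing "Code:" are additionally disassembled via scripts/decodecode
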