summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorStephen Rothwell <sfr@canb.auug.org.au>2013-03-22 15:35:12 +1100
committerStephen Rothwell <sfr@canb.auug.org.au>2013-03-22 15:35:12 +1100
commitbcc94e162326c90edc1e72dfbf0874869ee53caf (patch)
treec4f753ff71487cc87bd9da5453e79116fa8a132b
parent5119bb9078f599d5340a912f0fac6d88e2ce0f4b (diff)
parent889fc8ba21f58a4a39fa675bd7624c83222cf829 (diff)
Merge branch 'akpm/master'
-rw-r--r--Documentation/cgroups/memory.txt16
-rw-r--r--Documentation/filesystems/vfat.txt26
-rw-r--r--Documentation/sysctl/vm.txt33
-rw-r--r--MAINTAINERS14
-rw-r--r--arch/alpha/kernel/sys_nautilus.c5
-rw-r--r--arch/alpha/mm/init.c24
-rw-r--r--arch/alpha/mm/numa.c3
-rw-r--r--arch/arc/mm/init.c23
-rw-r--r--arch/arm/Kconfig1
-rw-r--r--arch/arm/boot/compressed/.gitignore1
-rw-r--r--arch/arm/boot/compressed/Makefile6
-rw-r--r--arch/arm/boot/compressed/decompress.c4
-rw-r--r--arch/arm/boot/compressed/piggy.lz4.S6
-rw-r--r--arch/arm/include/asm/pgtable.h9
-rw-r--r--arch/arm/kernel/early_printk.c17
-rw-r--r--arch/arm/mm/init.c50
-rw-r--r--arch/arm/mm/mmap.c2
-rw-r--r--arch/arm64/mm/init.c26
-rw-r--r--arch/arm64/mm/mmap.c2
-rw-r--r--arch/avr32/mm/init.c24
-rw-r--r--arch/blackfin/kernel/early_printk.c2
-rw-r--r--arch/blackfin/mm/init.c22
-rw-r--r--arch/c6x/mm/init.c30
-rw-r--r--arch/cris/mm/init.c16
-rw-r--r--arch/frv/mm/init.c38
-rw-r--r--arch/h8300/mm/init.c30
-rw-r--r--arch/ia64/include/asm/hugetlb.h1
-rw-r--r--arch/ia64/mm/contig.c2
-rw-r--r--arch/ia64/mm/discontig.c2
-rw-r--r--arch/ia64/mm/init.c23
-rw-r--r--arch/m32r/mm/init.c26
-rw-r--r--arch/m68k/mm/init.c24
-rw-r--r--arch/metag/mm/init.c31
-rw-r--r--arch/microblaze/include/asm/setup.h1
-rw-r--r--arch/microblaze/kernel/early_printk.c26
-rw-r--r--arch/microblaze/mm/init.c34
-rw-r--r--arch/mips/include/asm/hugetlb.h1
-rw-r--r--arch/mips/include/asm/kvm_host.h2
-rw-r--r--arch/mips/kernel/early_printk.c12
-rw-r--r--arch/mips/mm/init.c37
-rw-r--r--arch/mips/mm/mmap.c2
-rw-r--r--arch/mips/sgi-ip27/ip27-memory.c4
-rw-r--r--arch/mn10300/mm/init.c23
-rw-r--r--arch/openrisc/mm/init.c27
-rw-r--r--arch/parisc/Kconfig1
-rw-r--r--arch/parisc/Kconfig.debug14
-rw-r--r--arch/parisc/mm/init.c25
-rw-r--r--arch/powerpc/include/asm/hugetlb.h1
-rw-r--r--arch/powerpc/include/asm/page_64.h3
-rw-r--r--arch/powerpc/kernel/crash_dump.c5
-rw-r--r--arch/powerpc/kernel/fadump.c5
-rw-r--r--arch/powerpc/kernel/kvm.c7
-rw-r--r--arch/powerpc/kernel/udbg.c6
-rw-r--r--arch/powerpc/mm/hugetlbpage.c2
-rw-r--r--arch/powerpc/mm/mem.c35
-rw-r--r--arch/powerpc/mm/mmap_64.c2
-rw-r--r--arch/powerpc/mm/slice.c223
-rw-r--r--arch/powerpc/platforms/512x/mpc512x_shared.c5
-rw-r--r--arch/powerpc/platforms/cell/spufs/file.c2
-rw-r--r--arch/s390/Kconfig1
-rw-r--r--arch/s390/Kconfig.debug14
-rw-r--r--arch/s390/hypfs/inode.c1
-rw-r--r--arch/s390/include/asm/hugetlb.h56
-rw-r--r--arch/s390/include/asm/pgtable.h95
-rw-r--r--arch/s390/lib/Makefile1
-rw-r--r--arch/s390/mm/hugetlbpage.c2
-rw-r--r--arch/s390/mm/init.c35
-rw-r--r--arch/s390/mm/mmap.c4
-rw-r--r--arch/score/mm/init.c33
-rw-r--r--arch/sh/include/asm/hugetlb.h1
-rw-r--r--arch/sh/kernel/sh_bios.c2
-rw-r--r--arch/sh/mm/init.c26
-rw-r--r--arch/sparc/include/asm/hugetlb.h1
-rw-r--r--arch/sparc/kernel/leon_smp.c15
-rw-r--r--arch/sparc/kernel/sys_sparc_64.c2
-rw-r--r--arch/sparc/lib/Makefile1
-rw-r--r--arch/sparc/lib/usercopy.c9
-rw-r--r--arch/sparc/mm/init_32.c49
-rw-r--r--arch/sparc/mm/init_64.c28
-rw-r--r--arch/tile/Kconfig8
-rw-r--r--arch/tile/include/asm/hugetlb.h1
-rw-r--r--arch/tile/include/asm/uaccess.h7
-rw-r--r--arch/tile/kernel/early_printk.c27
-rw-r--r--arch/tile/lib/uaccess.c8
-rw-r--r--arch/tile/mm/mmap.c2
-rw-r--r--arch/tile/mm/pgtable.c7
-rw-r--r--arch/um/kernel/early_printk.c8
-rw-r--r--arch/um/kernel/mem.c26
-rw-r--r--arch/unicore32/kernel/early_printk.c12
-rw-r--r--arch/unicore32/mm/init.c31
-rw-r--r--arch/unicore32/mm/ioremap.c17
-rw-r--r--arch/x86/Kconfig2
-rw-r--r--arch/x86/Kconfig.debug14
-rw-r--r--arch/x86/boot/compressed/Makefile5
-rw-r--r--arch/x86/boot/compressed/misc.c4
-rw-r--r--arch/x86/ia32/ia32_aout.c3
-rw-r--r--arch/x86/include/asm/e820.h2
-rw-r--r--arch/x86/include/asm/hugetlb.h1
-rw-r--r--arch/x86/kernel/cpu/Makefile4
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl48
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.sh41
-rw-r--r--arch/x86/kernel/e820.c72
-rw-r--r--arch/x86/kernel/early_printk.c21
-rw-r--r--arch/x86/lib/usercopy_32.c6
-rw-r--r--arch/x86/mm/highmem_32.c1
-rw-r--r--arch/x86/mm/init.c5
-rw-r--r--arch/x86/mm/init_32.c10
-rw-r--r--arch/x86/mm/init_64.c7
-rw-r--r--arch/x86/mm/ioremap.c7
-rw-r--r--arch/x86/mm/mmap.c2
-rw-r--r--arch/x86/mm/pageattr-test.c5
-rw-r--r--arch/x86/platform/efi/efi.c15
-rw-r--r--arch/xtensa/mm/init.c21
-rw-r--r--block/blk-core.c34
-rw-r--r--block/blk-flush.c2
-rw-r--r--block/blk.h3
-rw-r--r--block/genhd.c2
-rw-r--r--block/partition-generic.c1
-rw-r--r--block/scsi_ioctl.c1
-rw-r--r--crypto/Kconfig16
-rw-r--r--crypto/Makefile2
-rw-r--r--crypto/async_tx/raid6test.c9
-rw-r--r--crypto/lz4.c105
-rw-r--r--crypto/lz4hc.c105
-rw-r--r--drivers/Kconfig4
-rw-r--r--drivers/block/cciss.c2
-rw-r--r--drivers/block/drbd/drbd_receiver.c5
-rw-r--r--drivers/block/loop.c19
-rw-r--r--drivers/block/mtip32xx/mtip32xx.c86
-rw-r--r--drivers/block/mtip32xx/mtip32xx.h8
-rw-r--r--drivers/block/swim3.c2
-rw-r--r--drivers/block/virtio_blk.c31
-rw-r--r--drivers/char/mem.c36
-rw-r--r--drivers/firmware/dmi_scan.c80
-rw-r--r--drivers/gpu/drm/drm_fb_helper.c8
-rw-r--r--drivers/infiniband/hw/cxgb3/cxio_resource.c4
-rw-r--r--drivers/infiniband/hw/cxgb4/id_table.c4
-rw-r--r--drivers/infiniband/hw/ipath/ipath_file_ops.c1
-rw-r--r--drivers/infiniband/hw/mlx4/mad.c2
-rw-r--r--drivers/infiniband/hw/qib/qib_file_ops.c2
-rw-r--r--drivers/infiniband/ulp/ipoib/ipoib_cm.c2
-rw-r--r--drivers/leds/leds-ot200.c14
-rw-r--r--drivers/lguest/page_tables.c2
-rw-r--r--drivers/md/dm.c2
-rw-r--r--drivers/memstick/host/r592.c8
-rw-r--r--drivers/message/i2o/i2o_config.c10
-rw-r--r--drivers/mmc/core/core.c4
-rw-r--r--drivers/net/ethernet/broadcom/cnic.c4
-rw-r--r--drivers/net/hamradio/baycom_epp.c2
-rw-r--r--drivers/net/hamradio/hdlcdrv.c2
-rw-r--r--drivers/net/hamradio/yam.c2
-rw-r--r--drivers/net/team/team_mode_random.c2
-rw-r--r--drivers/net/wireless/brcm80211/brcmfmac/p2p.c2
-rw-r--r--drivers/net/wireless/mwifiex/cfg80211.c4
-rw-r--r--drivers/platform/x86/thinkpad_acpi.c10
-rw-r--r--drivers/rtc/class.c70
-rw-r--r--drivers/rtc/rtc-88pm80x.c3
-rw-r--r--drivers/rtc/rtc-ab3100.c21
-rw-r--r--drivers/rtc/rtc-at32ap700x.c12
-rw-r--r--drivers/rtc/rtc-at91rm9200.c29
-rw-r--r--drivers/rtc/rtc-au1xxx.c18
-rw-r--r--drivers/rtc/rtc-bq32k.c5
-rw-r--r--drivers/rtc/rtc-coh901331.c18
-rw-r--r--drivers/rtc/rtc-da9052.c13
-rw-r--r--drivers/rtc/rtc-da9055.c5
-rw-r--r--drivers/rtc/rtc-davinci.c24
-rw-r--r--drivers/rtc/rtc-dm355evm.c7
-rw-r--r--drivers/rtc/rtc-ds1286.c3
-rw-r--r--drivers/rtc/rtc-ds1302.c22
-rw-r--r--drivers/rtc/rtc-ds1307.c70
-rw-r--r--drivers/rtc/rtc-ds1511.c8
-rw-r--r--drivers/rtc/rtc-ds1553.c6
-rw-r--r--drivers/rtc/rtc-ds1672.c8
-rw-r--r--drivers/rtc/rtc-ds1742.c8
-rw-r--r--drivers/rtc/rtc-ds3234.c7
-rw-r--r--drivers/rtc/rtc-efi.c19
-rw-r--r--drivers/rtc/rtc-em3027.c7
-rw-r--r--drivers/rtc/rtc-ep93xx.c11
-rw-r--r--drivers/rtc/rtc-fm3130.c2
-rw-r--r--drivers/rtc/rtc-generic.c21
-rw-r--r--drivers/rtc/rtc-hid-sensor-time.c7
-rw-r--r--drivers/rtc/rtc-imxdi.c23
-rw-r--r--drivers/rtc/rtc-lp8788.c5
-rw-r--r--drivers/rtc/rtc-lpc32xx.c5
-rw-r--r--drivers/rtc/rtc-ls1x.c5
-rw-r--r--drivers/rtc/rtc-m41t93.c9
-rw-r--r--drivers/rtc/rtc-m41t94.c9
-rw-r--r--drivers/rtc/rtc-m48t86.c11
-rw-r--r--drivers/rtc/rtc-max6900.c9
-rw-r--r--drivers/rtc/rtc-max6902.c7
-rw-r--r--drivers/rtc/rtc-max77686.c63
-rw-r--r--drivers/rtc/rtc-max8907.c7
-rw-r--r--drivers/rtc/rtc-max8997.c10
-rw-r--r--drivers/rtc/rtc-mc13xxx.c12
-rw-r--r--drivers/rtc/rtc-msm6242.c13
-rw-r--r--drivers/rtc/rtc-mv.c23
-rw-r--r--drivers/rtc/rtc-mxc.c15
-rw-r--r--drivers/rtc/rtc-nuc900.c19
-rw-r--r--drivers/rtc/rtc-omap.c12
-rw-r--r--drivers/rtc/rtc-palmas.c9
-rw-r--r--drivers/rtc/rtc-pcap.c19
-rw-r--r--drivers/rtc/rtc-pcf8523.c6
-rw-r--r--drivers/rtc/rtc-ps3.c16
-rw-r--r--drivers/rtc/rtc-pxa.c88
-rw-r--r--drivers/rtc/rtc-r9701.c7
-rw-r--r--drivers/rtc/rtc-rc5t583.c19
-rw-r--r--drivers/rtc/rtc-rp5c01.c13
-rw-r--r--drivers/rtc/rtc-rs5c313.c6
-rw-r--r--drivers/rtc/rtc-rv3029c2.c16
-rw-r--r--drivers/rtc/rtc-rx4581.c7
-rw-r--r--drivers/rtc/rtc-rx8581.c8
-rw-r--r--drivers/rtc/rtc-s3c.c32
-rw-r--r--drivers/rtc/rtc-sa1100.c12
-rw-r--r--drivers/rtc/rtc-sh.c22
-rw-r--r--drivers/rtc/rtc-snvs.c6
-rw-r--r--drivers/rtc/rtc-spear.c5
-rw-r--r--drivers/rtc/rtc-starfire.c23
-rw-r--r--drivers/rtc/rtc-stk17ta8.c6
-rw-r--r--drivers/rtc/rtc-sun4v.c22
-rw-r--r--drivers/rtc/rtc-tegra.c63
-rw-r--r--drivers/rtc/rtc-test.c10
-rw-r--r--drivers/rtc/rtc-tile.c9
-rw-r--r--drivers/rtc/rtc-tps6586x.c9
-rw-r--r--drivers/rtc/rtc-tps65910.c11
-rw-r--r--drivers/rtc/rtc-tps80031.c11
-rw-r--r--drivers/rtc/rtc-tx4939.c19
-rw-r--r--drivers/rtc/rtc-v3020.c37
-rw-r--r--drivers/rtc/rtc-vt8500.c8
-rw-r--r--drivers/rtc/rtc-wm831x.c6
-rw-r--r--drivers/rtc/rtc-wm8350.c19
-rw-r--r--drivers/rtc/rtc-x1205.c14
-rw-r--r--drivers/scsi/fcoe/fcoe_ctlr.c4
-rw-r--r--drivers/scsi/lpfc/lpfc_hbadisc.c6
-rw-r--r--drivers/scsi/qla2xxx/qla_attr.c11
-rw-r--r--drivers/scsi/sg.c1
-rw-r--r--drivers/staging/android/logger.c1
-rw-r--r--drivers/staging/zcache/Kconfig6
-rw-r--r--drivers/staging/zcache/ramster.h6
-rw-r--r--drivers/staging/zcache/ramster/nodemanager.c9
-rw-r--r--drivers/staging/zcache/ramster/ramster.c29
-rw-r--r--drivers/staging/zcache/ramster/ramster.h2
-rw-r--r--drivers/staging/zcache/ramster/ramster_nodemanager.h2
-rw-r--r--drivers/staging/zcache/tmem.c6
-rw-r--r--drivers/staging/zcache/tmem.h8
-rw-r--r--drivers/staging/zcache/zcache-main.c64
-rw-r--r--drivers/staging/zcache/zcache.h2
-rw-r--r--drivers/usb/gadget/amd5536udc.c4
-rw-r--r--drivers/usb/gadget/inode.c42
-rw-r--r--drivers/uwb/rsv.c4
-rw-r--r--drivers/video/Kconfig9
-rw-r--r--drivers/video/Makefile1
-rw-r--r--drivers/video/backlight/Kconfig9
-rw-r--r--drivers/video/backlight/Makefile1
-rw-r--r--drivers/video/backlight/adp5520_bl.c28
-rw-r--r--drivers/video/backlight/adp8860_bl.c21
-rw-r--r--drivers/video/backlight/adp8870_bl.c31
-rw-r--r--drivers/video/backlight/ams369fg06.c21
-rw-r--r--drivers/video/backlight/atmel-pwm-bl.c14
-rw-r--r--drivers/video/backlight/corgi_lcd.c18
-rw-r--r--drivers/video/backlight/da903x_bl.c30
-rw-r--r--drivers/video/backlight/ep93xx_bl.c20
-rw-r--r--drivers/video/backlight/generic_bl.c6
-rw-r--r--drivers/video/backlight/hp680_bl.c18
-rw-r--r--drivers/video/backlight/ili922x.c555
-rw-r--r--drivers/video/backlight/jornada720_bl.c18
-rw-r--r--drivers/video/backlight/jornada720_lcd.c21
-rw-r--r--drivers/video/backlight/kb3886_bl.c18
-rw-r--r--drivers/video/backlight/l4f00242t03.c27
-rw-r--r--drivers/video/backlight/ld9040.c20
-rw-r--r--drivers/video/backlight/lm3533_bl.c22
-rw-r--r--drivers/video/backlight/lms501kf03.c22
-rw-r--r--drivers/video/backlight/locomolcd.c16
-rw-r--r--drivers/video/backlight/lp855x_bl.c7
-rw-r--r--drivers/video/backlight/ltv350qv.c18
-rw-r--r--drivers/video/backlight/omap1_bl.c4
-rw-r--r--drivers/video/backlight/platform_lcd.c6
-rw-r--r--drivers/video/backlight/s6e63m0.c20
-rw-r--r--drivers/video/backlight/tdo24m.c18
-rw-r--r--drivers/video/console/fbcon_cw.c3
-rw-r--r--drivers/video/cyber2000fb.c3
-rw-r--r--drivers/video/exynos/exynos_mipi_dsi.c8
-rw-r--r--drivers/video/hyperv_fb.c829
-rw-r--r--drivers/video/matrox/matroxfb_maven.c16
-rw-r--r--drivers/video/uvesafb.c2
-rw-r--r--drivers/xen/Kconfig4
-rw-r--r--drivers/xen/tmem.c55
-rw-r--r--drivers/xen/xen-selfballoon.c13
-rw-r--r--fs/9p/vfs_addr.c1
-rw-r--r--fs/Makefile3
-rw-r--r--fs/afs/write.c1
-rw-r--r--fs/aio.c1790
-rw-r--r--fs/binfmt_aout.c3
-rw-r--r--fs/binfmt_elf.c26
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/bio.c52
-rw-r--r--fs/block_dev.c9
-rw-r--r--fs/btrfs/file.c4
-rw-r--r--fs/btrfs/inode.c1
-rw-r--r--fs/buffer.c9
-rw-r--r--fs/ceph/file.c1
-rw-r--r--fs/cifs/file.c3
-rw-r--r--fs/compat.c1
-rw-r--r--fs/coredump.c85
-rw-r--r--fs/direct-io.c24
-rw-r--r--fs/drop_caches.c2
-rw-r--r--fs/ecryptfs/file.c1
-rw-r--r--fs/eventpoll.c168
-rw-r--r--fs/exec.c14
-rw-r--r--fs/ext2/inode.c1
-rw-r--r--fs/ext3/inode.c1
-rw-r--r--fs/ext3/super.c1
-rw-r--r--fs/ext4/file.c1
-rw-r--r--fs/ext4/indirect.c1
-rw-r--r--fs/ext4/inode.c1
-rw-r--r--fs/ext4/page-io.c1
-rw-r--r--fs/f2fs/data.c1
-rw-r--r--fs/fat/dir.c23
-rw-r--r--fs/fat/fat.h40
-rw-r--r--fs/fat/file.c5
-rw-r--r--fs/fat/inode.c75
-rw-r--r--fs/fat/nfs.c221
-rw-r--r--fs/fuse/cuse.c1
-rw-r--r--fs/fuse/dev.c1
-rw-r--r--fs/fuse/file.c4
-rw-r--r--fs/gfs2/aops.c1
-rw-r--r--fs/gfs2/file.c1
-rw-r--r--fs/hfs/inode.c1
-rw-r--r--fs/hfsplus/bfind.c8
-rw-r--r--fs/hfsplus/inode.c1
-rw-r--r--fs/jbd/commit.c4
-rw-r--r--fs/jfs/inode.c1
-rw-r--r--fs/nilfs2/inode.c2
-rw-r--r--fs/ntfs/file.c4
-rw-r--r--fs/ntfs/inode.c1
-rw-r--r--fs/ocfs2/aops.h2
-rw-r--r--fs/ocfs2/dlmglue.c2
-rw-r--r--fs/ocfs2/file.c7
-rw-r--r--fs/ocfs2/inode.h2
-rw-r--r--fs/ocfs2/ioctl.c18
-rw-r--r--fs/pipe.c1
-rw-r--r--fs/proc/Makefile2
-rw-r--r--fs/proc/generic.c62
-rw-r--r--fs/proc/inode.c176
-rw-r--r--fs/proc/internal.h18
-rw-r--r--fs/proc/meminfo.c1
-rw-r--r--fs/proc/mmu.c60
-rw-r--r--fs/read_write.c35
-rw-r--r--fs/reiserfs/inode.c1
-rw-r--r--fs/splice.c3
-rw-r--r--fs/ubifs/file.c1
-rw-r--r--fs/udf/inode.c1
-rw-r--r--fs/xfs/xfs_aops.c1
-rw-r--r--fs/xfs/xfs_file.c4
-rw-r--r--include/asm-generic/hugetlb.h40
-rw-r--r--include/asm-generic/pgtable.h10
-rw-r--r--include/linux/aio.h199
-rw-r--r--include/linux/balloon_compaction.h7
-rw-r--r--include/linux/batch_complete.h23
-rw-r--r--include/linux/bio.h36
-rw-r--r--include/linux/blk_types.h12
-rw-r--r--include/linux/blkdev.h12
-rw-r--r--include/linux/buffer_head.h1
-rw-r--r--include/linux/cgroup.h8
-rw-r--r--include/linux/cleancache.h4
-rw-r--r--include/linux/console.h1
-rw-r--r--include/linux/debug_locks.h2
-rw-r--r--include/linux/decompress/unlz4.h10
-rw-r--r--include/linux/errno.h1
-rw-r--r--include/linux/exportfs.h11
-rw-r--r--include/linux/frontswap.h36
-rw-r--r--include/linux/fs.h12
-rw-r--r--include/linux/hardirq.h5
-rw-r--r--include/linux/hyperv.h12
-rw-r--r--include/linux/irq_work.h2
-rw-r--r--include/linux/kernel.h1
-rw-r--r--include/linux/kmod.h17
-rw-r--r--include/linux/lockdep.h92
-rw-r--r--include/linux/lz4.h87
-rw-r--r--include/linux/mm.h62
-rw-r--r--include/linux/mm_types.h3
-rw-r--r--include/linux/mmzone.h22
-rw-r--r--include/linux/net.h4
-rw-r--r--include/linux/pagemap.h2
-rw-r--r--include/linux/percpu-refcount.h114
-rw-r--r--include/linux/pid_namespace.h5
-rw-r--r--include/linux/posix-timers.h16
-rw-r--r--include/linux/printk.h13
-rw-r--r--include/linux/proc_fs.h6
-rw-r--r--include/linux/ramfs.h8
-rw-r--r--include/linux/random.h7
-rw-r--r--include/linux/relay.h3
-rw-r--r--include/linux/rtc-pxa.h18
-rw-r--r--include/linux/rtc.h6
-rw-r--r--include/linux/sched.h5
-rw-r--r--include/linux/vm_event_item.h7
-rw-r--r--include/linux/vmalloc.h34
-rw-r--r--include/linux/wait.h86
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/net/ip_vs.h8
-rw-r--r--include/trace/events/filemap.h58
-rw-r--r--include/trace/events/printk.h25
-rw-r--r--include/uapi/linux/eventpoll.h1
-rw-r--r--include/uapi/linux/fs.h1
-rw-r--r--include/uapi/linux/ptrace.h12
-rw-r--r--include/xen/tmem.h8
-rw-r--r--init/Kconfig34
-rw-r--r--init/do_mounts_initrd.c8
-rw-r--r--init/main.c13
-rw-r--r--ipc/mqueue.c3
-rw-r--r--ipc/msg.c82
-rw-r--r--ipc/msgutil.c110
-rw-r--r--ipc/sem.c250
-rw-r--r--kernel/acct.c3
-rw-r--r--kernel/cgroup.c49
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/kexec.c43
-rw-r--r--kernel/kmod.c98
-rw-r--r--kernel/kthread.c44
-rw-r--r--kernel/lglock.c12
-rw-r--r--kernel/pid.c11
-rw-r--r--kernel/pid_namespace.c2
-rw-r--r--kernel/posix-cpu-timers.c390
-rw-r--r--kernel/printk.c116
-rw-r--r--kernel/ptrace.c81
-rw-r--r--kernel/range.c3
-rw-r--r--kernel/rcutree.c2
-rw-r--r--kernel/relay.c15
-rw-r--r--kernel/sched/sched.h1
-rw-r--r--kernel/semaphore.c8
-rw-r--r--kernel/signal.c6
-rw-r--r--kernel/smp.c102
-rw-r--r--kernel/sys.c67
-rw-r--r--kernel/sysctl.c1
-rw-r--r--kernel/test_kprobes.c2
-rw-r--r--kernel/time/timer_list.c112
-rw-r--r--kernel/watchdog.c15
-rw-r--r--lib/Kconfig13
-rw-r--r--lib/Kconfig.debug18
-rw-r--r--lib/Makefile7
-rw-r--r--lib/argv_split.c87
-rw-r--r--lib/bust_spinlocks.c3
-rw-r--r--lib/decompress.c5
-rw-r--r--lib/decompress_unlz4.c187
-rw-r--r--lib/dma-debug.c45
-rw-r--r--lib/fault-inject.c2
-rw-r--r--lib/int_sqrt.c32
-rw-r--r--lib/list_sort.c2
-rw-r--r--lib/lz4/Makefile3
-rw-r--r--lib/lz4/lz4_compress.c443
-rw-r--r--lib/lz4/lz4_decompress.c326
-rw-r--r--lib/lz4/lz4defs.h156
-rw-r--r--lib/lz4/lz4hc_compress.c539
-rw-r--r--lib/percpu-refcount.c243
-rw-r--r--lib/show_mem.c3
-rw-r--r--lib/usercopy.c (renamed from arch/s390/lib/usercopy.c)3
-rw-r--r--lib/uuid.c8
-rw-r--r--mm/balloon_compaction.c2
-rw-r--r--mm/bounce.c21
-rw-r--r--mm/cleancache.c265
-rw-r--r--mm/dmapool.c5
-rw-r--r--mm/filemap.c8
-rw-r--r--mm/frontswap.c156
-rw-r--r--mm/hugetlb.c31
-rw-r--r--mm/memblock.c3
-rw-r--r--mm/memcontrol.c346
-rw-r--r--mm/memory-failure.c4
-rw-r--r--mm/memory_hotplug.c2
-rw-r--r--mm/migrate.c1
-rw-r--r--mm/mmap.c35
-rw-r--r--mm/mmu_context.c3
-rw-r--r--mm/nommu.c7
-rw-r--r--mm/page-writeback.c4
-rw-r--r--mm/page_alloc.c41
-rw-r--r--mm/page_io.c1
-rw-r--r--mm/rmap.c3
-rw-r--r--mm/shmem.c6
-rw-r--r--mm/swap.c1
-rw-r--r--mm/swapfile.c19
-rw-r--r--mm/util.c1
-rw-r--r--mm/vmalloc.c218
-rw-r--r--mm/vmscan.c4
-rw-r--r--mm/vmstat.c9
-rw-r--r--net/core/pktgen.c42
-rw-r--r--net/netfilter/ipvs/ip_vs_ctl.c4
-rw-r--r--net/netfilter/nf_conntrack_core.c4
-rw-r--r--net/sched/sch_choke.c2
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_wrap.c4
-rw-r--r--scripts/Makefile.headersinst4
-rw-r--r--scripts/Makefile.lib5
-rwxr-xr-xscripts/checkpatch.pl26
-rwxr-xr-xscripts/decodecode8
-rwxr-xr-xscripts/get_maintainer.pl2
-rw-r--r--scripts/headers_install.pl63
-rw-r--r--scripts/headers_install.sh43
-rw-r--r--security/keys/internal.h2
-rw-r--r--security/keys/keyctl.c1
-rw-r--r--security/keys/request_key.c13
-rw-r--r--sound/core/info.c8
-rw-r--r--sound/core/pcm_native.c2
-rw-r--r--tools/testing/selftests/Makefile3
-rw-r--r--tools/testing/selftests/epoll/Makefile11
-rw-r--r--tools/testing/selftests/epoll/test_epoll.c364
-rw-r--r--tools/testing/selftests/ptrace/Makefile10
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c214
-rw-r--r--tools/testing/selftests/timers/Makefile8
-rw-r--r--tools/testing/selftests/timers/posix_timers.c221
-rw-r--r--usr/Kconfig9
507 files changed, 10524 insertions, 5645 deletions
diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt
index 8b8c28b9864c..addb1f110e9a 100644
--- a/Documentation/cgroups/memory.txt
+++ b/Documentation/cgroups/memory.txt
@@ -70,6 +70,7 @@ Brief summary of control files.
memory.move_charge_at_immigrate # set/show controls of moving charges
memory.oom_control # set/show oom controls.
memory.numa_stat # show the number of memory usage per numa node
+ memory.dangling_memcgs # show debugging information about dangling groups
memory.kmem.limit_in_bytes # set/show hard limit for kernel memory
memory.kmem.usage_in_bytes # show current kernel memory allocation
@@ -577,6 +578,21 @@ unevictable=<total anon pages> N0=<node 0 pages> N1=<node 1 pages> ...
And we have total = file + anon + unevictable.
+5.7 dangling_memcgs
+
+This file will only be ever present in the root cgroup, if the option
+CONFIG_MEMCG_DEBUG_ASYNC_DESTROY is set. When a memcg is destroyed, the memory
+consumed by it may not be immediately freed. This is because when some
+extensions are used, such as swap or kernel memory, objects can outlive the
+group and hold a reference to it.
+
+If this is the case, the dangling_memcgs file will show information about what
+are the memcgs still alive, and which references are still preventing it to be
+freed. There is nothing wrong with that, but it is very useful when debugging,
+to know where this memory is being held. This is a developer-oriented debugging
+facility only, and no guarantees of interface stability will be given. The file
+is read-only, and has the sole purpose of displaying information.
+
6. Hierarchy support
The memory controller supports a deep hierarchy and hierarchical accounting.
diff --git a/Documentation/filesystems/vfat.txt b/Documentation/filesystems/vfat.txt
index d230dd9c99b0..4a93e98b290a 100644
--- a/Documentation/filesystems/vfat.txt
+++ b/Documentation/filesystems/vfat.txt
@@ -150,12 +150,28 @@ discard -- If set, issues discard/TRIM commands to the block
device when blocks are freed. This is useful for SSD devices
and sparse/thinly-provisoned LUNs.
-nfs -- This option maintains an index (cache) of directory
- inodes by i_logstart which is used by the nfs-related code to
- improve look-ups.
+nfs=stale_rw|nostale_ro
+ Enable this only if you want to export the FAT filesystem
+ over NFS.
+
+ stale_rw: This option maintains an index (cache) of directory
+ inodes by i_logstart which is used by the nfs-related code to
+ improve look-ups. Full file operations (read/write) over NFS is
+ supported but with cache eviction at NFS server, this could
+ result in ESTALE issues.
+
+ nostale_ro: This option bases the inode number and filehandle
+ on the on-disk location of a file in the MS-DOS directory entry.
+ This ensures that ESTALE will not be returned after a file is
+ evicted from the inode cache. However, it means that operations
+ such as rename, create and unlink could cause filehandles that
+ previously pointed at one file to point at a different file,
+ potentially causing data corruption. For this reason, this
+ option also mounts the filesystem readonly.
+
+ To maintain backward compatibility, '-o nfs' is also accepted,
+ defaulting to stale_rw
- Enable this only if you want to export the FAT filesystem
- over NFS
<bool>: 0,1,yes,no,true,false
diff --git a/Documentation/sysctl/vm.txt b/Documentation/sysctl/vm.txt
index 078701fdbd4d..21ad1815be8c 100644
--- a/Documentation/sysctl/vm.txt
+++ b/Documentation/sysctl/vm.txt
@@ -138,18 +138,39 @@ Setting this to zero disables periodic writeback altogether.
drop_caches
-Writing to this will cause the kernel to drop clean caches, dentries and
-inodes from memory, causing that memory to become free.
+Writing to this will cause the kernel to drop clean caches, as well as
+reclaimable slab objects like dentries and inodes. Once dropped, their
+memory becomes free.
To free pagecache:
echo 1 > /proc/sys/vm/drop_caches
-To free dentries and inodes:
+To free reclaimable slab objects (includes dentries and inodes):
echo 2 > /proc/sys/vm/drop_caches
-To free pagecache, dentries and inodes:
+To free slab objects and pagecache:
echo 3 > /proc/sys/vm/drop_caches
-As this is a non-destructive operation and dirty objects are not freeable, the
-user should run `sync' first.
+This is a non-destructive operation and will not free any dirty objects.
+To increase the number of objects freed by this operation, the user may run
+`sync' prior to writing to /proc/sys/vm/drop_caches. This will minimize the
+number of dirty objects on the system and create more candidates to be
+dropped.
+
+This file is not a means to control the growth of the various kernel caches
+(inodes, dentries, pagecache, etc...) These objects are automatically
+reclaimed by the kernel when memory is needed elsewhere on the system.
+
+Use of this file can cause performance problems. Since it discards cached
+objects, it may cost a significant amount of I/O and CPU to recreate the
+dropped objects, especially if they were under heavy use. Because of this,
+use outside of a testing or debugging environment is not recommended.
+
+You may see informational messages in your kernel log when this file is
+used:
+
+ cat (1234): dropped kernel caches: 3
+
+These are informational only. They do not mean that anything is wrong
+with your system.
==============================================================
diff --git a/MAINTAINERS b/MAINTAINERS
index ab5588832c8a..334e9afd88c4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -90,6 +90,9 @@ Descriptions of section entries:
F: drivers/net/* all files in drivers/net, but not below
F: */net/* all files in "any top level directory"/net
One pattern per line. Multiple F: lines acceptable.
+ N: Files and directories with regex patterns.
+ N: [^a-z]tegra all files whose path contains the word tegra
+ One pattern per line. Multiple N: lines acceptable.
X: Files and directories that are NOT maintained, same rules as F:
Files exclusions are tested before file matches.
Can be useful for excluding a specific subdirectory, for instance:
@@ -97,13 +100,12 @@ Descriptions of section entries:
X: net/ipv6/
matches all files in and below net excluding net/ipv6/
K: Keyword perl extended regex pattern to match content in a
- patch or file, or an affected filename. For instance:
+ patch or file. For instance:
K: of_get_profile
- matches patch or file content, or filenames, that contain
- "of_get_profile"
+ matches patches or files that contain "of_get_profile"
K: \b(printk|pr_(info|err))\b
- matches patch or file content, or filenames, that contain one or
- more of the words printk, pr_info or pr_err
+ matches patches or files that contain one or more of the words
+ printk, pr_info or pr_err
One regex pattern per line. Multiple K: lines acceptable.
Note: For the hard of thinking, this list is meant to remain in alphabetical
@@ -7883,7 +7885,7 @@ L: linux-tegra@vger.kernel.org
Q: http://patchwork.ozlabs.org/project/linux-tegra/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra.git
S: Supported
-K: (?i)[^a-z]tegra
+N: [^a-z]tegra
TEHUTI ETHERNET DRIVER
M: Andy Gospodarek <andy@greyhouse.net>
diff --git a/arch/alpha/kernel/sys_nautilus.c b/arch/alpha/kernel/sys_nautilus.c
index 4d4c046f708d..a8b9d66502cc 100644
--- a/arch/alpha/kernel/sys_nautilus.c
+++ b/arch/alpha/kernel/sys_nautilus.c
@@ -185,7 +185,6 @@ nautilus_machine_check(unsigned long vector, unsigned long la_ptr)
mb();
}
-extern void free_reserved_mem(void *, void *);
extern void pcibios_claim_one_bus(struct pci_bus *);
static struct resource irongate_mem = {
@@ -234,8 +233,8 @@ nautilus_init_pci(void)
if (pci_mem < memtop)
memtop = pci_mem;
if (memtop > alpha_mv.min_mem_address) {
- free_reserved_mem(__va(alpha_mv.min_mem_address),
- __va(memtop));
+ free_reserved_area((unsigned long)__va(alpha_mv.min_mem_address),
+ (unsigned long)__va(memtop), 0, NULL);
printk("nautilus_init_pci: %ldk freed\n",
(memtop - alpha_mv.min_mem_address) >> 10);
}
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
index 1ad6ca74bed2..0ba85ee4a466 100644
--- a/arch/alpha/mm/init.c
+++ b/arch/alpha/mm/init.c
@@ -31,6 +31,7 @@
#include <asm/console.h>
#include <asm/tlb.h>
#include <asm/setup.h>
+#include <asm/sections.h>
extern void die_if_kernel(char *,struct pt_regs *,long);
@@ -281,8 +282,6 @@ printk_memory_info(void)
{
unsigned long codesize, reservedpages, datasize, initsize, tmp;
extern int page_is_ram(unsigned long) __init;
- extern char _text, _etext, _data, _edata;
- extern char __init_begin, __init_end;
/* printk all informations */
reservedpages = 0;
@@ -318,32 +317,15 @@ mem_init(void)
#endif /* CONFIG_DISCONTIGMEM */
void
-free_reserved_mem(void *start, void *end)
-{
- void *__start = start;
- for (; __start < end; __start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(__start));
- init_page_count(virt_to_page(__start));
- free_page((long)__start);
- totalram_pages++;
- }
-}
-
-void
free_initmem(void)
{
- extern char __init_begin, __init_end;
-
- free_reserved_mem(&__init_begin, &__init_end);
- printk ("Freeing unused kernel memory: %ldk freed\n",
- (&__init_end - &__init_begin) >> 10);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
void
free_initrd_mem(unsigned long start, unsigned long end)
{
- free_reserved_mem((void *)start, (void *)end);
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
index 3973ae395772..33885048fa36 100644
--- a/arch/alpha/mm/numa.c
+++ b/arch/alpha/mm/numa.c
@@ -17,6 +17,7 @@
#include <asm/hwrpb.h>
#include <asm/pgalloc.h>
+#include <asm/sections.h>
pg_data_t node_data[MAX_NUMNODES];
EXPORT_SYMBOL(node_data);
@@ -325,8 +326,6 @@ void __init mem_init(void)
{
unsigned long codesize, reservedpages, datasize, initsize, pfn;
extern int page_is_ram(unsigned long) __init;
- extern char _text, _etext, _data, _edata;
- extern char __init_begin, __init_end;
unsigned long nid, i;
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
diff --git a/arch/arc/mm/init.c b/arch/arc/mm/init.c
index 6634cf50e3b4..4a177365b2c4 100644
--- a/arch/arc/mm/init.c
+++ b/arch/arc/mm/init.c
@@ -141,37 +141,18 @@ void __init mem_init(void)
PAGES_TO_KB(reserved_pages));
}
-static void __init free_init_pages(const char *what, unsigned long begin,
- unsigned long end)
-{
- unsigned long addr;
-
- pr_info("Freeing %s: %ldk [%lx] to [%lx]\n",
- what, TO_KB(end - begin), begin, end);
-
- /* need to check that the page we free is not a partial page */
- for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
-}
-
/*
* free_initmem: Free all the __init memory.
*/
void __init_refok free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)__init_begin,
- (unsigned long)__init_end);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 87b67cdbff75..5e3436432e01 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -38,6 +38,7 @@ config ARM
select HAVE_HW_BREAKPOINT if (PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7))
select HAVE_IDE if PCI || ISA || PCMCIA
select HAVE_KERNEL_GZIP
+ select HAVE_KERNEL_LZ4
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_LZO
select HAVE_KERNEL_XZ
diff --git a/arch/arm/boot/compressed/.gitignore b/arch/arm/boot/compressed/.gitignore
index f79a08efe000..47279aa96a6a 100644
--- a/arch/arm/boot/compressed/.gitignore
+++ b/arch/arm/boot/compressed/.gitignore
@@ -6,6 +6,7 @@ piggy.gzip
piggy.lzo
piggy.lzma
piggy.xzkern
+piggy.lz4
vmlinux
vmlinux.lds
diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile
index 3580d57ea218..6f124d1ee391 100644
--- a/arch/arm/boot/compressed/Makefile
+++ b/arch/arm/boot/compressed/Makefile
@@ -24,6 +24,9 @@ endif
AFLAGS_head.o += -DTEXT_OFFSET=$(TEXT_OFFSET)
HEAD = head.o
OBJS += misc.o decompress.o
+ifeq ($(CONFIG_KERNEL_LZ4),y)
+CFLAGS_decompress.o := -Os
+endif
ifeq ($(CONFIG_DEBUG_UNCOMPRESS),y)
OBJS += debug.o
endif
@@ -91,6 +94,7 @@ suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
suffix_$(CONFIG_KERNEL_LZMA) = lzma
suffix_$(CONFIG_KERNEL_XZ) = xzkern
+suffix_$(CONFIG_KERNEL_LZ4) = lz4
# Borrowed libfdt files for the ATAG compatibility mode
@@ -115,7 +119,7 @@ targets := vmlinux vmlinux.lds \
font.o font.c head.o misc.o $(OBJS)
# Make sure files are removed during clean
-extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern \
+extra-y += piggy.gzip piggy.lzo piggy.lzma piggy.xzkern piggy.lz4 \
lib1funcs.S ashldi3.S $(libfdt) $(libfdt_hdrs)
ifeq ($(CONFIG_FUNCTION_TRACER),y)
diff --git a/arch/arm/boot/compressed/decompress.c b/arch/arm/boot/compressed/decompress.c
index 24b0475cb8bf..bd245d34952d 100644
--- a/arch/arm/boot/compressed/decompress.c
+++ b/arch/arm/boot/compressed/decompress.c
@@ -51,6 +51,10 @@ extern char * strstr(const char * s1, const char *s2);
#include "../../../../lib/decompress_unxz.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
int do_decompress(u8 *input, int len, u8 *output, void (*error)(char *x))
{
return decompress(input, len, NULL, NULL, output, NULL, error);
diff --git a/arch/arm/boot/compressed/piggy.lz4.S b/arch/arm/boot/compressed/piggy.lz4.S
new file mode 100644
index 000000000000..3d9a575618a3
--- /dev/null
+++ b/arch/arm/boot/compressed/piggy.lz4.S
@@ -0,0 +1,6 @@
+ .section .piggydata,#alloc
+ .globl input_data
+input_data:
+ .incbin "arch/arm/boot/compressed/piggy.lz4"
+ .globl input_data_end
+input_data_end:
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 80d6fc4dbe4a..9bcd262a9008 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -61,6 +61,15 @@ extern void __pgd_error(const char *file, int line, pgd_t);
#define FIRST_USER_ADDRESS PAGE_SIZE
/*
+ * Use TASK_SIZE as the ceiling argument for free_pgtables() and
+ * free_pgd_range() to avoid freeing the modules pmd when LPAE is enabled (pmd
+ * page shared between user and kernel).
+ */
+#ifdef CONFIG_ARM_LPAE
+#define USER_PGTABLES_CEILING TASK_SIZE
+#endif
+
+/*
* The pgprot_* and protection_map entries will be fixed up in runtime
* to include the cachable and bufferable bits based on memory policy,
* as well as any architecture dependent bits like global/ASID and SMP
diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c
index 85aa2b292692..43076536965c 100644
--- a/arch/arm/kernel/early_printk.c
+++ b/arch/arm/kernel/early_printk.c
@@ -29,28 +29,17 @@ static void early_console_write(struct console *con, const char *s, unsigned n)
early_write(s, n);
}
-static struct console early_console = {
+static struct console early_console_dev = {
.name = "earlycon",
.write = early_console_write,
.flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1,
};
-asmlinkage void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_write(buf, n);
- va_end(ap);
-}
-
static int __init setup_early_printk(char *buf)
{
- register_console(&early_console);
+ early_console = &early_console_dev;
+ register_console(&early_console_dev);
return 0;
}
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index ad722f1208a5..9a5cdc01fcdf 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -99,6 +99,9 @@ void show_mem(unsigned int filter)
printk("Mem-info:\n");
show_free_areas(filter);
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
+
for_each_bank (i, mi) {
struct membank *bank = &mi->bank[i];
unsigned int pfn1, pfn2;
@@ -424,24 +427,6 @@ void __init bootmem_init(void)
max_pfn = max_high - PHYS_PFN_OFFSET;
}
-static inline int free_area(unsigned long pfn, unsigned long end, char *s)
-{
- unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10);
-
- for (; pfn < end; pfn++) {
- struct page *page = pfn_to_page(pfn);
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- pages++;
- }
-
- if (size && s)
- printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
-
- return pages;
-}
-
/*
* Poison init memory with an undefined instruction (ARM) or a branch to an
* undefined instruction (Thumb).
@@ -534,6 +519,14 @@ static void __init free_unused_memmap(struct meminfo *mi)
#endif
}
+#ifdef CONFIG_HIGHMEM
+static inline void free_area_high(unsigned long pfn, unsigned long end)
+{
+ for (; pfn < end; pfn++)
+ free_highmem_page(pfn_to_page(pfn));
+}
+#endif
+
static void __init free_highpages(void)
{
#ifdef CONFIG_HIGHMEM
@@ -569,8 +562,7 @@ static void __init free_highpages(void)
if (res_end > end)
res_end = end;
if (res_start != start)
- totalhigh_pages += free_area(start, res_start,
- NULL);
+ free_area_high(start, res_start);
start = res_end;
if (start == end)
break;
@@ -578,9 +570,8 @@ static void __init free_highpages(void)
/* And now free anything which remains */
if (start < end)
- totalhigh_pages += free_area(start, end, NULL);
+ free_area_high(start, end);
}
- totalram_pages += totalhigh_pages;
#endif
}
@@ -609,8 +600,7 @@ void __init mem_init(void)
#ifdef CONFIG_SA1111
/* now that our DMA memory is actually so designated, we can free it */
- totalram_pages += free_area(PHYS_PFN_OFFSET,
- __phys_to_pfn(__pa(swapper_pg_dir)), NULL);
+ free_reserved_area(__va(PHYS_PFN_OFFSET), swapper_pg_dir, 0, NULL);
#endif
free_highpages();
@@ -738,16 +728,12 @@ void free_initmem(void)
extern char __tcm_start, __tcm_end;
poison_init_mem(&__tcm_start, &__tcm_end - &__tcm_start);
- totalram_pages += free_area(__phys_to_pfn(__pa(&__tcm_start)),
- __phys_to_pfn(__pa(&__tcm_end)),
- "TCM link");
+ free_reserved_area(&__tcm_start, &__tcm_end, 0, "TCM link");
#endif
poison_init_mem(__init_begin, __init_end - __init_begin);
if (!machine_is_integrator() && !machine_is_cintegrator())
- totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
- __phys_to_pfn(__pa(__init_end)),
- "init");
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -758,9 +744,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd) {
poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
- totalram_pages += free_area(__phys_to_pfn(__pa(start)),
- __phys_to_pfn(__pa(end)),
- "initrd");
+ free_reserved_area(start, end, 0, "initrd");
}
}
diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c
index 10062ceadd1c..0c6356255fe3 100644
--- a/arch/arm/mm/mmap.c
+++ b/arch/arm/mm/mmap.c
@@ -181,11 +181,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 800aac306a08..f497ca77925a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -197,24 +197,6 @@ void __init bootmem_init(void)
max_pfn = max_low_pfn = max;
}
-static inline int free_area(unsigned long pfn, unsigned long end, char *s)
-{
- unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10);
-
- for (; pfn < end; pfn++) {
- struct page *page = pfn_to_page(pfn);
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- pages++;
- }
-
- if (size && s)
- pr_info("Freeing %s memory: %dK\n", s, size);
-
- return pages;
-}
-
/*
* Poison init memory with an undefined instruction (0x0).
*/
@@ -405,9 +387,7 @@ void __init mem_init(void)
void free_initmem(void)
{
poison_init_mem(__init_begin, __init_end - __init_begin);
- totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
- __phys_to_pfn(__pa(__init_end)),
- "init");
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -418,9 +398,7 @@ void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd) {
poison_init_mem((void *)start, PAGE_ALIGN(end) - start);
- totalram_pages += free_area(__phys_to_pfn(__pa(start)),
- __phys_to_pfn(__pa(end)),
- "initrd");
+ free_reserved_area(start, end, 0, "initrd");
}
}
diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c
index 7c7be7855638..8ed6cb1a900f 100644
--- a/arch/arm64/mm/mmap.c
+++ b/arch/arm64/mm/mmap.c
@@ -90,11 +90,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
diff --git a/arch/avr32/mm/init.c b/arch/avr32/mm/init.c
index 2798c2d4a1cf..e66e8406f992 100644
--- a/arch/avr32/mm/init.c
+++ b/arch/avr32/mm/init.c
@@ -146,34 +146,14 @@ void __init mem_init(void)
initsize >> 10);
}
-static inline void free_area(unsigned long addr, unsigned long end, char *s)
-{
- unsigned int size = (end - addr) >> 10;
-
- for (; addr < end; addr += PAGE_SIZE) {
- struct page *page = virt_to_page(addr);
- ClearPageReserved(page);
- init_page_count(page);
- free_page(addr);
- totalram_pages++;
- }
-
- if (size && s)
- printk(KERN_INFO "Freeing %s memory: %dK (%lx - %lx)\n",
- s, size, end - (size << 10), end);
-}
-
void free_initmem(void)
{
- free_area((unsigned long)__init_begin, (unsigned long)__init_end,
- "init");
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
-
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_area(start, end, "initrd");
+ free_reserved_area(start, end, 0, "initrd");
}
-
#endif
diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c
index 84ed8375113c..61fbd2de993d 100644
--- a/arch/blackfin/kernel/early_printk.c
+++ b/arch/blackfin/kernel/early_printk.c
@@ -25,8 +25,6 @@ extern struct console *bfin_earlyserial_init(unsigned int port,
extern struct console *bfin_jc_early_init(void);
#endif
-static struct console *early_console;
-
/* Default console */
#define DEFAULT_PORT 0
#define DEFAULT_CFLAG CS8|B57600
diff --git a/arch/blackfin/mm/init.c b/arch/blackfin/mm/init.c
index 9cb85537bd2b..82d01a71207f 100644
--- a/arch/blackfin/mm/init.c
+++ b/arch/blackfin/mm/init.c
@@ -103,7 +103,7 @@ void __init mem_init(void)
max_mapnr = num_physpages = MAP_NR(high_memory);
printk(KERN_DEBUG "Kernel managed physical pages: %lu\n", num_physpages);
- /* This will put all memory onto the freelists. */
+ /* This will put all low memory onto the freelists. */
totalram_pages = free_all_bootmem();
reservedpages = 0;
@@ -129,24 +129,11 @@ void __init mem_init(void)
initk, codek, datak, DMA_UNCACHED_REGION >> 10, (reservedpages << (PAGE_SHIFT-10)));
}
-static void __init free_init_pages(const char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
- /* next to check that the page we free is not a partial page */
- for (addr = begin; addr + PAGE_SIZE <= end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
-}
-
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
#ifndef CONFIG_MPU
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, 0, "initrd");
#endif
}
#endif
@@ -154,10 +141,7 @@ void __init free_initrd_mem(unsigned long start, unsigned long end)
void __init_refok free_initmem(void)
{
#if defined CONFIG_RAMKERNEL && !defined CONFIG_MPU
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
-
+ free_initmem_default(0);
if (memory_start == (unsigned long)(&__init_end))
memory_start = (unsigned long)(&__init_begin);
#endif
diff --git a/arch/c6x/mm/init.c b/arch/c6x/mm/init.c
index 89395f09648a..a9fcd89b251b 100644
--- a/arch/c6x/mm/init.c
+++ b/arch/c6x/mm/init.c
@@ -77,37 +77,11 @@ void __init mem_init(void)
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- int pages = 0;
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- pages++;
- }
- printk(KERN_INFO "Freeing initrd memory: %luk freed\n",
- (pages * PAGE_SIZE) >> 10);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
void __init free_initmem(void)
{
- unsigned long addr;
-
- /*
- * The following code should be cool even if these sections
- * are not page aligned.
- */
- addr = PAGE_ALIGN((unsigned long)(__init_begin));
-
- /* next to check that the page we free is not a partial page */
- for (; addr + PAGE_SIZE < (unsigned long)(__init_end);
- addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing unused kernel memory: %dK freed\n",
- (int) ((addr - PAGE_ALIGN((long) &__init_begin)) >> 10));
+ free_initmem_default(0);
}
diff --git a/arch/cris/mm/init.c b/arch/cris/mm/init.c
index d72ab58fd83e..9ac80946dada 100644
--- a/arch/cris/mm/init.c
+++ b/arch/cris/mm/init.c
@@ -12,12 +12,10 @@
#include <linux/init.h>
#include <linux/bootmem.h>
#include <asm/tlb.h>
+#include <asm/sections.h>
unsigned long empty_zero_page;
-extern char _stext, _edata, _etext; /* From linkerscript */
-extern char __init_begin, __init_end;
-
void __init
mem_init(void)
{
@@ -67,15 +65,5 @@ mem_init(void)
void
free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk (KERN_INFO "Freeing unused kernel memory: %luk freed\n",
- (unsigned long)((&__init_end - &__init_begin) >> 10));
+ free_initmem_default(0);
}
diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c
index 92e97b0894a6..dee354fa6b64 100644
--- a/arch/frv/mm/init.c
+++ b/arch/frv/mm/init.c
@@ -122,7 +122,7 @@ void __init mem_init(void)
#endif
int codek = 0, datak = 0;
- /* this will put all memory onto the freelists */
+ /* this will put all low memory onto the freelists */
totalram_pages = free_all_bootmem();
#ifdef CONFIG_MMU
@@ -131,14 +131,8 @@ void __init mem_init(void)
datapages++;
#ifdef CONFIG_HIGHMEM
- for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--) {
- struct page *page = &mem_map[pfn];
-
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalram_pages++;
- }
+ for (pfn = num_physpages - 1; pfn >= num_mappedpages; pfn--)
+ free_highmem_page(&mem_map[pfn]);
#endif
codek = ((unsigned long) &_etext - (unsigned long) &_stext) >> 10;
@@ -168,21 +162,7 @@ void __init mem_init(void)
void free_initmem(void)
{
#if defined(CONFIG_RAMKERNEL) && !defined(CONFIG_PROTECT_KERNEL)
- unsigned long start, end, addr;
-
- start = PAGE_ALIGN((unsigned long) &__init_begin); /* round up */
- end = ((unsigned long) &__init_end) & PAGE_MASK; /* round down */
-
- /* next to check that the page we free is not a partial page */
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
-
- printk("Freeing unused kernel memory: %ldKiB freed (0x%lx - 0x%lx)\n",
- (end - start) >> 10, start, end);
+ free_initmem_default(0);
#endif
} /* end free_initmem() */
@@ -193,14 +173,6 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- int pages = 0;
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- pages++;
- }
- printk("Freeing initrd memory: %dKiB freed\n", (pages * PAGE_SIZE) >> 10);
+ free_reserved_area(start, end, 0, "initrd");
} /* end free_initrd_mem() */
#endif
diff --git a/arch/h8300/mm/init.c b/arch/h8300/mm/init.c
index 981e25094b1a..ff349d70a29b 100644
--- a/arch/h8300/mm/init.c
+++ b/arch/h8300/mm/init.c
@@ -139,7 +139,7 @@ void __init mem_init(void)
start_mem = PAGE_ALIGN(start_mem);
max_mapnr = num_physpages = MAP_NR(high_memory);
- /* this will put all memory onto the freelists */
+ /* this will put all low memory onto the freelists */
totalram_pages = free_all_bootmem();
codek = (_etext - _stext) >> 10;
@@ -161,15 +161,7 @@ void __init mem_init(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- int pages = 0;
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- pages++;
- }
- printk ("Freeing initrd memory: %dk freed\n", pages);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
@@ -177,23 +169,7 @@ void
free_initmem(void)
{
#ifdef CONFIG_RAMKERNEL
- unsigned long addr;
-/*
- * the following code should be cool even if these sections
- * are not page aligned.
- */
- addr = PAGE_ALIGN((unsigned long)(__init_begin));
- /* next to check that the page we free is not a partial page */
- for (; addr + PAGE_SIZE < (unsigned long)__init_end; addr +=PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing unused kernel memory: %ldk freed (0x%x - 0x%x)\n",
- (addr - PAGE_ALIGN((long) __init_begin)) >> 10,
- (int)(PAGE_ALIGN((unsigned long)__init_begin)),
- (int)(addr - PAGE_SIZE));
+ free_initmem_default(0);
#endif
}
diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h
index 94eaa5bd5d0c..aa910054b8e7 100644
--- a/arch/ia64/include/asm/hugetlb.h
+++ b/arch/ia64/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#define _ASM_IA64_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr,
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 80dab509dfb0..67c59ebec899 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -47,6 +47,8 @@ void show_mem(unsigned int filter)
printk(KERN_INFO "Mem-info:\n");
show_free_areas(filter);
printk(KERN_INFO "Node memory in pages:\n");
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
for_each_online_pgdat(pgdat) {
unsigned long present;
unsigned long flags;
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index c2e955ee79a8..a57436e5d405 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -623,6 +623,8 @@ void show_mem(unsigned int filter)
printk(KERN_INFO "Mem-info:\n");
show_free_areas(filter);
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
printk(KERN_INFO "Node memory in pages:\n");
for_each_online_pgdat(pgdat) {
unsigned long present;
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 20bc967c7209..d1fe4b402601 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -154,25 +154,14 @@ ia64_init_addr_space (void)
void
free_initmem (void)
{
- unsigned long addr, eaddr;
-
- addr = (unsigned long) ia64_imva(__init_begin);
- eaddr = (unsigned long) ia64_imva(__init_end);
- while (addr < eaddr) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- ++totalram_pages;
- addr += PAGE_SIZE;
- }
- printk(KERN_INFO "Freeing unused kernel memory: %ldkB freed\n",
- (__init_end - __init_begin) >> 10);
+ free_reserved_area((unsigned long)ia64_imva(__init_begin),
+ (unsigned long)ia64_imva(__init_end),
+ 0, "unused kernel");
}
void __init
free_initrd_mem (unsigned long start, unsigned long end)
{
- struct page *page;
/*
* EFI uses 4KB pages while the kernel can use 4KB or bigger.
* Thus EFI and the kernel may have different page sizes. It is
@@ -213,11 +202,7 @@ free_initrd_mem (unsigned long start, unsigned long end)
for (; start < end; start += PAGE_SIZE) {
if (!virt_addr_valid(start))
continue;
- page = virt_to_page(start);
- ClearPageReserved(page);
- init_page_count(page);
- free_page(start);
- ++totalram_pages;
+ free_reserved_page(virt_to_page(start));
}
}
diff --git a/arch/m32r/mm/init.c b/arch/m32r/mm/init.c
index 78b660e903da..ab4cbce91a9b 100644
--- a/arch/m32r/mm/init.c
+++ b/arch/m32r/mm/init.c
@@ -28,10 +28,7 @@
#include <asm/mmu_context.h>
#include <asm/setup.h>
#include <asm/tlb.h>
-
-/* References to section boundaries */
-extern char _text, _etext, _edata;
-extern char __init_begin, __init_end;
+#include <asm/sections.h>
pgd_t swapper_pg_dir[1024];
@@ -184,17 +181,7 @@ void __init mem_init(void)
*======================================================================*/
void free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk (KERN_INFO "Freeing unused kernel memory: %dk freed\n", \
- (int)(&__init_end - &__init_begin) >> 10);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -204,13 +191,6 @@ void free_initmem(void)
*======================================================================*/
void free_initrd_mem(unsigned long start, unsigned long end)
{
- unsigned long p;
- for (p = start; p < end; p += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(p));
- init_page_count(virt_to_page(p));
- free_page(p);
- totalram_pages++;
- }
- printk (KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
index 519aad8fa812..1af2ca3411f6 100644
--- a/arch/m68k/mm/init.c
+++ b/arch/m68k/mm/init.c
@@ -110,18 +110,7 @@ void __init paging_init(void)
void free_initmem(void)
{
#ifndef CONFIG_MMU_SUN3
- unsigned long addr;
-
- addr = (unsigned long) __init_begin;
- for (; addr < ((unsigned long) __init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- pr_notice("Freeing unused kernel memory: %luk freed (0x%x - 0x%x)\n",
- (addr - (unsigned long) __init_begin) >> 10,
- (unsigned int) __init_begin, (unsigned int) __init_end);
+ free_initmem_default(0);
#endif /* CONFIG_MMU_SUN3 */
}
@@ -213,15 +202,6 @@ void __init mem_init(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- int pages = 0;
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- pages++;
- }
- pr_notice("Freeing initrd memory: %dk freed\n",
- pages << (PAGE_SHIFT - 10));
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c
index 504a398d5f8b..d05b8455c44c 100644
--- a/arch/metag/mm/init.c
+++ b/arch/metag/mm/init.c
@@ -380,14 +380,8 @@ void __init mem_init(void)
#ifdef CONFIG_HIGHMEM
unsigned long tmp;
- for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
- struct page *page = pfn_to_page(tmp);
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
- }
- totalram_pages += totalhigh_pages;
+ for (tmp = highstart_pfn; tmp < highend_pfn; tmp++)
+ free_highmem_page(pfn_to_page(tmp));
num_physpages += totalhigh_pages;
#endif /* CONFIG_HIGHMEM */
@@ -412,32 +406,15 @@ void __init mem_init(void)
return;
}
-static void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
-
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- pr_info("Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-}
-
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- end = end & PAGE_MASK;
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
}
#endif
diff --git a/arch/microblaze/include/asm/setup.h b/arch/microblaze/include/asm/setup.h
index 0e0b0a5ec756..f05df5630c84 100644
--- a/arch/microblaze/include/asm/setup.h
+++ b/arch/microblaze/include/asm/setup.h
@@ -46,7 +46,6 @@ void machine_shutdown(void);
void machine_halt(void);
void machine_power_off(void);
-void free_init_pages(char *what, unsigned long begin, unsigned long end);
extern void *alloc_maybe_bootmem(size_t size, gfp_t mask);
extern void *zalloc_maybe_bootmem(size_t size, gfp_t mask);
diff --git a/arch/microblaze/kernel/early_printk.c b/arch/microblaze/kernel/early_printk.c
index 60dcacc68038..365f2d53f1b2 100644
--- a/arch/microblaze/kernel/early_printk.c
+++ b/arch/microblaze/kernel/early_printk.c
@@ -21,7 +21,6 @@
#include <asm/setup.h>
#include <asm/prom.h>
-static u32 early_console_initialized;
static u32 base_addr;
#ifdef CONFIG_SERIAL_UARTLITE_CONSOLE
@@ -109,27 +108,11 @@ static struct console early_serial_uart16550_console = {
};
#endif /* CONFIG_SERIAL_8250_CONSOLE */
-static struct console *early_console;
-
-void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- if (early_console_initialized) {
- va_start(ap, fmt);
- n = vscnprintf(buf, 512, fmt, ap);
- early_console->write(early_console, buf, n);
- va_end(ap);
- }
-}
-
int __init setup_early_printk(char *opt)
{
int version = 0;
- if (early_console_initialized)
+ if (early_console)
return 1;
base_addr = of_early_console(&version);
@@ -159,7 +142,6 @@ int __init setup_early_printk(char *opt)
}
register_console(early_console);
- early_console_initialized = 1;
return 0;
}
return 1;
@@ -169,7 +151,7 @@ int __init setup_early_printk(char *opt)
* only for early console because of performance degression */
void __init remap_early_printk(void)
{
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
pr_info("early_printk_console remapping from 0x%x to ", base_addr);
base_addr = (u32) ioremap(base_addr, PAGE_SIZE);
@@ -194,9 +176,9 @@ void __init remap_early_printk(void)
void __init disable_early_printk(void)
{
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
pr_warn("disabling early console\n");
unregister_console(early_console);
- early_console_initialized = 0;
+ early_console = NULL;
}
diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 8f8b367c079e..4ec137d13ad7 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -82,13 +82,9 @@ static unsigned long highmem_setup(void)
/* FIXME not sure about */
if (memblock_is_reserved(pfn << PAGE_SHIFT))
continue;
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
+ free_highmem_page(page);
reservedpages++;
}
- totalram_pages += totalhigh_pages;
pr_info("High memory: %luk\n",
totalhigh_pages << (PAGE_SHIFT-10));
@@ -236,40 +232,16 @@ void __init setup_memory(void)
paging_init();
}
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
-
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- pr_info("Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
-}
-
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- int pages = 0;
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- pages++;
- }
- pr_notice("Freeing initrd memory: %dk freed\n",
- (int)(pages * (PAGE_SIZE / 1024)));
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)(&__init_begin),
- (unsigned long)(&__init_end));
+ free_initmem_default(0);
}
void __init mem_init(void)
diff --git a/arch/mips/include/asm/hugetlb.h b/arch/mips/include/asm/hugetlb.h
index ef99db994c2f..fe0d15d32660 100644
--- a/arch/mips/include/asm/hugetlb.h
+++ b/arch/mips/include/asm/hugetlb.h
@@ -10,6 +10,7 @@
#define __ASM_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index c8cddd10630c..143875c6c95a 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -21,7 +21,7 @@
#define KVM_MAX_VCPUS 1
-#define KVM_MEMORY_SLOTS 8
+#define KVM_USER_MEM_SLOTS 8
/* memory slots that does not exposed to userspace */
#define KVM_PRIVATE_MEM_SLOTS 0
diff --git a/arch/mips/kernel/early_printk.c b/arch/mips/kernel/early_printk.c
index 9e6440eaa455..505cb77d1280 100644
--- a/arch/mips/kernel/early_printk.c
+++ b/arch/mips/kernel/early_printk.c
@@ -7,7 +7,9 @@
* Copyright (C) 2007 MIPS Technologies, Inc.
* written by Ralf Baechle (ralf@linux-mips.org)
*/
+#include <linux/kernel.h>
#include <linux/console.h>
+#include <linux/printk.h>
#include <linux/init.h>
#include <asm/setup.h>
@@ -24,20 +26,18 @@ static void early_console_write(struct console *con, const char *s, unsigned n)
}
}
-static struct console early_console = {
+static struct console early_console_prom = {
.name = "early",
.write = early_console_write,
.flags = CON_PRINTBUFFER | CON_BOOT,
.index = -1
};
-static int early_console_initialized __initdata;
-
void __init setup_early_printk(void)
{
- if (early_console_initialized)
+ if (early_console)
return;
- early_console_initialized = 1;
+ early_console = &early_console_prom;
- register_console(&early_console);
+ register_console(&early_console_prom);
}
diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c
index 67929251286c..3d0346dbccf4 100644
--- a/arch/mips/mm/init.c
+++ b/arch/mips/mm/init.c
@@ -77,10 +77,9 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
/*
* Not static inline because used by IP27 special magic initialization code
*/
-unsigned long setup_zero_pages(void)
+void setup_zero_pages(void)
{
- unsigned int order;
- unsigned long size;
+ unsigned int order, i;
struct page *page;
if (cpu_has_vce)
@@ -94,15 +93,10 @@ unsigned long setup_zero_pages(void)
page = virt_to_page((void *)empty_zero_page);
split_page(page, order);
- while (page < virt_to_page((void *)(empty_zero_page + (PAGE_SIZE << order)))) {
- SetPageReserved(page);
- page++;
- }
-
- size = PAGE_SIZE << order;
- zero_page_mask = (size - 1) & PAGE_MASK;
+ for (i = 0; i < (1 << order); i++, page++)
+ mark_page_reserved(page);
- return 1UL << order;
+ zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
#ifdef CONFIG_MIPS_MT_SMTC
@@ -380,7 +374,7 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
totalram_pages += free_all_bootmem();
- totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */
+ setup_zero_pages(); /* Setup zeroed pages. */
reservedpages = ram = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
@@ -399,12 +393,8 @@ void __init mem_init(void)
SetPageReserved(page);
continue;
}
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
+ free_highmem_page(page);
}
- totalram_pages += totalhigh_pages;
num_physpages += totalhigh_pages;
#endif
@@ -440,11 +430,8 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
struct page *page = pfn_to_page(pfn);
void *addr = phys_to_virt(PFN_PHYS(pfn));
- ClearPageReserved(page);
- init_page_count(page);
memset(addr, POISON_FREE_INITMEM, PAGE_SIZE);
- __free_page(page);
- totalram_pages++;
+ free_reserved_page(page);
}
printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
}
@@ -452,18 +439,14 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory",
- virt_to_phys((void *)start),
- virt_to_phys((void *)end));
+ free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
}
#endif
void __init_refok free_initmem(void)
{
prom_free_prom_memory();
- free_init_pages("unused kernel memory",
- __pa_symbol(&__init_begin),
- __pa_symbol(&__init_end));
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifndef CONFIG_MIPS_PGD_C0_CONTEXT
diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c
index 7e5fe2790d8a..f1baadd56e82 100644
--- a/arch/mips/mm/mmap.c
+++ b/arch/mips/mm/mmap.c
@@ -158,11 +158,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c
index 3505d08ff2fd..5f2bddb1860e 100644
--- a/arch/mips/sgi-ip27/ip27-memory.c
+++ b/arch/mips/sgi-ip27/ip27-memory.c
@@ -457,7 +457,7 @@ void __init prom_free_prom_memory(void)
/* We got nothing to free here ... */
}
-extern unsigned long setup_zero_pages(void);
+extern void setup_zero_pages(void);
void __init paging_init(void)
{
@@ -492,7 +492,7 @@ void __init mem_init(void)
totalram_pages += free_all_bootmem_node(NODE_DATA(node));
}
- totalram_pages -= setup_zero_pages(); /* This comes from node 0 */
+ setup_zero_pages(); /* This comes from node 0 */
codesize = (unsigned long) &_etext - (unsigned long) &_text;
datasize = (unsigned long) &_edata - (unsigned long) &_etext;
diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c
index e57e5bc23562..5a8ace63a6b4 100644
--- a/arch/mn10300/mm/init.c
+++ b/arch/mn10300/mm/init.c
@@ -139,30 +139,11 @@ void __init mem_init(void)
}
/*
- *
- */
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr;
-
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *) addr, 0xcc, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
-}
-
-/*
* recycle memory containing stuff only required for initialisation
*/
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long) &__init_begin,
- (unsigned long) &__init_end);
+ free_initmem_default(POISON_FREE_INITMEM);
}
/*
@@ -171,6 +152,6 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
}
#endif
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index e7fdc50c4bf0..b3cbc6703837 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -43,6 +43,7 @@
#include <asm/kmap_types.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
+#include <asm/sections.h>
int mem_init_done;
@@ -201,9 +202,6 @@ void __init paging_init(void)
/* References to section boundaries */
-extern char _stext, _etext, _edata, __bss_start, _end;
-extern char __init_begin, __init_end;
-
static int __init free_pages_init(void)
{
int reservedpages, pfn;
@@ -263,30 +261,11 @@ void __init mem_init(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
- (end - start) >> 10);
-
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
void free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing unused kernel memory: %luk freed\n",
- ((unsigned long)&__init_end -
- (unsigned long)&__init_begin) >> 10);
+ free_initmem_default(0);
}
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index 0339181bf3ac..433e75a2ee9a 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -1,5 +1,6 @@
config PARISC
def_bool y
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select HAVE_IDE
select HAVE_OPROFILE
select HAVE_FUNCTION_TRACER if 64BIT
diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug
index 7305ac8f7f5b..bc989e522a04 100644
--- a/arch/parisc/Kconfig.debug
+++ b/arch/parisc/Kconfig.debug
@@ -12,18 +12,4 @@ config DEBUG_RODATA
portion of the kernel code won't be covered by a TLB anymore.
If in doubt, say "N".
-config DEBUG_STRICT_USER_COPY_CHECKS
- bool "Strict copy size checks"
- depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
- ---help---
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time failures.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, or if you run an older (pre 4.4) gcc, say N.
-
endmenu
diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c
index 3ac462de53a4..157b931e7b09 100644
--- a/arch/parisc/mm/init.c
+++ b/arch/parisc/mm/init.c
@@ -505,7 +505,6 @@ static void __init map_pages(unsigned long start_vaddr,
void free_initmem(void)
{
- unsigned long addr;
unsigned long init_begin = (unsigned long)__init_begin;
unsigned long init_end = (unsigned long)__init_end;
@@ -533,19 +532,10 @@ void free_initmem(void)
* pages are no-longer executable */
flush_icache_range(init_begin, init_end);
- for (addr = init_begin; addr < init_end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- num_physpages++;
- totalram_pages++;
- }
+ num_physpages += free_initmem_default(0);
/* set up a new led state on systems shipped LED State panel */
pdc_chassis_send_status(PDC_CHASSIS_DIRECT_BCOMPLETE);
-
- printk(KERN_INFO "Freeing unused kernel memory: %luk freed\n",
- (init_end - init_begin) >> 10);
}
@@ -697,6 +687,8 @@ void show_mem(unsigned int filter)
printk(KERN_INFO "Mem-info:\n");
show_free_areas(filter);
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
#ifndef CONFIG_DISCONTIGMEM
i = max_mapnr;
while (i-- > 0) {
@@ -1107,15 +1099,6 @@ void flush_tlb_all(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start >= end)
- return;
- printk(KERN_INFO "Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- num_physpages++;
- totalram_pages++;
- }
+ num_physpages += free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 62e11a32c4c2..4fcbd6b14a3a 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -3,6 +3,7 @@
#ifdef CONFIG_HUGETLB_PAGE
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
extern struct kmem_cache *hugepte_cache;
diff --git a/arch/powerpc/include/asm/page_64.h b/arch/powerpc/include/asm/page_64.h
index cd915d6b093d..88693cef4f3d 100644
--- a/arch/powerpc/include/asm/page_64.h
+++ b/arch/powerpc/include/asm/page_64.h
@@ -99,8 +99,7 @@ extern unsigned long slice_get_unmapped_area(unsigned long addr,
unsigned long len,
unsigned long flags,
unsigned int psize,
- int topdown,
- int use_cache);
+ int topdown);
extern unsigned int get_slice_psize(struct mm_struct *mm,
unsigned long addr);
diff --git a/arch/powerpc/kernel/crash_dump.c b/arch/powerpc/kernel/crash_dump.c
index b3ba5163eae2..9ec3fe174cba 100644
--- a/arch/powerpc/kernel/crash_dump.c
+++ b/arch/powerpc/kernel/crash_dump.c
@@ -150,10 +150,7 @@ void crash_free_reserved_phys_range(unsigned long begin, unsigned long end)
if (addr <= rtas_end && ((addr + PAGE_SIZE) > rtas_start))
continue;
- ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
- init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
- free_page((unsigned long)__va(addr));
- totalram_pages++;
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}
}
#endif
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 06c8202a69cf..2230fd0ca3e4 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1045,10 +1045,7 @@ static void fadump_release_memory(unsigned long begin, unsigned long end)
if (addr <= ra_end && ((addr + PAGE_SIZE) > ra_start))
continue;
- ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
- init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
- free_page((unsigned long)__va(addr));
- totalram_pages++;
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}
}
diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c
index a61b133c4f99..6782221d49bd 100644
--- a/arch/powerpc/kernel/kvm.c
+++ b/arch/powerpc/kernel/kvm.c
@@ -756,12 +756,7 @@ static __init void kvm_free_tmp(void)
end = (ulong)&kvm_tmp[ARRAY_SIZE(kvm_tmp)] & PAGE_MASK;
/* Free the tmp space we don't need */
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_reserved_area(start, end, 0, NULL);
}
static int __init kvm_guest_init(void)
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index f9748498fe58..13b867093499 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -156,15 +156,13 @@ static struct console udbg_console = {
.index = 0,
};
-static int early_console_initialized;
-
/*
* Called by setup_system after ppc_md->probe and ppc_md->early_init.
* Call it again after setting udbg_putc in ppc_md->setup_arch.
*/
void __init register_early_udbg_console(void)
{
- if (early_console_initialized)
+ if (early_console)
return;
if (!udbg_putc)
@@ -174,7 +172,7 @@ void __init register_early_udbg_console(void)
printk(KERN_INFO "early console immortal !\n");
udbg_console.flags &= ~CON_BOOT;
}
- early_console_initialized = 1;
+ early_console = &udbg_console;
register_console(&udbg_console);
}
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 1a6de0a7d8eb..5dc52d803ed8 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -742,7 +742,7 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct hstate *hstate = hstate_file(file);
int mmu_psize = shift_to_mmu_psize(huge_page_shift(hstate));
- return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1, 0);
+ return slice_get_unmapped_area(addr, len, flags, mmu_psize, 1);
}
#endif
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index f1f7409a4183..cd76c454942f 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -352,13 +352,9 @@ void __init mem_init(void)
struct page *page = pfn_to_page(pfn);
if (memblock_is_reserved(paddr))
continue;
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
+ free_highmem_page(page);
reservedpages--;
}
- totalram_pages += totalhigh_pages;
printk(KERN_DEBUG "High memory: %luk\n",
totalhigh_pages << (PAGE_SHIFT-10));
}
@@ -405,39 +401,14 @@ void __init mem_init(void)
void free_initmem(void)
{
- unsigned long addr;
-
ppc_md.progress = ppc_printk_progress;
-
- addr = (unsigned long)__init_begin;
- for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE) {
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- pr_info("Freeing unused kernel memory: %luk freed\n",
- ((unsigned long)__init_end -
- (unsigned long)__init_begin) >> 10);
+ free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start >= end)
- return;
-
- start = _ALIGN_DOWN(start, PAGE_SIZE);
- end = _ALIGN_UP(end, PAGE_SIZE);
- pr_info("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
-
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/powerpc/mm/mmap_64.c b/arch/powerpc/mm/mmap_64.c
index 67a42ed0d2fc..cb8bdbe4972f 100644
--- a/arch/powerpc/mm/mmap_64.c
+++ b/arch/powerpc/mm/mmap_64.c
@@ -92,10 +92,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index cf9dada734b6..3e99c149271a 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -237,134 +237,112 @@ static void slice_convert(struct mm_struct *mm, struct slice_mask mask, int psiz
#endif
}
+/*
+ * Compute which slice addr is part of;
+ * set *boundary_addr to the start or end boundary of that slice
+ * (depending on 'end' parameter);
+ * return boolean indicating if the slice is marked as available in the
+ * 'available' slice_mark.
+ */
+static bool slice_scan_available(unsigned long addr,
+ struct slice_mask available,
+ int end,
+ unsigned long *boundary_addr)
+{
+ unsigned long slice;
+ if (addr < SLICE_LOW_TOP) {
+ slice = GET_LOW_SLICE_INDEX(addr);
+ *boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
+ return !!(available.low_slices & (1u << slice));
+ } else {
+ slice = GET_HIGH_SLICE_INDEX(addr);
+ *boundary_addr = (slice + end) ?
+ ((slice + end) << SLICE_HIGH_SHIFT) : SLICE_LOW_TOP;
+ return !!(available.high_slices & (1u << slice));
+ }
+}
+
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize, int use_cache)
+ int psize)
{
- struct vm_area_struct *vma;
- unsigned long start_addr, addr;
- struct slice_mask mask;
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-
- if (use_cache) {
- if (len <= mm->cached_hole_size) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- } else
- start_addr = addr = mm->free_area_cache;
- } else
- start_addr = addr = TASK_UNMAPPED_BASE;
-
-full_search:
- for (;;) {
- addr = _ALIGN_UP(addr, 1ul << pshift);
- if ((TASK_SIZE - len) < addr)
- break;
- vma = find_vma(mm, addr);
- BUG_ON(vma && (addr >= vma->vm_end));
-
- mask = slice_range_to_mask(addr, len);
- if (!slice_check_fit(mask, available)) {
- if (addr < SLICE_LOW_TOP)
- addr = _ALIGN_UP(addr + 1, 1ul << SLICE_LOW_SHIFT);
- else
- addr = _ALIGN_UP(addr + 1, 1ul << SLICE_HIGH_SHIFT);
+ unsigned long addr, found, next_end;
+ struct vm_unmapped_area_info info;
+
+ info.flags = 0;
+ info.length = len;
+ info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+ info.align_offset = 0;
+
+ addr = TASK_UNMAPPED_BASE;
+ while (addr < TASK_SIZE) {
+ info.low_limit = addr;
+ if (!slice_scan_available(addr, available, 1, &addr))
continue;
+
+ next_slice:
+ /*
+ * At this point [info.low_limit; addr) covers
+ * available slices only and ends at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the next available slice.
+ */
+ if (addr >= TASK_SIZE)
+ addr = TASK_SIZE;
+ else if (slice_scan_available(addr, available, 1, &next_end)) {
+ addr = next_end;
+ goto next_slice;
}
- if (!vma || addr + len <= vma->vm_start) {
- /*
- * Remember the place where we stopped the search:
- */
- if (use_cache)
- mm->free_area_cache = addr + len;
- return addr;
- }
- if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
- addr = vma->vm_end;
- }
+ info.high_limit = addr;
- /* Make sure we didn't miss any holes */
- if (use_cache && start_addr != TASK_UNMAPPED_BASE) {
- start_addr = addr = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = 0;
- goto full_search;
+ found = vm_unmapped_area(&info);
+ if (!(found & ~PAGE_MASK))
+ return found;
}
+
return -ENOMEM;
}
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
unsigned long len,
struct slice_mask available,
- int psize, int use_cache)
+ int psize)
{
- struct vm_area_struct *vma;
- unsigned long addr;
- struct slice_mask mask;
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
+ unsigned long addr, found, prev;
+ struct vm_unmapped_area_info info;
- /* check if free_area_cache is useful for us */
- if (use_cache) {
- if (len <= mm->cached_hole_size) {
- mm->cached_hole_size = 0;
- mm->free_area_cache = mm->mmap_base;
- }
-
- /* either no address requested or can't fit in requested
- * address hole
- */
- addr = mm->free_area_cache;
-
- /* make sure it can fit in the remaining address space */
- if (addr > len) {
- addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
- mask = slice_range_to_mask(addr, len);
- if (slice_check_fit(mask, available) &&
- slice_area_is_free(mm, addr, len))
- /* remember the address as a hint for
- * next time
- */
- return (mm->free_area_cache = addr);
- }
- }
+ info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+ info.length = len;
+ info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
+ info.align_offset = 0;
addr = mm->mmap_base;
- while (addr > len) {
- /* Go down by chunk size */
- addr = _ALIGN_DOWN(addr - len, 1ul << pshift);
-
- /* Check for hit with different page size */
- mask = slice_range_to_mask(addr, len);
- if (!slice_check_fit(mask, available)) {
- if (addr < SLICE_LOW_TOP)
- addr = _ALIGN_DOWN(addr, 1ul << SLICE_LOW_SHIFT);
- else if (addr < (1ul << SLICE_HIGH_SHIFT))
- addr = SLICE_LOW_TOP;
- else
- addr = _ALIGN_DOWN(addr, 1ul << SLICE_HIGH_SHIFT);
+ while (addr > PAGE_SIZE) {
+ info.high_limit = addr;
+ if (!slice_scan_available(addr - 1, available, 0, &addr))
continue;
- }
+ prev_slice:
/*
- * Lookup failure means no vma is above this address,
- * else if new region fits below vma->vm_start,
- * return with success:
+ * At this point [addr; info.high_limit) covers
+ * available slices only and starts at a slice boundary.
+ * Check if we need to reduce the range, or if we can
+ * extend it to cover the previous available slice.
*/
- vma = find_vma(mm, addr);
- if (!vma || (addr + len) <= vma->vm_start) {
- /* remember the address as a hint for next time */
- if (use_cache)
- mm->free_area_cache = addr;
- return addr;
+ if (addr < PAGE_SIZE)
+ addr = PAGE_SIZE;
+ else if (slice_scan_available(addr - 1, available, 0, &prev)) {
+ addr = prev;
+ goto prev_slice;
}
+ info.low_limit = addr;
- /* remember the largest hole we saw so far */
- if (use_cache && (addr + mm->cached_hole_size) < vma->vm_start)
- mm->cached_hole_size = vma->vm_start - addr;
-
- /* try just below the current vma->vm_start */
- addr = vma->vm_start;
+ found = vm_unmapped_area(&info);
+ if (!(found & ~PAGE_MASK))
+ return found;
}
/*
@@ -373,28 +351,18 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
* can happen with large stack limits and large mmap()
* allocations.
*/
- addr = slice_find_area_bottomup(mm, len, available, psize, 0);
-
- /*
- * Restore the topdown base:
- */
- if (use_cache) {
- mm->free_area_cache = mm->mmap_base;
- mm->cached_hole_size = ~0UL;
- }
-
- return addr;
+ return slice_find_area_bottomup(mm, len, available, psize);
}
static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
struct slice_mask mask, int psize,
- int topdown, int use_cache)
+ int topdown)
{
if (topdown)
- return slice_find_area_topdown(mm, len, mask, psize, use_cache);
+ return slice_find_area_topdown(mm, len, mask, psize);
else
- return slice_find_area_bottomup(mm, len, mask, psize, use_cache);
+ return slice_find_area_bottomup(mm, len, mask, psize);
}
#define or_mask(dst, src) do { \
@@ -415,7 +383,7 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
unsigned long flags, unsigned int psize,
- int topdown, int use_cache)
+ int topdown)
{
struct slice_mask mask = {0, 0};
struct slice_mask good_mask;
@@ -430,8 +398,8 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
BUG_ON(mm->task_size == 0);
slice_dbg("slice_get_unmapped_area(mm=%p, psize=%d...\n", mm, psize);
- slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d, use_cache=%d\n",
- addr, len, flags, topdown, use_cache);
+ slice_dbg(" addr=%lx, len=%lx, flags=%lx, topdown=%d\n",
+ addr, len, flags, topdown);
if (len > mm->task_size)
return -ENOMEM;
@@ -503,8 +471,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing
* slices for that size
*/
- newaddr = slice_find_area(mm, len, good_mask, psize, topdown,
- use_cache);
+ newaddr = slice_find_area(mm, len, good_mask, psize, topdown);
if (newaddr != -ENOMEM) {
/* Found within the good mask, we don't have to setup,
* we thus return directly
@@ -536,8 +503,7 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
* anywhere in the good area.
*/
if (addr) {
- addr = slice_find_area(mm, len, good_mask, psize, topdown,
- use_cache);
+ addr = slice_find_area(mm, len, good_mask, psize, topdown);
if (addr != -ENOMEM) {
slice_dbg(" found area at 0x%lx\n", addr);
return addr;
@@ -547,15 +513,14 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
/* Now let's see if we can find something in the existing slices
* for that size plus free slices
*/
- addr = slice_find_area(mm, len, potential_mask, psize, topdown,
- use_cache);
+ addr = slice_find_area(mm, len, potential_mask, psize, topdown);
#ifdef CONFIG_PPC_64K_PAGES
if (addr == -ENOMEM && psize == MMU_PAGE_64K) {
/* retry the search with 4k-page slices included */
or_mask(potential_mask, compat_mask);
addr = slice_find_area(mm, len, potential_mask, psize,
- topdown, use_cache);
+ topdown);
}
#endif
@@ -586,8 +551,7 @@ unsigned long arch_get_unmapped_area(struct file *filp,
unsigned long flags)
{
return slice_get_unmapped_area(addr, len, flags,
- current->mm->context.user_psize,
- 0, 1);
+ current->mm->context.user_psize, 0);
}
unsigned long arch_get_unmapped_area_topdown(struct file *filp,
@@ -597,8 +561,7 @@ unsigned long arch_get_unmapped_area_topdown(struct file *filp,
const unsigned long flags)
{
return slice_get_unmapped_area(addr0, len, flags,
- current->mm->context.user_psize,
- 1, 1);
+ current->mm->context.user_psize, 1);
}
unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
diff --git a/arch/powerpc/platforms/512x/mpc512x_shared.c b/arch/powerpc/platforms/512x/mpc512x_shared.c
index 2765cd1c7678..76885d5756e3 100644
--- a/arch/powerpc/platforms/512x/mpc512x_shared.c
+++ b/arch/powerpc/platforms/512x/mpc512x_shared.c
@@ -172,12 +172,9 @@ static struct fsl_diu_shared_fb __attribute__ ((__aligned__(8))) diu_shared_fb;
static inline void mpc512x_free_bootmem(struct page *page)
{
- __ClearPageReserved(page);
BUG_ON(PageTail(page));
BUG_ON(atomic_read(&page->_count) > 1);
- atomic_set(&page->_count, 1);
- __free_page(page);
- totalram_pages++;
+ free_reserved_page(page);
}
void mpc512x_release_bootmem(void)
diff --git a/arch/powerpc/platforms/cell/spufs/file.c b/arch/powerpc/platforms/cell/spufs/file.c
index 68c57d38745a..0026a37e21fd 100644
--- a/arch/powerpc/platforms/cell/spufs/file.c
+++ b/arch/powerpc/platforms/cell/spufs/file.c
@@ -352,7 +352,7 @@ static unsigned long spufs_get_unmapped_area(struct file *file,
/* Else, try to obtain a 64K pages slice */
return slice_get_unmapped_area(addr, len, flags,
- MMU_PAGE_64K, 1, 0);
+ MMU_PAGE_64K, 1);
}
#endif /* CONFIG_SPU_FS_64K_LS */
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 830ec55d246f..6a94c397ad08 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -91,6 +91,7 @@ config S390
select ARCH_INLINE_WRITE_UNLOCK_BH
select ARCH_INLINE_WRITE_UNLOCK_IRQ
select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_SAVE_PAGE_KEYS if HIBERNATION
select ARCH_WANT_IPC_PARSE_VERSION
select BUILDTIME_EXTABLE_SORT
diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug
index fc32a2df4974..c56878e1245f 100644
--- a/arch/s390/Kconfig.debug
+++ b/arch/s390/Kconfig.debug
@@ -17,20 +17,6 @@ config STRICT_DEVMEM
If you are unsure, say Y.
-config DEBUG_STRICT_USER_COPY_CHECKS
- def_bool n
- prompt "Strict user copy size checks"
- ---help---
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time warnings.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, or if you run an older (pre 4.4) gcc, say N.
-
config S390_PTDUMP
bool "Export kernel pagetable layout to userspace via debugfs"
depends on DEBUG_KERNEL
diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c
index 5f7d7ba2874c..7a539f4f5e30 100644
--- a/arch/s390/hypfs/inode.c
+++ b/arch/s390/hypfs/inode.c
@@ -21,6 +21,7 @@
#include <linux/module.h>
#include <linux/seq_file.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include <asm/ebcdic.h>
#include "hypfs.h"
diff --git a/arch/s390/include/asm/hugetlb.h b/arch/s390/include/asm/hugetlb.h
index 593753ee07f3..bd90359d6d22 100644
--- a/arch/s390/include/asm/hugetlb.h
+++ b/arch/s390/include/asm/hugetlb.h
@@ -114,7 +114,7 @@ static inline pte_t huge_ptep_get_and_clear(struct mm_struct *mm,
#define huge_ptep_set_wrprotect(__mm, __addr, __ptep) \
({ \
pte_t __pte = huge_ptep_get(__ptep); \
- if (pte_write(__pte)) { \
+ if (huge_pte_write(__pte)) { \
huge_ptep_invalidate(__mm, __addr, __ptep); \
set_huge_pte_at(__mm, __addr, __ptep, \
huge_pte_wrprotect(__pte)); \
@@ -127,4 +127,58 @@ static inline void huge_ptep_clear_flush(struct vm_area_struct *vma,
huge_ptep_invalidate(vma->vm_mm, address, ptep);
}
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+ pte_t pte;
+ pmd_t pmd;
+
+ pmd = mk_pmd_phys(page_to_phys(page), pgprot);
+ pte_val(pte) = pmd_val(pmd);
+ return pte;
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = pte_val(pte);
+ return pmd_write(pmd);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+ /* No dirty bit in the segment table entry. */
+ return 0;
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = pte_val(pte);
+ pte_val(pte) = pmd_val(pmd_mkwrite(pmd));
+ return pte;
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+ /* No dirty bit in the segment table entry. */
+ return pte;
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+ pmd_t pmd;
+
+ pmd_val(pmd) = pte_val(pte);
+ pte_val(pte) = pmd_val(pmd_modify(pmd, newprot));
+ return pte;
+}
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pmd_clear((pmd_t *) ptep);
+}
+
#endif /* _ASM_S390_HUGETLB_H */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 75b8750a16ff..7c9a14525849 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -419,6 +419,13 @@ extern unsigned long MODULES_END;
#define __S110 PAGE_RW
#define __S111 PAGE_RW
+/*
+ * Segment entry (large page) protection definitions.
+ */
+#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE)
+#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO)
+#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW)
+
static inline int mm_exclusive(struct mm_struct *mm)
{
return likely(mm == current->active_mm &&
@@ -909,26 +916,6 @@ static inline pte_t pte_mkspecial(pte_t pte)
#ifdef CONFIG_HUGETLB_PAGE
static inline pte_t pte_mkhuge(pte_t pte)
{
- /*
- * PROT_NONE needs to be remapped from the pte type to the ste type.
- * The HW invalid bit is also different for pte and ste. The pte
- * invalid bit happens to be the same as the ste _SEGMENT_ENTRY_LARGE
- * bit, so we don't have to clear it.
- */
- if (pte_val(pte) & _PAGE_INVALID) {
- if (pte_val(pte) & _PAGE_SWT)
- pte_val(pte) |= _HPAGE_TYPE_NONE;
- pte_val(pte) |= _SEGMENT_ENTRY_INV;
- }
- /*
- * Clear SW pte bits, there are no SW bits in a segment table entry.
- */
- pte_val(pte) &= ~(_PAGE_SWT | _PAGE_SWX | _PAGE_SWC |
- _PAGE_SWR | _PAGE_SWW);
- /*
- * Also set the change-override bit because we don't need dirty bit
- * tracking for hugetlbfs pages.
- */
pte_val(pte) |= (_SEGMENT_ENTRY_LARGE | _SEGMENT_ENTRY_CO);
return pte;
}
@@ -1273,31 +1260,7 @@ static inline void __pmd_idte(unsigned long address, pmd_t *pmdp)
}
}
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-
-#define SEGMENT_NONE __pgprot(_HPAGE_TYPE_NONE)
-#define SEGMENT_RO __pgprot(_HPAGE_TYPE_RO)
-#define SEGMENT_RW __pgprot(_HPAGE_TYPE_RW)
-
-#define __HAVE_ARCH_PGTABLE_DEPOSIT
-extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
-
-#define __HAVE_ARCH_PGTABLE_WITHDRAW
-extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
-
-static inline int pmd_trans_splitting(pmd_t pmd)
-{
- return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
-}
-
-static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
- pmd_t *pmdp, pmd_t entry)
-{
- if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
- pmd_val(entry) |= _SEGMENT_ENTRY_CO;
- *pmdp = entry;
-}
-
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
static inline unsigned long massage_pgprot_pmd(pgprot_t pgprot)
{
/*
@@ -1318,10 +1281,11 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
return pmd;
}
-static inline pmd_t pmd_mkhuge(pmd_t pmd)
+static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
{
- pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
- return pmd;
+ pmd_t __pmd;
+ pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
+ return __pmd;
}
static inline pmd_t pmd_mkwrite(pmd_t pmd)
@@ -1331,6 +1295,34 @@ static inline pmd_t pmd_mkwrite(pmd_t pmd)
pmd_val(pmd) &= ~_SEGMENT_ENTRY_RO;
return pmd;
}
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLB_PAGE */
+
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+
+#define __HAVE_ARCH_PGTABLE_DEPOSIT
+extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable);
+
+#define __HAVE_ARCH_PGTABLE_WITHDRAW
+extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm);
+
+static inline int pmd_trans_splitting(pmd_t pmd)
+{
+ return pmd_val(pmd) & _SEGMENT_ENTRY_SPLIT;
+}
+
+static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
+ pmd_t *pmdp, pmd_t entry)
+{
+ if (!(pmd_val(entry) & _SEGMENT_ENTRY_INV) && MACHINE_HAS_EDAT1)
+ pmd_val(entry) |= _SEGMENT_ENTRY_CO;
+ *pmdp = entry;
+}
+
+static inline pmd_t pmd_mkhuge(pmd_t pmd)
+{
+ pmd_val(pmd) |= _SEGMENT_ENTRY_LARGE;
+ return pmd;
+}
static inline pmd_t pmd_wrprotect(pmd_t pmd)
{
@@ -1427,13 +1419,6 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm,
}
}
-static inline pmd_t mk_pmd_phys(unsigned long physpage, pgprot_t pgprot)
-{
- pmd_t __pmd;
- pmd_val(__pmd) = physpage + massage_pgprot_pmd(pgprot);
- return __pmd;
-}
-
#define pfn_pmd(pfn, pgprot) mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
#define mk_pmd(page, pgprot) pfn_pmd(page_to_pfn(page), (pgprot))
diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile
index 6ab0d0b5cec8..20b0e97a7df2 100644
--- a/arch/s390/lib/Makefile
+++ b/arch/s390/lib/Makefile
@@ -3,7 +3,6 @@
#
lib-y += delay.o string.o uaccess_std.o uaccess_pt.o
-obj-y += usercopy.o
obj-$(CONFIG_32BIT) += div64.o qrnnd.o ucmpdi2.o mem32.o
obj-$(CONFIG_64BIT) += mem64.o
lib-$(CONFIG_64BIT) += uaccess_mvcos.o
diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c
index 532525ec88c1..121089d57802 100644
--- a/arch/s390/mm/hugetlbpage.c
+++ b/arch/s390/mm/hugetlbpage.c
@@ -39,7 +39,7 @@ int arch_prepare_hugepage(struct page *page)
if (!ptep)
return -ENOMEM;
- pte = mk_pte(page, PAGE_RW);
+ pte_val(pte) = addr;
for (i = 0; i < PTRS_PER_PTE; i++) {
set_pte_at(&init_mm, addr + i * PAGE_SIZE, ptep + i, pte);
pte_val(pte) += PAGE_SIZE;
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 9f9c315b4c07..0b09b2342302 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -42,11 +42,10 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __attribute__((__aligned__(PAGE_SIZE)));
unsigned long empty_zero_page, zero_page_mask;
EXPORT_SYMBOL(empty_zero_page);
-static unsigned long __init setup_zero_pages(void)
+static void __init setup_zero_pages(void)
{
struct cpuid cpu_id;
unsigned int order;
- unsigned long size;
struct page *page;
int i;
@@ -83,14 +82,11 @@ static unsigned long __init setup_zero_pages(void)
page = virt_to_page((void *) empty_zero_page);
split_page(page, order);
for (i = 1 << order; i > 0; i--) {
- SetPageReserved(page);
+ mark_page_reserved(page);
page++;
}
- size = PAGE_SIZE << order;
- zero_page_mask = (size - 1) & PAGE_MASK;
-
- return 1UL << order;
+ zero_page_mask = ((PAGE_SIZE << order) - 1) & PAGE_MASK;
}
/*
@@ -147,7 +143,7 @@ void __init mem_init(void)
/* this will put all low memory onto the freelists */
totalram_pages += free_all_bootmem();
- totalram_pages -= setup_zero_pages(); /* Setup zeroed pages. */
+ setup_zero_pages(); /* Setup zeroed pages. */
reservedpages = 0;
@@ -166,34 +162,15 @@ void __init mem_init(void)
PFN_ALIGN((unsigned long)&_eshared) - 1);
}
-void free_init_pages(char *what, unsigned long begin, unsigned long end)
-{
- unsigned long addr = begin;
-
- if (begin >= end)
- return;
- for (; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- memset((void *)(addr & PAGE_MASK), POISON_FREE_INITMEM,
- PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
-}
-
void free_initmem(void)
{
- free_init_pages("unused kernel memory",
- (unsigned long)&__init_begin,
- (unsigned long)&__init_end);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
void __init free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory", start, end);
+ free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
}
#endif
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 06bafec00278..40023290ee5b 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -91,11 +91,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
@@ -176,11 +174,9 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = s390_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = s390_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c
index cee6bce1e30c..1592aad7dbc4 100644
--- a/arch/score/mm/init.c
+++ b/arch/score/mm/init.c
@@ -43,7 +43,7 @@ EXPORT_SYMBOL_GPL(empty_zero_page);
static struct kcore_list kcore_mem, kcore_vmalloc;
-static unsigned long setup_zero_page(void)
+static void setup_zero_page(void)
{
struct page *page;
@@ -52,9 +52,7 @@ static unsigned long setup_zero_page(void)
panic("Oh boy, that early out of memory?");
page = virt_to_page((void *) empty_zero_page);
- SetPageReserved(page);
-
- return 1UL;
+ mark_page_reserved(page);
}
#ifndef CONFIG_NEED_MULTIPLE_NODES
@@ -84,7 +82,7 @@ void __init mem_init(void)
high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
totalram_pages += free_all_bootmem();
- totalram_pages -= setup_zero_page(); /* Setup zeroed pages. */
+ setup_zero_page(); /* Setup zeroed pages. */
reservedpages = 0;
for (tmp = 0; tmp < max_low_pfn; tmp++)
@@ -109,37 +107,16 @@ void __init mem_init(void)
}
#endif /* !CONFIG_NEED_MULTIPLE_NODES */
-static void free_init_pages(const char *what, unsigned long begin, unsigned long end)
-{
- unsigned long pfn;
-
- for (pfn = PFN_UP(begin); pfn < PFN_DOWN(end); pfn++) {
- struct page *page = pfn_to_page(pfn);
- void *addr = phys_to_virt(PFN_PHYS(pfn));
-
- ClearPageReserved(page);
- init_page_count(page);
- memset(addr, POISON_FREE_INITMEM, PAGE_SIZE);
- __free_page(page);
- totalram_pages++;
- }
- printk(KERN_INFO "Freeing %s: %ldk freed\n", what, (end - begin) >> 10);
-}
-
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- free_init_pages("initrd memory",
- virt_to_phys((void *) start),
- virt_to_phys((void *) end));
+ free_reserved_area(start, end, POISON_FREE_INITMEM, "initrd");
}
#endif
void __init_refok free_initmem(void)
{
- free_init_pages("unused kernel memory",
- __pa(&__init_begin),
- __pa(&__init_end));
+ free_initmem_default(POISON_FREE_INITMEM);
}
unsigned long pgd_current;
diff --git a/arch/sh/include/asm/hugetlb.h b/arch/sh/include/asm/hugetlb.h
index b3808c7d67b2..699255d6d1c6 100644
--- a/arch/sh/include/asm/hugetlb.h
+++ b/arch/sh/include/asm/hugetlb.h
@@ -3,6 +3,7 @@
#include <asm/cacheflush.h>
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/sh/kernel/sh_bios.c b/arch/sh/kernel/sh_bios.c
index 47475cca068a..a5b51b9d7338 100644
--- a/arch/sh/kernel/sh_bios.c
+++ b/arch/sh/kernel/sh_bios.c
@@ -144,8 +144,6 @@ static struct console bios_console = {
.index = -1,
};
-static struct console *early_console;
-
static int __init setup_early_printk(char *buf)
{
int keep_early = 0;
diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c
index 105794037143..20f9ead650d3 100644
--- a/arch/sh/mm/init.c
+++ b/arch/sh/mm/init.c
@@ -417,15 +417,13 @@ void __init mem_init(void)
for_each_online_node(nid) {
pg_data_t *pgdat = NODE_DATA(nid);
- unsigned long node_pages = 0;
void *node_high_memory;
num_physpages += pgdat->node_present_pages;
if (pgdat->node_spanned_pages)
- node_pages = free_all_bootmem_node(pgdat);
+ totalram_pages += free_all_bootmem_node(pgdat);
- totalram_pages += node_pages;
node_high_memory = (void *)__va((pgdat->node_start_pfn +
pgdat->node_spanned_pages) <<
@@ -501,31 +499,13 @@ void __init mem_init(void)
void free_initmem(void)
{
- unsigned long addr;
-
- addr = (unsigned long)(&__init_begin);
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
- free_page(addr);
- totalram_pages++;
- }
- printk("Freeing unused kernel memory: %ldk freed\n",
- ((unsigned long)&__init_end -
- (unsigned long)&__init_begin) >> 10);
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- unsigned long p;
- for (p = start; p < end; p += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(p));
- init_page_count(virt_to_page(p));
- free_page(p);
- totalram_pages++;
- }
- printk("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/sparc/include/asm/hugetlb.h b/arch/sparc/include/asm/hugetlb.h
index 7eb57d245044..e4cab465b81f 100644
--- a/arch/sparc/include/asm/hugetlb.h
+++ b/arch/sparc/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#define _ASM_SPARC64_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
void set_huge_pte_at(struct mm_struct *mm, unsigned long addr,
diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c
index 9b40c9c12a0c..6cfc1b09ec25 100644
--- a/arch/sparc/kernel/leon_smp.c
+++ b/arch/sparc/kernel/leon_smp.c
@@ -253,24 +253,15 @@ void __init leon_smp_done(void)
/* Free unneeded trap tables */
if (!cpu_present(1)) {
- ClearPageReserved(virt_to_page(&trapbase_cpu1));
- init_page_count(virt_to_page(&trapbase_cpu1));
- free_page((unsigned long)&trapbase_cpu1);
- totalram_pages++;
+ free_reserved_page(virt_to_page(&trapbase_cpu1));
num_physpages++;
}
if (!cpu_present(2)) {
- ClearPageReserved(virt_to_page(&trapbase_cpu2));
- init_page_count(virt_to_page(&trapbase_cpu2));
- free_page((unsigned long)&trapbase_cpu2);
- totalram_pages++;
+ free_reserved_page(virt_to_page(&trapbase_cpu2));
num_physpages++;
}
if (!cpu_present(3)) {
- ClearPageReserved(virt_to_page(&trapbase_cpu3));
- init_page_count(virt_to_page(&trapbase_cpu3));
- free_page((unsigned long)&trapbase_cpu3);
- totalram_pages++;
+ free_reserved_page(virt_to_page(&trapbase_cpu3));
num_physpages++;
}
/* Ok, they are spinning and ready to go. */
diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c
index 2daaaa6eda23..51561b8b15ba 100644
--- a/arch/sparc/kernel/sys_sparc_64.c
+++ b/arch/sparc/kernel/sys_sparc_64.c
@@ -290,7 +290,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
sysctl_legacy_va_layout) {
mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
/* We know it's 32-bit */
unsigned long task_size = STACK_TOP32;
@@ -302,7 +301,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
mm->mmap_base = PAGE_ALIGN(task_size - gap - random_factor);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/sparc/lib/Makefile b/arch/sparc/lib/Makefile
index 8410065f2862..dbe119b63b48 100644
--- a/arch/sparc/lib/Makefile
+++ b/arch/sparc/lib/Makefile
@@ -45,4 +45,3 @@ obj-y += iomap.o
obj-$(CONFIG_SPARC32) += atomic32.o ucmpdi2.o
obj-y += ksyms.o
obj-$(CONFIG_SPARC64) += PeeCeeI.o
-obj-y += usercopy.o
diff --git a/arch/sparc/lib/usercopy.c b/arch/sparc/lib/usercopy.c
deleted file mode 100644
index 5c4284ce1c03..000000000000
--- a/arch/sparc/lib/usercopy.c
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/bug.h>
-
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/arch/sparc/mm/init_32.c b/arch/sparc/mm/init_32.c
index 48e0c030e8f5..af472cf7c69a 100644
--- a/arch/sparc/mm/init_32.c
+++ b/arch/sparc/mm/init_32.c
@@ -282,14 +282,8 @@ static void map_high_region(unsigned long start_pfn, unsigned long end_pfn)
printk("mapping high region %08lx - %08lx\n", start_pfn, end_pfn);
#endif
- for (tmp = start_pfn; tmp < end_pfn; tmp++) {
- struct page *page = pfn_to_page(tmp);
-
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
- }
+ for (tmp = start_pfn; tmp < end_pfn; tmp++)
+ free_highmem_page(pfn_to_page(tmp));
}
void __init mem_init(void)
@@ -347,8 +341,6 @@ void __init mem_init(void)
map_high_region(start_pfn, end_pfn);
}
- totalram_pages += totalhigh_pages;
-
codepages = (((unsigned long) &_etext) - ((unsigned long)&_start));
codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
datapages = (((unsigned long) &_edata) - ((unsigned long)&_etext));
@@ -374,45 +366,14 @@ void __init mem_init(void)
void free_initmem (void)
{
- unsigned long addr;
- unsigned long freed;
-
- addr = (unsigned long)(&__init_begin);
- freed = (unsigned long)(&__init_end) - addr;
- for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- struct page *p;
-
- memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- p = virt_to_page(addr);
-
- ClearPageReserved(p);
- init_page_count(p);
- __free_page(p);
- totalram_pages++;
- num_physpages++;
- }
- printk(KERN_INFO "Freeing unused kernel memory: %ldk freed\n",
- freed >> 10);
+ num_physpages += free_initmem_default(POISON_FREE_INITMEM);
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < end)
- printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
- (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- struct page *p;
-
- memset((void *)start, POISON_FREE_INITMEM, PAGE_SIZE);
- p = virt_to_page(start);
-
- ClearPageReserved(p);
- init_page_count(p);
- __free_page(p);
- totalram_pages++;
- num_physpages++;
- }
+ num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM,
+ "initrd");
}
#endif
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 076068f4459e..abc3ae5aa928 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -2060,8 +2060,7 @@ void __init mem_init(void)
/* We subtract one to account for the mem_map_zero page
* allocated below.
*/
- totalram_pages -= 1;
- num_physpages = totalram_pages;
+ num_physpages = totalram_pages - 1;
/*
* Set up the zero page, mark it reserved, so that page count
@@ -2072,7 +2071,7 @@ void __init mem_init(void)
prom_printf("paging_init: Cannot alloc zero page.\n");
prom_halt();
}
- SetPageReserved(mem_map_zero);
+ mark_page_reserved(mem_map_zero);
codepages = (((unsigned long) _etext) - ((unsigned long) _start));
codepages = PAGE_ALIGN(codepages) >> PAGE_SHIFT;
@@ -2112,37 +2111,22 @@ void free_initmem(void)
initend = (unsigned long)(__init_end) & PAGE_MASK;
for (; addr < initend; addr += PAGE_SIZE) {
unsigned long page;
- struct page *p;
page = (addr +
((unsigned long) __va(kern_base)) -
((unsigned long) KERNBASE));
memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- if (do_free) {
- p = virt_to_page(page);
-
- ClearPageReserved(p);
- init_page_count(p);
- __free_page(p);
- totalram_pages++;
- }
+ if (do_free)
+ free_reserved_page(virt_to_page(page));
}
}
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < end)
- printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- struct page *p = virt_to_page(start);
-
- ClearPageReserved(p);
- init_page_count(p);
- __free_page(p);
- totalram_pages++;
- }
+ num_physpages += free_reserved_area(start, end, POISON_FREE_INITMEM,
+ "initrd");
}
#endif
diff --git a/arch/tile/Kconfig b/arch/tile/Kconfig
index 5b46f198d1ef..2001fee2a154 100644
--- a/arch/tile/Kconfig
+++ b/arch/tile/Kconfig
@@ -18,6 +18,7 @@ config TILE
select HAVE_DEBUG_BUGVERBOSE
select VIRT_TO_BUS
select SYS_HYPERVISOR
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select GENERIC_CLOCKEVENTS
select MODULES_USE_ELF_RELA
@@ -107,13 +108,6 @@ config STRICT_DEVMEM
config SMP
def_bool y
-# Allow checking for compile-time determined overflow errors in
-# copy_from_user(). There are still unprovable places in the
-# generic code as of 2.6.34, so this option is not really compatible
-# with -Werror, which is more useful in general.
-config DEBUG_COPY_FROM_USER
- def_bool n
-
config HVC_TILE
depends on TTY
select HVC_DRIVER
diff --git a/arch/tile/include/asm/hugetlb.h b/arch/tile/include/asm/hugetlb.h
index 0f885af2b621..3257733003f8 100644
--- a/arch/tile/include/asm/hugetlb.h
+++ b/arch/tile/include/asm/hugetlb.h
@@ -16,6 +16,7 @@
#define _ASM_TILE_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/tile/include/asm/uaccess.h b/arch/tile/include/asm/uaccess.h
index 9ab078a4605d..8a082bc6bca5 100644
--- a/arch/tile/include/asm/uaccess.h
+++ b/arch/tile/include/asm/uaccess.h
@@ -395,7 +395,12 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
return n;
}
-#ifdef CONFIG_DEBUG_COPY_FROM_USER
+#ifdef CONFIG_DEBUG_STRICT_USER_COPY_CHECKS
+/*
+ * There are still unprovable places in the generic code as of 2.6.34, so this
+ * option is not really compatible with -Werror, which is more useful in
+ * general.
+ */
extern void copy_from_user_overflow(void)
__compiletime_warning("copy_from_user() size is not provably correct");
diff --git a/arch/tile/kernel/early_printk.c b/arch/tile/kernel/early_printk.c
index afb9c9a0d887..34d72a151bf3 100644
--- a/arch/tile/kernel/early_printk.c
+++ b/arch/tile/kernel/early_printk.c
@@ -17,6 +17,7 @@
#include <linux/init.h>
#include <linux/string.h>
#include <linux/irqflags.h>
+#include <linux/printk.h>
#include <asm/setup.h>
#include <hv/hypervisor.h>
@@ -33,25 +34,8 @@ static struct console early_hv_console = {
};
/* Direct interface for emergencies */
-static struct console *early_console = &early_hv_console;
-static int early_console_initialized;
static int early_console_complete;
-static void early_vprintk(const char *fmt, va_list ap)
-{
- char buf[512];
- int n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
-}
-
-void early_printk(const char *fmt, ...)
-{
- va_list ap;
- va_start(ap, fmt);
- early_vprintk(fmt, ap);
- va_end(ap);
-}
-
void early_panic(const char *fmt, ...)
{
va_list ap;
@@ -69,14 +53,13 @@ static int __initdata keep_early;
static int __init setup_early_printk(char *str)
{
- if (early_console_initialized)
+ if (early_console)
return 1;
if (str != NULL && strncmp(str, "keep", 4) == 0)
keep_early = 1;
early_console = &early_hv_console;
- early_console_initialized = 1;
register_console(early_console);
return 0;
@@ -85,12 +68,12 @@ static int __init setup_early_printk(char *str)
void __init disable_early_printk(void)
{
early_console_complete = 1;
- if (!early_console_initialized || !early_console)
+ if (!early_console)
return;
if (!keep_early) {
early_printk("disabling early console\n");
unregister_console(early_console);
- early_console_initialized = 0;
+ early_console = NULL;
} else {
early_printk("keeping early console\n");
}
@@ -98,7 +81,7 @@ void __init disable_early_printk(void)
void warn_early_printk(void)
{
- if (early_console_complete || early_console_initialized)
+ if (early_console_complete || early_console)
return;
early_printk("\
Machine shutting down before console output is fully initialized.\n\
diff --git a/arch/tile/lib/uaccess.c b/arch/tile/lib/uaccess.c
index f8d398c9ee7f..030abe3ee4f1 100644
--- a/arch/tile/lib/uaccess.c
+++ b/arch/tile/lib/uaccess.c
@@ -22,11 +22,3 @@ int __range_ok(unsigned long addr, unsigned long size)
is_arch_mappable_range(addr, size));
}
EXPORT_SYMBOL(__range_ok);
-
-#ifdef CONFIG_DEBUG_COPY_FROM_USER
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
-#endif
diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c
index f96f4cec602a..d67d91ebf63e 100644
--- a/arch/tile/mm/mmap.c
+++ b/arch/tile/mm/mmap.c
@@ -66,10 +66,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (!is_32bit || rlimit(RLIMIT_STACK) == RLIM_INFINITY) {
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base(mm);
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/tile/mm/pgtable.c b/arch/tile/mm/pgtable.c
index b3b4972c2451..dfd63ce87327 100644
--- a/arch/tile/mm/pgtable.c
+++ b/arch/tile/mm/pgtable.c
@@ -592,12 +592,7 @@ void iounmap(volatile void __iomem *addr_in)
in parallel. Reuse of the virtual address is prevented by
leaving it in the global lists until we're done with it.
cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == addr)
- break;
- }
- read_unlock(&vmlist_lock);
+ p = find_vm_area((void *)addr);
if (!p) {
pr_err("iounmap: bad address %p\n", addr);
diff --git a/arch/um/kernel/early_printk.c b/arch/um/kernel/early_printk.c
index 49480f092456..4a0800bc37b2 100644
--- a/arch/um/kernel/early_printk.c
+++ b/arch/um/kernel/early_printk.c
@@ -16,7 +16,7 @@ static void early_console_write(struct console *con, const char *s, unsigned int
um_early_printk(s, n);
}
-static struct console early_console = {
+static struct console early_console_dev = {
.name = "earlycon",
.write = early_console_write,
.flags = CON_BOOT,
@@ -25,8 +25,10 @@ static struct console early_console = {
static int __init setup_early_printk(char *buf)
{
- register_console(&early_console);
-
+ if (!early_console) {
+ early_console = &early_console_dev;
+ register_console(&early_console_dev);
+ }
return 0;
}
diff --git a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
index 5abcbfbe7e25..9df292b270a8 100644
--- a/arch/um/kernel/mem.c
+++ b/arch/um/kernel/mem.c
@@ -42,17 +42,12 @@ static unsigned long brk_end;
static void setup_highmem(unsigned long highmem_start,
unsigned long highmem_len)
{
- struct page *page;
unsigned long highmem_pfn;
int i;
highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT;
- for (i = 0; i < highmem_len >> PAGE_SHIFT; i++) {
- page = &mem_map[highmem_pfn + i];
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- }
+ for (i = 0; i < highmem_len >> PAGE_SHIFT; i++)
+ free_highmem_page(&mem_map[highmem_pfn + i]);
}
#endif
@@ -73,18 +68,13 @@ void __init mem_init(void)
totalram_pages = free_all_bootmem();
max_low_pfn = totalram_pages;
#ifdef CONFIG_HIGHMEM
- totalhigh_pages = highmem >> PAGE_SHIFT;
- totalram_pages += totalhigh_pages;
+ setup_highmem(end_iomem, highmem);
#endif
num_physpages = totalram_pages;
max_pfn = totalram_pages;
printk(KERN_INFO "Memory: %luk available\n",
nr_free_pages() << (PAGE_SHIFT-10));
kmalloc_ok = 1;
-
-#ifdef CONFIG_HIGHMEM
- setup_highmem(end_iomem, highmem);
-#endif
}
/*
@@ -254,15 +244,7 @@ void free_initmem(void)
#ifdef CONFIG_BLK_DEV_INITRD
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (start < end)
- printk(KERN_INFO "Freeing initrd memory: %ldk freed\n",
- (end - start) >> 10);
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page(start);
- totalram_pages++;
- }
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
diff --git a/arch/unicore32/kernel/early_printk.c b/arch/unicore32/kernel/early_printk.c
index 3922255f1fa8..9be0d5d02a9a 100644
--- a/arch/unicore32/kernel/early_printk.c
+++ b/arch/unicore32/kernel/early_printk.c
@@ -33,21 +33,17 @@ static struct console early_ocd_console = {
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_ocd_console;
-
-static int __initdata keep_early;
-
static int __init setup_early_printk(char *buf)
{
- if (!buf)
+ int keep_early;
+
+ if (!buf || early_console)
return 0;
if (strstr(buf, "keep"))
keep_early = 1;
- if (!strncmp(buf, "ocd", 3))
- early_console = &early_ocd_console;
+ early_console = &early_ocd_console;
if (keep_early)
early_console->flags &= ~CON_BOOT;
diff --git a/arch/unicore32/mm/init.c b/arch/unicore32/mm/init.c
index de186bde8975..63df12d71ce3 100644
--- a/arch/unicore32/mm/init.c
+++ b/arch/unicore32/mm/init.c
@@ -66,6 +66,9 @@ void show_mem(unsigned int filter)
printk(KERN_DEFAULT "Mem-info:\n");
show_free_areas(filter);
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
+
for_each_bank(i, mi) {
struct membank *bank = &mi->bank[i];
unsigned int pfn1, pfn2;
@@ -313,24 +316,6 @@ void __init bootmem_init(void)
max_pfn = max_high - PHYS_PFN_OFFSET;
}
-static inline int free_area(unsigned long pfn, unsigned long end, char *s)
-{
- unsigned int pages = 0, size = (end - pfn) << (PAGE_SHIFT - 10);
-
- for (; pfn < end; pfn++) {
- struct page *page = pfn_to_page(pfn);
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- pages++;
- }
-
- if (size && s)
- printk(KERN_INFO "Freeing %s memory: %dK\n", s, size);
-
- return pages;
-}
-
static inline void
free_memmap(unsigned long start_pfn, unsigned long end_pfn)
{
@@ -404,9 +389,9 @@ void __init mem_init(void)
max_mapnr = pfn_to_page(max_pfn + PHYS_PFN_OFFSET) - mem_map;
- /* this will put all unused low memory onto the freelists */
free_unused_memmap(&meminfo);
+ /* this will put all unused low memory onto the freelists */
totalram_pages += free_all_bootmem();
reserved_pages = free_pages = 0;
@@ -491,9 +476,7 @@ void __init mem_init(void)
void free_initmem(void)
{
- totalram_pages += free_area(__phys_to_pfn(__pa(__init_begin)),
- __phys_to_pfn(__pa(__init_end)),
- "init");
+ free_initmem_default(0);
}
#ifdef CONFIG_BLK_DEV_INITRD
@@ -503,9 +486,7 @@ static int keep_initrd;
void free_initrd_mem(unsigned long start, unsigned long end)
{
if (!keep_initrd)
- totalram_pages += free_area(__phys_to_pfn(__pa(start)),
- __phys_to_pfn(__pa(end)),
- "initrd");
+ free_reserved_area(start, end, 0, "initrd");
}
static int __init keepinitrd_setup(char *__unused)
diff --git a/arch/unicore32/mm/ioremap.c b/arch/unicore32/mm/ioremap.c
index b7a605597b08..13068ee22f33 100644
--- a/arch/unicore32/mm/ioremap.c
+++ b/arch/unicore32/mm/ioremap.c
@@ -235,7 +235,7 @@ EXPORT_SYMBOL(__uc32_ioremap_cached);
void __uc32_iounmap(volatile void __iomem *io_addr)
{
void *addr = (void *)(PAGE_MASK & (unsigned long)io_addr);
- struct vm_struct **p, *tmp;
+ struct vm_struct *vm;
/*
* If this is a section based mapping we need to handle it
@@ -244,17 +244,10 @@ void __uc32_iounmap(volatile void __iomem *io_addr)
* all the mappings before the area can be reclaimed
* by someone else.
*/
- write_lock(&vmlist_lock);
- for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
- if ((tmp->flags & VM_IOREMAP) && (tmp->addr == addr)) {
- if (tmp->flags & VM_UNICORE_SECTION_MAPPING) {
- unmap_area_sections((unsigned long)tmp->addr,
- tmp->size);
- }
- break;
- }
- }
- write_unlock(&vmlist_lock);
+ vm = find_vm_area(addr);
+ if (vm && (vm->flags & VM_IOREMAP) &&
+ (vm->flags & VM_UNICORE_SECTION_MAPPING))
+ unmap_area_sections((unsigned long)vm->addr, vm->size);
vunmap(addr);
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 24f12c961339..75267ecba812 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -20,6 +20,7 @@ config X86_64
### Arch settings
config X86
def_bool y
+ select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
select HAVE_AOUT if X86_32
select HAVE_UNSTABLE_SCHED_CLOCK
select ARCH_SUPPORTS_NUMA_BALANCING
@@ -64,6 +65,7 @@ config X86
select HAVE_KERNEL_LZMA
select HAVE_KERNEL_XZ
select HAVE_KERNEL_LZO
+ select HAVE_KERNEL_LZ4
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index b322f124ee3c..dea0da520e13 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -292,20 +292,6 @@ config OPTIMIZE_INLINING
If unsure, say N.
-config DEBUG_STRICT_USER_COPY_CHECKS
- bool "Strict copy size checks"
- depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
- ---help---
- Enabling this option turns a certain set of sanity checks for user
- copy operations into compile time failures.
-
- The copy_from_user() etc checks are there to help test if there
- are sufficient security checks on the length argument of
- the copy operation, by having gcc prove that the argument is
- within bounds.
-
- If unsure, or if you run an older (pre 4.4) gcc, say N.
-
config DEBUG_NMI_SELFTEST
bool "NMI Selftest"
depends on DEBUG_KERNEL && X86_LOCAL_APIC
diff --git a/arch/x86/boot/compressed/Makefile b/arch/x86/boot/compressed/Makefile
index 8a84501acb1b..c275db562aa9 100644
--- a/arch/x86/boot/compressed/Makefile
+++ b/arch/x86/boot/compressed/Makefile
@@ -4,7 +4,7 @@
# create a compressed vmlinux image from the original vmlinux
#
-targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
+targets := vmlinux.lds vmlinux vmlinux.bin vmlinux.bin.gz vmlinux.bin.bz2 vmlinux.bin.lzma vmlinux.bin.xz vmlinux.bin.lzo vmlinux.bin.lz4 head_$(BITS).o misc.o string.o cmdline.o early_serial_console.o piggy.o
KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2
KBUILD_CFLAGS += -fno-strict-aliasing -fPIC
@@ -64,12 +64,15 @@ $(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
$(call if_changed,xzkern)
$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
$(call if_changed,lzo)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
+ $(call if_changed,lz4)
suffix-$(CONFIG_KERNEL_GZIP) := gz
suffix-$(CONFIG_KERNEL_BZIP2) := bz2
suffix-$(CONFIG_KERNEL_LZMA) := lzma
suffix-$(CONFIG_KERNEL_XZ) := xz
suffix-$(CONFIG_KERNEL_LZO) := lzo
+suffix-$(CONFIG_KERNEL_LZ4) := lz4
quiet_cmd_mkpiggy = MKPIGGY $@
cmd_mkpiggy = $(obj)/mkpiggy $< > $@ || ( rm -f $@ ; false )
diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 7cb56c6ca351..0319c88290a5 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -145,6 +145,10 @@ static int lines, cols;
#include "../../../../lib/decompress_unlzo.c"
#endif
+#ifdef CONFIG_KERNEL_LZ4
+#include "../../../../lib/decompress_unlz4.c"
+#endif
+
static void scroll(void)
{
int i;
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index 03abf9b70011..0f9a4728a467 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -162,7 +162,6 @@ static int aout_core_dump(long signr, struct pt_regs *regs, struct file *file,
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(current->comm));
dump.u_ar0 = offsetof(struct user32, regs);
dump.signal = signr;
@@ -309,8 +308,6 @@ static int load_aout_binary(struct linux_binprm *bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = TASK_UNMAPPED_BASE;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, IA32_STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h
index cccd07fa5e3a..b8e9224f0b45 100644
--- a/arch/x86/include/asm/e820.h
+++ b/arch/x86/include/asm/e820.h
@@ -17,6 +17,8 @@ extern unsigned long pci_mem_start;
extern int e820_any_mapped(u64 start, u64 end, unsigned type);
extern int e820_all_mapped(u64 start, u64 end, unsigned type);
extern void e820_add_region(u64 start, u64 size, int type);
+extern void e820_add_limit_region(u64 start, u64 size, int type);
+extern void e820_adjust_region(u64 *start, u64 *size);
extern void e820_print_map(char *who);
extern int
sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, u32 *pnr_map);
diff --git a/arch/x86/include/asm/hugetlb.h b/arch/x86/include/asm/hugetlb.h
index bdd35dbd0605..a8091216963b 100644
--- a/arch/x86/include/asm/hugetlb.h
+++ b/arch/x86/include/asm/hugetlb.h
@@ -2,6 +2,7 @@
#define _ASM_X86_HUGETLB_H
#include <asm/page.h>
+#include <asm-generic/hugetlb.h>
static inline int is_hugepage_only_range(struct mm_struct *mm,
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index 5f81bcefbe14..895f62e36ebb 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -44,10 +44,10 @@ obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o perf_event_amd_ibs.o
obj-$(CONFIG_HYPERVISOR_GUEST) += vmware.o hypervisor.o mshyperv.o
quiet_cmd_mkcapflags = MKCAP $@
- cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
+ cmd_mkcapflags = $(CONFIG_SHELL) $(srctree)/$(src)/mkcapflags.sh $< $@
cpufeature = $(src)/../../include/asm/cpufeature.h
targets += capflags.c
-$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
+$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.sh FORCE
$(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
deleted file mode 100644
index 091972ef49de..000000000000
--- a/arch/x86/kernel/cpu/mkcapflags.pl
+++ /dev/null
@@ -1,48 +0,0 @@
-#!/usr/bin/perl -w
-#
-# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
-#
-
-($in, $out) = @ARGV;
-
-open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
-open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
-
-print OUT "#ifndef _ASM_X86_CPUFEATURE_H\n";
-print OUT "#include <asm/cpufeature.h>\n";
-print OUT "#endif\n";
-print OUT "\n";
-print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
-
-%features = ();
-$err = 0;
-
-while (defined($line = <IN>)) {
- if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
- $macro = $1;
- $feature = "\L$2";
- $tail = $3;
- if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
- $feature = "\L$1";
- }
-
- next if ($feature eq '');
-
- if ($features{$feature}++) {
- print STDERR "$in: duplicate feature name: $feature\n";
- $err++;
- }
- printf OUT "\t%-32s = \"%s\",\n", "[$macro]", $feature;
- }
-}
-print OUT "};\n";
-
-close(IN);
-close(OUT);
-
-if ($err) {
- unlink($out);
- exit(1);
-}
-
-exit(0);
diff --git a/arch/x86/kernel/cpu/mkcapflags.sh b/arch/x86/kernel/cpu/mkcapflags.sh
new file mode 100644
index 000000000000..2bf616505499
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.sh
@@ -0,0 +1,41 @@
+#!/bin/sh
+#
+# Generate the x86_cap_flags[] array from include/asm/cpufeature.h
+#
+
+IN=$1
+OUT=$2
+
+TABS="$(printf '\t\t\t\t\t')"
+trap 'rm "$OUT"' EXIT
+
+(
+ echo "#ifndef _ASM_X86_CPUFEATURE_H"
+ echo "#include <asm/cpufeature.h>"
+ echo "#endif"
+ echo ""
+ echo "const char * const x86_cap_flags[NCAPINTS*32] = {"
+
+ # Iterate through any input lines starting with #define X86_FEATURE_
+ sed -n -e 's/\t/ /g' -e 's/^ *# *define *X86_FEATURE_//p' $IN |
+ while read i
+ do
+ # Name is everything up to the first whitespace
+ NAME="$(echo "$i" | sed 's/ .*//')"
+
+ # If the /* comment */ starts with a quote string, grab that.
+ VALUE="$(echo "$i" | sed -n 's@.*/\* *\("[^"]*"\).*\*/@\1@p')"
+ [ -z "$VALUE" ] && VALUE="\"$NAME\""
+ [ "$VALUE" == '""' ] && continue
+
+ # Name is uppercase, VALUE is all lowercase
+ VALUE="$(echo "$VALUE" | tr A-Z a-z)"
+
+ TABCOUNT=$(( ( 5*8 - 14 - $(echo "$NAME" | wc -c) ) / 8 ))
+ printf "\t[%s]%.*s = %s,\n" \
+ "X86_FEATURE_$NAME" "$TABCOUNT" "$TABS" "$VALUE"
+ done
+ echo "};"
+) > $OUT
+
+trap - EXIT
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d32abeabbda5..0d5bb689649a 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -47,6 +47,7 @@ unsigned long pci_mem_start = 0xaeedbabe;
#ifdef CONFIG_PCI
EXPORT_SYMBOL(pci_mem_start);
#endif
+static u64 mem_limit = ~0ULL;
/*
* This function checks if any part of the range <start,end> is mapped
@@ -108,7 +109,7 @@ int __init e820_all_mapped(u64 start, u64 end, unsigned type)
* Add a memory region to the kernel e820 map.
*/
static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
- int type)
+ int type, bool limited)
{
int x = e820x->nr_map;
@@ -119,6 +120,22 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
return;
}
+ if (limited) {
+ if (start >= mem_limit) {
+ printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long)start,
+ (unsigned long long)(start + size - 1));
+ return;
+ }
+
+ if (mem_limit - start < size) {
+ printk(KERN_ERR "e820: ignoring [mem %#010llx-%#010llx]\n",
+ (unsigned long long)mem_limit,
+ (unsigned long long)(start + size - 1));
+ size = mem_limit - start;
+ }
+ }
+
e820x->map[x].addr = start;
e820x->map[x].size = size;
e820x->map[x].type = type;
@@ -127,7 +144,37 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
void __init e820_add_region(u64 start, u64 size, int type)
{
- __e820_add_region(&e820, start, size, type);
+ __e820_add_region(&e820, start, size, type, false);
+}
+
+/*
+ * do_add_efi_memmap() calls this function().
+ *
+ * Note: BOOT_SERVICES_{CODE,DATA} regions on some efi machines are marked
+ * as E820_RAM, and they are needed to be mapped. Please use e820_add_region()
+ * to add BOOT_SERVICES_{CODE,DATA} regions.
+ */
+void __init e820_add_limit_region(u64 start, u64 size, int type)
+{
+ /*
+ * efi_init() is called after finish_e820_parsing(), so we should
+ * check whether [start, start + size) contains address above
+ * mem_limit if the type is E820_RAM.
+ */
+ __e820_add_region(&e820, start, size, type, type == E820_RAM);
+}
+
+void __init e820_adjust_region(u64 *start, u64 *size)
+{
+ if (*start >= mem_limit) {
+ *size = 0;
+ return;
+ }
+
+ if (mem_limit - *start < *size)
+ *size = mem_limit - *start;
+
+ return;
}
static void __init e820_print_type(u32 type)
@@ -455,8 +502,9 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
/* new range is totally covered? */
if (ei->addr < start && ei_end > end) {
- __e820_add_region(e820x, start, size, new_type);
- __e820_add_region(e820x, end, ei_end - end, ei->type);
+ __e820_add_region(e820x, start, size, new_type, false);
+ __e820_add_region(e820x, end, ei_end - end, ei->type,
+ false);
ei->size = start - ei->addr;
real_updated_size += size;
continue;
@@ -469,7 +517,7 @@ static u64 __init __e820_update_range(struct e820map *e820x, u64 start,
continue;
__e820_add_region(e820x, final_start, final_end - final_start,
- new_type);
+ new_type, false);
real_updated_size += final_end - final_start;
@@ -809,7 +857,7 @@ static int userdef __initdata;
/* "mem=nopentium" disables the 4MB page tables. */
static int __init parse_memopt(char *p)
{
- u64 mem_size;
+ char *oldp;
if (!p)
return -EINVAL;
@@ -825,11 +873,11 @@ static int __init parse_memopt(char *p)
}
userdef = 1;
- mem_size = memparse(p, &p);
+ oldp = p;
+ mem_limit = memparse(p, &p);
/* don't remove all of memory when handling "mem={invalid}" param */
- if (mem_size == 0)
+ if (mem_limit == 0 || p == oldp)
return -EINVAL;
- e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
return 0;
}
@@ -895,6 +943,12 @@ early_param("memmap", parse_memmap_opt);
void __init finish_e820_parsing(void)
{
+ if (mem_limit != ~0ULL) {
+ userdef = 1;
+ e820_remove_range(mem_limit, ULLONG_MAX - mem_limit,
+ E820_RAM, 1);
+ }
+
if (userdef) {
u32 nr = e820.nr_map;
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 9b9f18b49918..d15f575a861b 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -169,25 +169,9 @@ static struct console early_serial_console = {
.index = -1,
};
-/* Direct interface for emergencies */
-static struct console *early_console = &early_vga_console;
-static int __initdata early_console_initialized;
-
-asmlinkage void early_printk(const char *fmt, ...)
-{
- char buf[512];
- int n;
- va_list ap;
-
- va_start(ap, fmt);
- n = vscnprintf(buf, sizeof(buf), fmt, ap);
- early_console->write(early_console, buf, n);
- va_end(ap);
-}
-
static inline void early_console_register(struct console *con, int keep_early)
{
- if (early_console->index != -1) {
+ if (con->index != -1) {
printk(KERN_CRIT "ERROR: earlyprintk= %s already used\n",
con->name);
return;
@@ -207,9 +191,8 @@ static int __init setup_early_printk(char *buf)
if (!buf)
return 0;
- if (early_console_initialized)
+ if (early_console)
return 0;
- early_console_initialized = 1;
keep = (strstr(buf, "keep") != NULL);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index f0312d746402..3eb18acd0e40 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -689,9 +689,3 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
return n;
}
EXPORT_SYMBOL(_copy_from_user);
-
-void copy_from_user_overflow(void)
-{
- WARN(1, "Buffer overflow detected!\n");
-}
-EXPORT_SYMBOL(copy_from_user_overflow);
diff --git a/arch/x86/mm/highmem_32.c b/arch/x86/mm/highmem_32.c
index 6f31ee56c008..252b8f5489ba 100644
--- a/arch/x86/mm/highmem_32.c
+++ b/arch/x86/mm/highmem_32.c
@@ -137,5 +137,4 @@ void __init set_highmem_pages_init(void)
add_highpages_with_active_regions(nid, zone_start_pfn,
zone_end_pfn);
}
- totalram_pages += totalhigh_pages;
}
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 59b7fc453277..fdc5dca14fb3 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -515,11 +515,8 @@ void free_init_pages(char *what, unsigned long begin, unsigned long end)
printk(KERN_INFO "Freeing %s: %luk freed\n", what, (end - begin) >> 10);
for (; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(addr));
- init_page_count(virt_to_page(addr));
memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
- free_page(addr);
- totalram_pages++;
+ free_reserved_page(virt_to_page(addr));
}
#endif
}
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 2d19001151d5..3ac7e319918d 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -427,14 +427,6 @@ static void __init permanent_kmaps_init(pgd_t *pgd_base)
pkmap_page_table = pte;
}
-static void __init add_one_highpage_init(struct page *page)
-{
- ClearPageReserved(page);
- init_page_count(page);
- __free_page(page);
- totalhigh_pages++;
-}
-
void __init add_highpages_with_active_regions(int nid,
unsigned long start_pfn, unsigned long end_pfn)
{
@@ -448,7 +440,7 @@ void __init add_highpages_with_active_regions(int nid,
start_pfn, end_pfn);
for ( ; pfn < e_pfn; pfn++)
if (pfn_valid(pfn))
- add_one_highpage_init(pfn_to_page(pfn));
+ free_highmem_page(pfn_to_page(pfn));
}
}
#else
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 474e28f10815..bcbe92412c58 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1019,6 +1019,7 @@ void __ref vmemmap_free(struct page *memmap, unsigned long nr_pages)
remove_pagetable(start, end, false);
}
+#ifdef CONFIG_MEMORY_HOTREMOVE
static void __meminit
kernel_physical_mapping_remove(unsigned long start, unsigned long end)
{
@@ -1028,7 +1029,6 @@ kernel_physical_mapping_remove(unsigned long start, unsigned long end)
remove_pagetable(start, end, true);
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
int __ref arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
@@ -1067,10 +1067,9 @@ void __init mem_init(void)
/* clear_bss() already clear the empty_zero_page */
- reservedpages = 0;
-
- /* this will put all low memory onto the freelists */
register_page_bootmem_info();
+
+ /* this will put all memory onto the freelists */
totalram_pages = free_all_bootmem();
absent_pages = absent_pages_in_range(0, max_pfn);
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index 78fe3f1ac49f..9a1e6583910c 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -282,12 +282,7 @@ void iounmap(volatile void __iomem *addr)
in parallel. Reuse of the virtual address is prevented by
leaving it in the global lists until we're done with it.
cpa takes care of the direct mappings. */
- read_lock(&vmlist_lock);
- for (p = vmlist; p; p = p->next) {
- if (p->addr == (void __force *)addr)
- break;
- }
- read_unlock(&vmlist_lock);
+ p = find_vm_area((void __force *)addr);
if (!p) {
printk(KERN_ERR "iounmap: bad address %p\n", addr);
diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c
index 845df6835f9f..62c29a5bfe26 100644
--- a/arch/x86/mm/mmap.c
+++ b/arch/x86/mm/mmap.c
@@ -115,10 +115,8 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
if (mmap_is_legacy()) {
mm->mmap_base = mmap_legacy_base();
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
} else {
mm->mmap_base = mmap_base();
mm->get_unmapped_area = arch_get_unmapped_area_topdown;
- mm->unmap_area = arch_unmap_area_topdown;
}
}
diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c
index b0086567271c..934610802e3f 100644
--- a/arch/x86/mm/pageattr-test.c
+++ b/arch/x86/mm/pageattr-test.c
@@ -130,13 +130,12 @@ static int pageattr_test(void)
}
failed += print_split(&sa);
- srandom32(100);
for (i = 0; i < NTEST; i++) {
- unsigned long pfn = random32() % max_pfn_mapped;
+ unsigned long pfn = prandom_u32() % max_pfn_mapped;
addr[i] = (unsigned long)__va(pfn << PAGE_SHIFT);
- len[i] = random32() % 100;
+ len[i] = prandom_u32() % 100;
len[i] = min_t(unsigned long, len[i], max_pfn_mapped - pfn - 1);
if (len[i] == 0)
diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c
index 5f2ecaf3f9d8..4f65e1d05119 100644
--- a/arch/x86/platform/efi/efi.c
+++ b/arch/x86/platform/efi/efi.c
@@ -316,10 +316,17 @@ static void __init do_add_efi_memmap(void)
int e820_type;
switch (md->type) {
- case EFI_LOADER_CODE:
- case EFI_LOADER_DATA:
case EFI_BOOT_SERVICES_CODE:
case EFI_BOOT_SERVICES_DATA:
+ /* EFI_BOOT_SERVICES_{CODE,DATA} needs to be mapped */
+ if (md->attribute & EFI_MEMORY_WB)
+ e820_type = E820_RAM;
+ else
+ e820_type = E820_RESERVED;
+ e820_add_region(start, size, e820_type);
+ continue;
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
case EFI_CONVENTIONAL_MEMORY:
if (md->attribute & EFI_MEMORY_WB)
e820_type = E820_RAM;
@@ -344,7 +351,7 @@ static void __init do_add_efi_memmap(void)
e820_type = E820_RESERVED;
break;
}
- e820_add_region(start, size, e820_type);
+ e820_add_limit_region(start, size, e820_type);
}
sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
}
@@ -451,6 +458,8 @@ void __init efi_free_boot_services(void)
md->type != EFI_BOOT_SERVICES_DATA)
continue;
+ e820_adjust_region(&start, &size);
+
/* Could not reserve boot area */
if (!size)
continue;
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index 7a5156ffebb6..bba125b4bb06 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -208,32 +208,17 @@ void __init mem_init(void)
highmemsize >> 10);
}
-void
-free_reserved_mem(void *start, void *end)
-{
- for (; start < end; start += PAGE_SIZE) {
- ClearPageReserved(virt_to_page(start));
- init_page_count(virt_to_page(start));
- free_page((unsigned long)start);
- totalram_pages++;
- }
-}
-
#ifdef CONFIG_BLK_DEV_INITRD
extern int initrd_is_mapped;
void free_initrd_mem(unsigned long start, unsigned long end)
{
- if (initrd_is_mapped) {
- free_reserved_mem((void*)start, (void*)end);
- printk ("Freeing initrd memory: %ldk freed\n",(end-start)>>10);
- }
+ if (initrd_is_mapped)
+ free_reserved_area(start, end, 0, "initrd");
}
#endif
void free_initmem(void)
{
- free_reserved_mem(__init_begin, __init_end);
- printk("Freeing unused kernel memory: %zuk freed\n",
- (__init_end - __init_begin) >> 10);
+ free_initmem_default(0);
}
diff --git a/block/blk-core.c b/block/blk-core.c
index 074b758efc42..186603b36c44 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -151,7 +151,8 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
EXPORT_SYMBOL(blk_rq_init);
static void req_bio_endio(struct request *rq, struct bio *bio,
- unsigned int nbytes, int error)
+ unsigned int nbytes, int error,
+ struct batch_complete *batch)
{
if (error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
@@ -175,7 +176,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio,
/* don't actually finish bio if it's part of flush sequence */
if (bio->bi_size == 0 && !(rq->cmd_flags & REQ_FLUSH_SEQ))
- bio_endio(bio, error);
+ bio_endio_batch(bio, error, batch);
}
void blk_dump_rq_flags(struct request *rq, char *msg)
@@ -2250,7 +2251,8 @@ EXPORT_SYMBOL(blk_fetch_request);
* %false - this request doesn't have any more data
* %true - this request has more data
**/
-bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
+bool blk_update_request(struct request *req, int error, unsigned int nr_bytes,
+ struct batch_complete *batch)
{
int total_bytes, bio_nbytes, next_idx = 0;
struct bio *bio;
@@ -2306,7 +2308,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
if (nr_bytes >= bio->bi_size) {
req->bio = bio->bi_next;
nbytes = bio->bi_size;
- req_bio_endio(req, bio, nbytes, error);
+ req_bio_endio(req, bio, nbytes, error, batch);
next_idx = 0;
bio_nbytes = 0;
} else {
@@ -2368,7 +2370,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
* if the request wasn't completed, update state
*/
if (bio_nbytes) {
- req_bio_endio(req, bio, bio_nbytes, error);
+ req_bio_endio(req, bio, bio_nbytes, error, batch);
bio->bi_idx += next_idx;
bio_iovec(bio)->bv_offset += nr_bytes;
bio_iovec(bio)->bv_len -= nr_bytes;
@@ -2405,14 +2407,15 @@ EXPORT_SYMBOL_GPL(blk_update_request);
static bool blk_update_bidi_request(struct request *rq, int error,
unsigned int nr_bytes,
- unsigned int bidi_bytes)
+ unsigned int bidi_bytes,
+ struct batch_complete *batch)
{
- if (blk_update_request(rq, error, nr_bytes))
+ if (blk_update_request(rq, error, nr_bytes, batch))
return true;
/* Bidi request must be completed as a whole */
if (unlikely(blk_bidi_rq(rq)) &&
- blk_update_request(rq->next_rq, error, bidi_bytes))
+ blk_update_request(rq->next_rq, error, bidi_bytes, batch))
return true;
if (blk_queue_add_random(rq->q))
@@ -2495,7 +2498,7 @@ static bool blk_end_bidi_request(struct request *rq, int error,
struct request_queue *q = rq->q;
unsigned long flags;
- if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+ if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes, NULL))
return true;
spin_lock_irqsave(q->queue_lock, flags);
@@ -2521,9 +2524,10 @@ static bool blk_end_bidi_request(struct request *rq, int error,
* %true - still buffers pending for this request
**/
bool __blk_end_bidi_request(struct request *rq, int error,
- unsigned int nr_bytes, unsigned int bidi_bytes)
+ unsigned int nr_bytes, unsigned int bidi_bytes,
+ struct batch_complete *batch)
{
- if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes))
+ if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes, batch))
return true;
blk_finish_request(rq, error);
@@ -2624,7 +2628,7 @@ EXPORT_SYMBOL_GPL(blk_end_request_err);
**/
bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes)
{
- return __blk_end_bidi_request(rq, error, nr_bytes, 0);
+ return __blk_end_bidi_request(rq, error, nr_bytes, 0, NULL);
}
EXPORT_SYMBOL(__blk_end_request);
@@ -2636,7 +2640,7 @@ EXPORT_SYMBOL(__blk_end_request);
* Description:
* Completely finish @rq. Must be called with queue lock held.
*/
-void __blk_end_request_all(struct request *rq, int error)
+void blk_end_request_all_batch(struct request *rq, int error, struct batch_complete *batch)
{
bool pending;
unsigned int bidi_bytes = 0;
@@ -2644,10 +2648,10 @@ void __blk_end_request_all(struct request *rq, int error)
if (unlikely(blk_bidi_rq(rq)))
bidi_bytes = blk_rq_bytes(rq->next_rq);
- pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes);
+ pending = __blk_end_bidi_request(rq, error, blk_rq_bytes(rq), bidi_bytes, batch);
BUG_ON(pending);
}
-EXPORT_SYMBOL(__blk_end_request_all);
+EXPORT_SYMBOL(blk_end_request_all_batch);
/**
* __blk_end_request_cur - Helper function to finish the current request chunk.
diff --git a/block/blk-flush.c b/block/blk-flush.c
index db8f1b507857..28785079ee23 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -316,7 +316,7 @@ void blk_insert_flush(struct request *rq)
* complete the request.
*/
if (!policy) {
- __blk_end_bidi_request(rq, 0, 0, 0);
+ __blk_end_bidi_request(rq, 0, 0, 0, NULL);
return;
}
diff --git a/block/blk.h b/block/blk.h
index e837b8f619b7..dc8fee6d41d6 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -31,7 +31,8 @@ void blk_queue_bypass_end(struct request_queue *q);
void blk_dequeue_request(struct request *rq);
void __blk_queue_free_tags(struct request_queue *q);
bool __blk_end_bidi_request(struct request *rq, int error,
- unsigned int nr_bytes, unsigned int bidi_bytes);
+ unsigned int nr_bytes, unsigned int bidi_bytes,
+ struct batch_complete *batch);
void blk_rq_timed_out_timer(unsigned long data);
void blk_delete_timer(struct request *);
diff --git a/block/genhd.c b/block/genhd.c
index 3c001fba80c7..a1ed52af5290 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -849,7 +849,7 @@ static int show_partition(struct seq_file *seqf, void *v)
char buf[BDEVNAME_SIZE];
/* Don't show non-partitionable removeable devices or empty devices */
- if (!get_capacity(sgp) || (!disk_max_parts(sgp) &&
+ if (!get_capacity(sgp) || (!(disk_max_parts(sgp) > 1) &&
(sgp->flags & GENHD_FL_REMOVABLE)))
return 0;
if (sgp->flags & GENHD_FL_SUPPRESS_PARTITION_INFO)
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 789cdea05893..ae95ee6a58aa 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -257,6 +257,7 @@ void delete_partition(struct gendisk *disk, int partno)
hd_struct_put(part);
}
+EXPORT_SYMBOL(delete_partition);
static ssize_t whole_disk_show(struct device *dev,
struct device_attribute *attr, char *buf)
diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index 9a87daa6f4fb..a5ffcc988f0b 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -27,6 +27,7 @@
#include <linux/ratelimit.h>
#include <linux/slab.h>
#include <linux/times.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
#include <scsi/scsi.h>
diff --git a/crypto/Kconfig b/crypto/Kconfig
index aed52b2e4a55..cad183559a27 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1236,6 +1236,22 @@ config CRYPTO_842
help
This is the 842 algorithm.
+config CRYPTO_LZ4
+ tristate "LZ4 compression algorithm"
+ select CRYPTO_ALGAPI
+ select LZ4_COMPRESS
+ select LZ4_DECOMPRESS
+ help
+ This is the LZ4 algorithm.
+
+config CRYPTO_LZ4HC
+ tristate "LZ4HC compression algorithm"
+ select CRYPTO_ALGAPI
+ select LZ4HC_COMPRESS
+ select LZ4_DECOMPRESS
+ help
+ This is the LZ4 high compression mode algorithm.
+
comment "Random Number Generation"
config CRYPTO_ANSI_CPRNG
diff --git a/crypto/Makefile b/crypto/Makefile
index be1a1bebbb86..af87faa69ae3 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -84,6 +84,8 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
+obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
+obj-$(CONFIG_CRYPTO_LZ4HC) += lz4hc.o
obj-$(CONFIG_CRYPTO_842) += 842.o
obj-$(CONFIG_CRYPTO_RNG2) += rng.o
obj-$(CONFIG_CRYPTO_RNG2) += krng.o
diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c
index aa2b0270ed16..4a92bac744dc 100644
--- a/crypto/async_tx/raid6test.c
+++ b/crypto/async_tx/raid6test.c
@@ -46,15 +46,10 @@ static void callback(void *param)
static void makedata(int disks)
{
- int i, j;
+ int i;
for (i = 0; i < disks; i++) {
- for (j = 0; j < PAGE_SIZE/sizeof(u32); j += sizeof(u32)) {
- u32 *p = page_address(data[i]) + j;
-
- *p = random32();
- }
-
+ prandom_bytes(page_address(data[i]), PAGE_SIZE);
dataptrs[i] = data[i];
}
}
diff --git a/crypto/lz4.c b/crypto/lz4.c
new file mode 100644
index 000000000000..98bfdd71e78f
--- /dev/null
+++ b/crypto/lz4.c
@@ -0,0 +1,105 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2013 Chanho Min <chanho.min@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4.h>
+
+struct lz4_ctx {
+ void *lz4_comp_mem;
+};
+
+static int lz4_init(struct crypto_tfm *tfm)
+{
+ struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->lz4_comp_mem = vmalloc(LZ4_MEM_COMPRESS);
+ if (!ctx->lz4_comp_mem)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void lz4_exit(struct crypto_tfm *tfm)
+{
+ struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+ vfree(ctx->lz4_comp_mem);
+}
+
+static int lz4_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ struct lz4_ctx *ctx = crypto_tfm_ctx(tfm);
+ size_t tmp_len = *dlen;
+ int err;
+
+ err = lz4_compress(src, slen, dst, &tmp_len, ctx->lz4_comp_mem);
+
+ if (err < 0)
+ return -EINVAL;
+
+ *dlen = tmp_len;
+ return 0;
+}
+
+static int lz4_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ int err;
+ size_t tmp_len = *dlen;
+
+ err = lz4_decompress(src, &slen, dst, tmp_len);
+ if (err < 0)
+ return -EINVAL;
+
+ *dlen = tmp_len;
+ return err;
+}
+
+static struct crypto_alg alg_lz4 = {
+ .cra_name = "lz4",
+ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+ .cra_ctxsize = sizeof(struct lz4_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(alg_lz4.cra_list),
+ .cra_init = lz4_init,
+ .cra_exit = lz4_exit,
+ .cra_u = { .compress = {
+ .coa_compress = lz4_compress_crypto,
+ .coa_decompress = lz4_decompress_crypto } }
+};
+
+static int __init lz4_mod_init(void)
+{
+ return crypto_register_alg(&alg_lz4);
+}
+
+static void __exit lz4_mod_fini(void)
+{
+ crypto_unregister_alg(&alg_lz4);
+}
+
+module_init(lz4_mod_init);
+module_exit(lz4_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4 Compression Algorithm");
diff --git a/crypto/lz4hc.c b/crypto/lz4hc.c
new file mode 100644
index 000000000000..d3c9625917cd
--- /dev/null
+++ b/crypto/lz4hc.c
@@ -0,0 +1,105 @@
+/*
+ * Cryptographic API.
+ *
+ * Copyright (c) 2013 Chanho Min <chanho.min@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/crypto.h>
+#include <linux/vmalloc.h>
+#include <linux/lz4.h>
+
+struct lz4hc_ctx {
+ void *lz4hc_comp_mem;
+};
+
+static int lz4hc_init(struct crypto_tfm *tfm)
+{
+ struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ ctx->lz4hc_comp_mem = vmalloc(LZ4HC_MEM_COMPRESS);
+ if (!ctx->lz4hc_comp_mem)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void lz4hc_exit(struct crypto_tfm *tfm)
+{
+ struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+
+ vfree(ctx->lz4hc_comp_mem);
+}
+
+static int lz4hc_compress_crypto(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ struct lz4hc_ctx *ctx = crypto_tfm_ctx(tfm);
+ size_t tmp_len = *dlen;
+ int err;
+
+ err = lz4hc_compress(src, slen, dst, &tmp_len, ctx->lz4hc_comp_mem);
+
+ if (err < 0)
+ return -EINVAL;
+
+ *dlen = tmp_len;
+ return 0;
+}
+
+static int lz4hc_decompress_crypto(struct crypto_tfm *tfm, const u8 *src,
+ unsigned int slen, u8 *dst, unsigned int *dlen)
+{
+ int err;
+ size_t tmp_len = *dlen;
+
+ err = lz4_decompress(src, &slen, dst, tmp_len);
+ if (err < 0)
+ return -EINVAL;
+
+ *dlen = tmp_len;
+ return err;
+}
+
+static struct crypto_alg alg_lz4hc = {
+ .cra_name = "lz4hc",
+ .cra_flags = CRYPTO_ALG_TYPE_COMPRESS,
+ .cra_ctxsize = sizeof(struct lz4hc_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_list = LIST_HEAD_INIT(alg_lz4hc.cra_list),
+ .cra_init = lz4hc_init,
+ .cra_exit = lz4hc_exit,
+ .cra_u = { .compress = {
+ .coa_compress = lz4hc_compress_crypto,
+ .coa_decompress = lz4hc_decompress_crypto } }
+};
+
+static int __init lz4hc_mod_init(void)
+{
+ return crypto_register_alg(&alg_lz4hc);
+}
+
+static void __exit lz4hc_mod_fini(void)
+{
+ crypto_unregister_alg(&alg_lz4hc);
+}
+
+module_init(lz4hc_mod_init);
+module_exit(lz4hc_mod_fini);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4HC Compression Algorithm");
diff --git a/drivers/Kconfig b/drivers/Kconfig
index 202fa6d051b9..4606d7558dd8 100644
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -118,6 +118,8 @@ source "drivers/vfio/Kconfig"
source "drivers/vlynq/Kconfig"
+source "drivers/virt/Kconfig"
+
source "drivers/virtio/Kconfig"
source "drivers/hv/Kconfig"
@@ -142,8 +144,6 @@ source "drivers/remoteproc/Kconfig"
source "drivers/rpmsg/Kconfig"
-source "drivers/virt/Kconfig"
-
source "drivers/devfreq/Kconfig"
source "drivers/extcon/Kconfig"
diff --git a/drivers/block/cciss.c b/drivers/block/cciss.c
index ade58bc8f3c4..1c1b8e544aa2 100644
--- a/drivers/block/cciss.c
+++ b/drivers/block/cciss.c
@@ -4206,7 +4206,7 @@ static int cciss_find_cfgtables(ctlr_info_t *h)
if (rc)
return rc;
h->cfgtable = remap_pci_mem(pci_resource_start(h->pdev,
- cfg_base_addr_index) + cfg_offset, sizeof(h->cfgtable));
+ cfg_base_addr_index) + cfg_offset, sizeof(*h->cfgtable));
if (!h->cfgtable)
return -ENOMEM;
rc = write_driver_ver_to_cfgtable(h->cfgtable);
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index a9eccfc6079b..83c5ae0ed56b 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -757,7 +757,8 @@ static struct socket *drbd_wait_for_connect(struct drbd_tconn *tconn, struct acc
rcu_read_unlock();
timeo = connect_int * HZ;
- timeo += (random32() & 1) ? timeo / 7 : -timeo / 7; /* 28.5% random jitter */
+ /* 28.5% random jitter */
+ timeo += (prandom_u32() & 1) ? timeo / 7 : -timeo / 7;
err = wait_for_completion_interruptible_timeout(&ad->door_bell, timeo);
if (err <= 0)
@@ -953,7 +954,7 @@ retry:
conn_warn(tconn, "Error receiving initial packet\n");
sock_release(s);
randomize:
- if (random32() & 1)
+ if (prandom_u32() & 1)
goto retry;
}
}
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 747bb2af69dc..1e0cc5a9ff8d 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1044,12 +1044,27 @@ static int loop_clr_fd(struct loop_device *lo)
lo->lo_state = Lo_unbound;
/* This is safe: open() is still holding a reference. */
module_put(THIS_MODULE);
- if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev)
- ioctl_by_bdev(bdev, BLKRRPART, 0);
lo->lo_flags = 0;
if (!part_shift)
lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN;
mutex_unlock(&lo->lo_ctl_mutex);
+ if (bdev) {
+ /*
+ * Remove all partitions, since BLKRRPART won't remove user
+ * added partitions when max_part=0.
+ */
+ struct disk_part_iter piter;
+ struct hd_struct *part;
+
+ mutex_lock_nested(&bdev->bd_mutex, 1);
+ invalidate_partition(bdev->bd_disk, 0);
+ disk_part_iter_init(&piter, bdev->bd_disk, DISK_PITER_INCL_EMPTY);
+ while ((part = disk_part_iter_next(&piter)))
+ delete_partition(bdev->bd_disk, part->partno);
+ disk_part_iter_exit(&piter);
+ mutex_unlock(&bdev->bd_mutex);
+ }
+
/*
* Need not hold lo_ctl_mutex to fput backing file.
* Calling fput holding lo_ctl_mutex triggers a circular
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index 11cc9522cdd4..b84dda58c33c 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -146,6 +146,9 @@ static void mtip_command_cleanup(struct driver_data *dd)
struct mtip_cmd *command;
struct mtip_port *port = dd->port;
static int in_progress;
+ struct batch_complete batch;
+
+ batch_complete_init(&batch);
if (in_progress)
return;
@@ -161,11 +164,9 @@ static void mtip_command_cleanup(struct driver_data *dd)
command = &port->commands[commandindex];
if (atomic_read(&command->active)
- && (command->async_callback)) {
- command->async_callback(command->async_data,
- -ENODEV);
- command->async_callback = NULL;
- command->async_data = NULL;
+ && (command->bio)) {
+ bio_endio_batch(command->bio, -ENODEV, &batch);
+ command->bio = NULL;
}
dma_unmap_sg(&port->dd->pdev->dev,
@@ -173,9 +174,10 @@ static void mtip_command_cleanup(struct driver_data *dd)
command->scatter_ents,
command->direction);
}
+ up(&port->cmd_slot);
}
- up(&port->cmd_slot);
+ batch_complete(&batch);
set_bit(MTIP_DDF_CLEANUP_BIT, &dd->dd_flag);
in_progress = 0;
@@ -564,6 +566,9 @@ static void mtip_timeout_function(unsigned long int data)
unsigned int bit, group;
unsigned int num_command_slots;
unsigned long to, tagaccum[SLOTBITS_IN_LONGS];
+ struct batch_complete batch;
+
+ batch_complete_init(&batch);
if (unlikely(!port))
return;
@@ -606,11 +611,9 @@ static void mtip_timeout_function(unsigned long int data)
writel(1 << bit, port->completed[group]);
/* Call the async completion callback. */
- if (likely(command->async_callback))
- command->async_callback(command->async_data,
- -EIO);
- command->async_callback = NULL;
- command->comp_func = NULL;
+ if (likely(command->bio))
+ bio_endio_batch(command->bio, -EIO, &batch);
+ command->bio = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&port->dd->pdev->dev,
@@ -629,6 +632,8 @@ static void mtip_timeout_function(unsigned long int data)
}
}
+ batch_complete(&batch);
+
if (cmdto_cnt) {
print_tags(port->dd, "timed out", tagaccum, cmdto_cnt);
if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) {
@@ -679,7 +684,8 @@ static void mtip_timeout_function(unsigned long int data)
static void mtip_async_complete(struct mtip_port *port,
int tag,
void *data,
- int status)
+ int status,
+ struct batch_complete *batch)
{
struct mtip_cmd *command;
struct driver_data *dd = data;
@@ -696,11 +702,10 @@ static void mtip_async_complete(struct mtip_port *port,
}
/* Upper layer callback */
- if (likely(command->async_callback))
- command->async_callback(command->async_data, cb_status);
+ if (likely(command->bio))
+ bio_endio_batch(command->bio, cb_status, batch);
- command->async_callback = NULL;
- command->comp_func = NULL;
+ command->bio = NULL;
/* Unmap the DMA scatter list entries */
dma_unmap_sg(&dd->pdev->dev,
@@ -733,24 +738,22 @@ static void mtip_async_complete(struct mtip_port *port,
static void mtip_completion(struct mtip_port *port,
int tag,
void *data,
- int status)
+ int status,
+ struct batch_complete *batch)
{
- struct mtip_cmd *command = &port->commands[tag];
struct completion *waiting = data;
if (unlikely(status == PORT_IRQ_TF_ERR))
dev_warn(&port->dd->pdev->dev,
"Internal command %d completed with TFE\n", tag);
- command->async_callback = NULL;
- command->comp_func = NULL;
-
complete(waiting);
}
static void mtip_null_completion(struct mtip_port *port,
int tag,
void *data,
- int status)
+ int status,
+ struct batch_complete *batch)
{
return;
}
@@ -779,6 +782,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
unsigned char *buf;
char *fail_reason = NULL;
int fail_all_ncq_write = 0, fail_all_ncq_cmds = 0;
+ struct batch_complete batch;
dev_warn(&dd->pdev->dev, "Taskfile error\n");
@@ -796,13 +800,14 @@ static void mtip_handle_tfe(struct driver_data *dd)
atomic_inc(&cmd->active); /* active > 1 indicates error */
if (cmd->comp_data && cmd->comp_func) {
cmd->comp_func(port, MTIP_TAG_INTERNAL,
- cmd->comp_data, PORT_IRQ_TF_ERR);
+ cmd->comp_data, PORT_IRQ_TF_ERR, NULL);
}
goto handle_tfe_exit;
}
/* clear the tag accumulator */
memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
+ batch_complete_init(&batch);
/* Loop through all the groups */
for (group = 0; group < dd->slot_groups; group++) {
@@ -829,7 +834,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
cmd->comp_func(port,
tag,
cmd->comp_data,
- 0);
+ 0, &batch);
} else {
dev_err(&port->dd->pdev->dev,
"Missing completion func for tag %d",
@@ -842,6 +847,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
}
}
}
+ batch_complete(&batch);
print_tags(dd, "completed (TFE)", tagaccum, cmd_cnt);
@@ -883,6 +889,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
/* clear the tag accumulator */
memset(tagaccum, 0, SLOTBITS_IN_LONGS * sizeof(long));
+ batch_complete_init(&batch);
/* Loop through all the groups */
for (group = 0; group < dd->slot_groups; group++) {
@@ -916,7 +923,7 @@ static void mtip_handle_tfe(struct driver_data *dd)
if (cmd->comp_func) {
cmd->comp_func(port, tag,
cmd->comp_data,
- -ENODATA);
+ -ENODATA, &batch);
}
continue;
}
@@ -946,13 +953,15 @@ static void mtip_handle_tfe(struct driver_data *dd)
port,
tag,
cmd->comp_data,
- PORT_IRQ_TF_ERR);
+ PORT_IRQ_TF_ERR, &batch);
else
dev_warn(&port->dd->pdev->dev,
"Bad completion for tag %d\n",
tag);
}
}
+
+ batch_complete(&batch);
print_tags(dd, "reissued (TFE)", tagaccum, cmd_cnt);
handle_tfe_exit:
@@ -973,6 +982,9 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
struct driver_data *dd = port->dd;
int tag, bit;
struct mtip_cmd *command;
+ struct batch_complete batch;
+
+ batch_complete_init(&batch);
if (!completed) {
WARN_ON_ONCE(!completed);
@@ -997,7 +1009,8 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
port,
tag,
command->comp_data,
- 0);
+ 0,
+ &batch);
} else {
dev_warn(&dd->pdev->dev,
"Null completion "
@@ -1007,13 +1020,16 @@ static inline void mtip_workq_sdbfx(struct mtip_port *port, int group,
if (mtip_check_surprise_removal(
dd->pdev)) {
mtip_command_cleanup(dd);
- return;
+ goto out;
}
}
}
completed >>= 1;
}
+out:
+ batch_complete(&batch);
+
/* If last, re-enable interrupts */
if (atomic_dec_return(&dd->irq_workers_active) == 0)
writel(0xffffffff, dd->mmio + HOST_IRQ_STAT);
@@ -1034,7 +1050,7 @@ static inline void mtip_process_legacy(struct driver_data *dd, u32 port_stat)
cmd->comp_func(port,
MTIP_TAG_INTERNAL,
cmd->comp_data,
- 0);
+ 0, NULL);
return;
}
}
@@ -2554,8 +2570,8 @@ static int mtip_hw_ioctl(struct driver_data *dd, unsigned int cmd,
* None
*/
static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
- int nsect, int nents, int tag, void *callback,
- void *data, int dir)
+ int nsect, int nents, int tag,
+ struct bio *bio, int dir)
{
struct host_to_dev_fis *fis;
struct mtip_port *port = dd->port;
@@ -2610,12 +2626,7 @@ static void mtip_hw_submit_io(struct driver_data *dd, sector_t sector,
command->comp_func = mtip_async_complete;
command->direction = dma_dir;
- /*
- * Set the completion function and data for the command passed
- * from the upper layer.
- */
- command->async_data = data;
- command->async_callback = callback;
+ command->bio = bio;
/*
* To prevent this command from being issued
@@ -3795,7 +3806,6 @@ static void mtip_make_request(struct request_queue *queue, struct bio *bio)
bio_sectors(bio),
nents,
tag,
- bio_endio,
bio,
bio_data_dir(bio));
} else
diff --git a/drivers/block/mtip32xx/mtip32xx.h b/drivers/block/mtip32xx/mtip32xx.h
index 3bffff5f670c..af8c6f79a8d8 100644
--- a/drivers/block/mtip32xx/mtip32xx.h
+++ b/drivers/block/mtip32xx/mtip32xx.h
@@ -325,11 +325,9 @@ struct mtip_cmd {
void (*comp_func)(struct mtip_port *port,
int tag,
void *data,
- int status);
- /* Additional callback function that may be called by comp_func() */
- void (*async_callback)(void *data, int status);
-
- void *async_data; /* Addl. data passed to async_callback() */
+ int status,
+ struct batch_complete *batch);
+ struct bio *bio;
int scatter_ents; /* Number of scatter list entries used */
diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c
index 758f2ac878cf..deb722d63d68 100644
--- a/drivers/block/swim3.c
+++ b/drivers/block/swim3.c
@@ -775,7 +775,7 @@ static irqreturn_t swim3_interrupt(int irq, void *dev_id)
if (intr & ERROR_INTR) {
n = fs->scount - 1 - resid / 512;
if (n > 0) {
- blk_update_request(req, 0, n << 9);
+ blk_update_request(req, 0, n << 9, NULL);
fs->req_sector += n;
}
if (fs->retries < 5) {
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 64723953e1c9..49d0ec2472c5 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -217,7 +217,8 @@ static void virtblk_bio_send_flush_work(struct work_struct *work)
virtblk_bio_send_flush(vbr);
}
-static inline void virtblk_request_done(struct virtblk_req *vbr)
+static inline void virtblk_request_done(struct virtblk_req *vbr,
+ struct batch_complete *batch)
{
struct virtio_blk *vblk = vbr->vblk;
struct request *req = vbr->req;
@@ -231,11 +232,12 @@ static inline void virtblk_request_done(struct virtblk_req *vbr)
req->errors = (error != 0);
}
- __blk_end_request_all(req, error);
+ blk_end_request_all_batch(req, error, batch);
mempool_free(vbr, vblk->pool);
}
-static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
+static inline void virtblk_bio_flush_done(struct virtblk_req *vbr,
+ struct batch_complete *batch)
{
struct virtio_blk *vblk = vbr->vblk;
@@ -244,12 +246,13 @@ static inline void virtblk_bio_flush_done(struct virtblk_req *vbr)
INIT_WORK(&vbr->work, virtblk_bio_send_data_work);
queue_work(virtblk_wq, &vbr->work);
} else {
- bio_endio(vbr->bio, virtblk_result(vbr));
+ bio_endio_batch(vbr->bio, virtblk_result(vbr), batch);
mempool_free(vbr, vblk->pool);
}
}
-static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
+static inline void virtblk_bio_data_done(struct virtblk_req *vbr,
+ struct batch_complete *batch)
{
struct virtio_blk *vblk = vbr->vblk;
@@ -259,17 +262,18 @@ static inline void virtblk_bio_data_done(struct virtblk_req *vbr)
INIT_WORK(&vbr->work, virtblk_bio_send_flush_work);
queue_work(virtblk_wq, &vbr->work);
} else {
- bio_endio(vbr->bio, virtblk_result(vbr));
+ bio_endio_batch(vbr->bio, virtblk_result(vbr), batch);
mempool_free(vbr, vblk->pool);
}
}
-static inline void virtblk_bio_done(struct virtblk_req *vbr)
+static inline void virtblk_bio_done(struct virtblk_req *vbr,
+ struct batch_complete *batch)
{
if (unlikely(vbr->flags & VBLK_IS_FLUSH))
- virtblk_bio_flush_done(vbr);
+ virtblk_bio_flush_done(vbr, batch);
else
- virtblk_bio_data_done(vbr);
+ virtblk_bio_data_done(vbr, batch);
}
static void virtblk_done(struct virtqueue *vq)
@@ -279,16 +283,19 @@ static void virtblk_done(struct virtqueue *vq)
struct virtblk_req *vbr;
unsigned long flags;
unsigned int len;
+ struct batch_complete batch;
+
+ batch_complete_init(&batch);
spin_lock_irqsave(vblk->disk->queue->queue_lock, flags);
do {
virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
if (vbr->bio) {
- virtblk_bio_done(vbr);
+ virtblk_bio_done(vbr, &batch);
bio_done = true;
} else {
- virtblk_request_done(vbr);
+ virtblk_request_done(vbr, &batch);
req_done = true;
}
}
@@ -298,6 +305,8 @@ static void virtblk_done(struct virtqueue *vq)
blk_start_queue(vblk->disk->queue);
spin_unlock_irqrestore(vblk->disk->queue->queue_lock, flags);
+ batch_complete(&batch);
+
if (bio_done)
wake_up(&vblk->queue_wait);
}
diff --git a/drivers/char/mem.c b/drivers/char/mem.c
index 2c644afbcdd4..1ccbe9482faa 100644
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -28,6 +28,7 @@
#include <linux/pfn.h>
#include <linux/export.h>
#include <linux/io.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
@@ -627,6 +628,18 @@ static ssize_t write_null(struct file *file, const char __user *buf,
return count;
}
+static ssize_t aio_read_null(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return 0;
+}
+
+static ssize_t aio_write_null(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ return iov_length(iov, nr_segs);
+}
+
static int pipe_to_null(struct pipe_inode_info *info, struct pipe_buffer *buf,
struct splice_desc *sd)
{
@@ -670,6 +683,24 @@ static ssize_t read_zero(struct file *file, char __user *buf,
return written ? written : -EFAULT;
}
+static ssize_t aio_read_zero(struct kiocb *iocb, const struct iovec *iov,
+ unsigned long nr_segs, loff_t pos)
+{
+ size_t written = 0;
+ unsigned long i;
+ ssize_t ret;
+
+ for (i = 0; i < nr_segs; i++) {
+ ret = read_zero(iocb->ki_filp, iov[i].iov_base, iov[i].iov_len,
+ &pos);
+ if (ret < 0)
+ break;
+ written += ret;
+ }
+
+ return written ? written : -EFAULT;
+}
+
static int mmap_zero(struct file *file, struct vm_area_struct *vma)
{
#ifndef CONFIG_MMU
@@ -738,6 +769,7 @@ static int open_port(struct inode *inode, struct file *filp)
#define full_lseek null_lseek
#define write_zero write_null
#define read_full read_zero
+#define aio_write_zero aio_write_null
#define open_mem open_port
#define open_kmem open_mem
#define open_oldmem open_mem
@@ -766,6 +798,8 @@ static const struct file_operations null_fops = {
.llseek = null_lseek,
.read = read_null,
.write = write_null,
+ .aio_read = aio_read_null,
+ .aio_write = aio_write_null,
.splice_write = splice_write_null,
};
@@ -782,6 +816,8 @@ static const struct file_operations zero_fops = {
.llseek = zero_lseek,
.read = read_zero,
.write = write_zero,
+ .aio_read = aio_read_zero,
+ .aio_write = aio_write_zero,
.mmap = mmap_zero,
};
diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c
index 4cd392dbf115..40e940d4db6b 100644
--- a/drivers/firmware/dmi_scan.c
+++ b/drivers/firmware/dmi_scan.c
@@ -410,22 +410,45 @@ static void __init dmi_dump_ids(void)
printk(KERN_CONT "\n");
}
-static int __init dmi_present(const char __iomem *p)
+static int __init dmi_present(const u8 *buf)
{
- u8 buf[15];
+ int smbios_ver;
- memcpy_fromio(buf, p, 15);
- if (dmi_checksum(buf, 15)) {
+ if (memcmp(buf, "_SM_", 4) == 0 &&
+ buf[5] < 32 && dmi_checksum(buf, buf[5])) {
+ smbios_ver = (buf[6] << 8) + buf[7];
+
+ /* Some BIOS report weird SMBIOS version, fix that up */
+ switch (smbios_ver) {
+ case 0x021F:
+ case 0x0221:
+ pr_debug("SMBIOS version fixup(2.%d->2.%d)\n",
+ smbios_ver & 0xFF, 3);
+ smbios_ver = 0x0203;
+ break;
+ case 0x0233:
+ pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6);
+ smbios_ver = 0x0206;
+ break;
+ }
+ } else {
+ smbios_ver = 0;
+ }
+
+ buf += 16;
+
+ if (memcmp(buf, "_DMI_", 5) == 0 && dmi_checksum(buf, 15)) {
dmi_num = (buf[13] << 8) | buf[12];
dmi_len = (buf[7] << 8) | buf[6];
dmi_base = (buf[11] << 24) | (buf[10] << 16) |
(buf[9] << 8) | buf[8];
if (dmi_walk_early(dmi_decode) == 0) {
- if (dmi_ver)
+ if (smbios_ver) {
+ dmi_ver = smbios_ver;
pr_info("SMBIOS %d.%d present.\n",
dmi_ver >> 8, dmi_ver & 0xFF);
- else {
+ } else {
dmi_ver = (buf[14] & 0xF0) << 4 |
(buf[14] & 0x0F);
pr_info("Legacy DMI %d.%d present.\n",
@@ -435,40 +458,14 @@ static int __init dmi_present(const char __iomem *p)
return 0;
}
}
- dmi_ver = 0;
- return 1;
-}
-static int __init smbios_present(const char __iomem *p)
-{
- u8 buf[32];
-
- memcpy_fromio(buf, p, 32);
- if ((buf[5] < 32) && dmi_checksum(buf, buf[5])) {
- dmi_ver = (buf[6] << 8) + buf[7];
-
- /* Some BIOS report weird SMBIOS version, fix that up */
- switch (dmi_ver) {
- case 0x021F:
- case 0x0221:
- pr_debug("SMBIOS version fixup(2.%d->2.%d)\n",
- dmi_ver & 0xFF, 3);
- dmi_ver = 0x0203;
- break;
- case 0x0233:
- pr_debug("SMBIOS version fixup(2.%d->2.%d)\n", 51, 6);
- dmi_ver = 0x0206;
- break;
- }
- return memcmp(p + 16, "_DMI_", 5) || dmi_present(p + 16);
- }
return 1;
}
void __init dmi_scan_machine(void)
{
char __iomem *p, *q;
- int rc;
+ char buf[32];
if (efi_enabled(EFI_CONFIG_TABLES)) {
if (efi.smbios == EFI_INVALID_TABLE_ADDR)
@@ -481,10 +478,10 @@ void __init dmi_scan_machine(void)
p = dmi_ioremap(efi.smbios, 32);
if (p == NULL)
goto error;
-
- rc = smbios_present(p);
+ memcpy_fromio(buf, p, 32);
dmi_iounmap(p, 32);
- if (!rc) {
+
+ if (!dmi_present(buf)) {
dmi_available = 1;
goto out;
}
@@ -499,18 +496,15 @@ void __init dmi_scan_machine(void)
if (p == NULL)
goto error;
+ memset(buf, 0, 16);
for (q = p; q < p + 0x10000; q += 16) {
- if (memcmp(q, "_SM_", 4) == 0 && q - p <= 0xFFE0)
- rc = smbios_present(q);
- else if (memcmp(q, "_DMI_", 5) == 0)
- rc = dmi_present(q);
- else
- continue;
- if (!rc) {
+ memcpy_fromio(buf + 16, q, 16);
+ if (!dmi_present(buf)) {
dmi_available = 1;
dmi_iounmap(p, 0x10000);
goto out;
}
+ memcpy(buf, buf + 16, 16);
}
dmi_iounmap(p, 0x10000);
}
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 59d6b9bf204b..cf71f1d627fe 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -399,6 +399,14 @@ static void drm_fb_helper_dpms(struct fb_info *info, int dpms_mode)
return;
/*
+ * fbdev->blank can be called from irq context in case of a panic.
+ * Since we already have our own special panic handler which will
+ * restore the fbdev console mode completely, just bail out early.
+ */
+ if (oops_in_progress)
+ return;
+
+ /*
* For each CRTC in this fb, turn the connectors on/off.
*/
drm_modeset_lock_all(dev);
diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
index 31f9201b2980..c40088ecf9f3 100644
--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
+++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
@@ -62,13 +62,13 @@ static int __cxio_init_resource_fifo(struct kfifo *fifo,
kfifo_in(fifo, (unsigned char *) &entry, sizeof(u32));
if (random) {
j = 0;
- random_bytes = random32();
+ random_bytes = prandom_u32();
for (i = 0; i < RANDOM_SIZE; i++)
rarray[i] = i + skip_low;
for (i = skip_low + RANDOM_SIZE; i < nr - skip_high; i++) {
if (j >= RANDOM_SIZE) {
j = 0;
- random_bytes = random32();
+ random_bytes = prandom_u32();
}
idx = (random_bytes >> (j * 2)) & 0xF;
kfifo_in(fifo,
diff --git a/drivers/infiniband/hw/cxgb4/id_table.c b/drivers/infiniband/hw/cxgb4/id_table.c
index f95e5df30db2..0161ae6ad629 100644
--- a/drivers/infiniband/hw/cxgb4/id_table.c
+++ b/drivers/infiniband/hw/cxgb4/id_table.c
@@ -54,7 +54,7 @@ u32 c4iw_id_alloc(struct c4iw_id_table *alloc)
if (obj < alloc->max) {
if (alloc->flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last += random32() % RANDOM_SKIP;
+ alloc->last += prandom_u32() % RANDOM_SKIP;
else
alloc->last = obj + 1;
if (alloc->last >= alloc->max)
@@ -88,7 +88,7 @@ int c4iw_id_table_alloc(struct c4iw_id_table *alloc, u32 start, u32 num,
alloc->start = start;
alloc->flags = flags;
if (flags & C4IW_ID_TABLE_F_RANDOM)
- alloc->last = random32() % RANDOM_SKIP;
+ alloc->last = prandom_u32() % RANDOM_SKIP;
else
alloc->last = 0;
alloc->max = num;
diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
index aed8afee56da..6d7f453b4d05 100644
--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
+++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
@@ -40,6 +40,7 @@
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/io.h>
+#include <linux/aio.h>
#include <linux/jiffies.h>
#include <linux/cpu.h>
#include <asm/pgtable.h>
diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c
index 934792c477bc..4d599cedbb0b 100644
--- a/drivers/infiniband/hw/mlx4/mad.c
+++ b/drivers/infiniband/hw/mlx4/mad.c
@@ -93,7 +93,7 @@ static void __propagate_pkey_ev(struct mlx4_ib_dev *dev, int port_num,
__be64 mlx4_ib_gen_node_guid(void)
{
#define NODE_GUID_HI ((u64) (((u64)IB_OPENIB_OUI) << 40))
- return cpu_to_be64(NODE_GUID_HI | random32());
+ return cpu_to_be64(NODE_GUID_HI | prandom_u32());
}
__be64 mlx4_ib_get_new_demux_tid(struct mlx4_ib_demux_ctx *ctx)
diff --git a/drivers/infiniband/hw/qib/qib_file_ops.c b/drivers/infiniband/hw/qib/qib_file_ops.c
index 4f7aa301b3b1..b56c9428f3c5 100644
--- a/drivers/infiniband/hw/qib/qib_file_ops.c
+++ b/drivers/infiniband/hw/qib/qib_file_ops.c
@@ -39,7 +39,7 @@
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <linux/io.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include <linux/jiffies.h>
#include <asm/pgtable.h>
#include <linux/delay.h>
diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
index 67b0c1d23678..249976cfb28a 100644
--- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c
+++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c
@@ -460,7 +460,7 @@ static int ipoib_cm_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *even
goto err_qp;
}
- psn = random32() & 0xffffff;
+ psn = prandom_u32() & 0xffffff;
ret = ipoib_cm_modify_rx_qp(dev, cm_id, p->qp, psn);
if (ret)
goto err_modify;
diff --git a/drivers/leds/leds-ot200.c b/drivers/leds/leds-ot200.c
index ee14662ed5ce..98cae529373f 100644
--- a/drivers/leds/leds-ot200.c
+++ b/drivers/leds/leds-ot200.c
@@ -47,37 +47,37 @@ static struct ot200_led leds[] = {
{
.name = "led_1",
.port = 0x49,
- .mask = BIT(7),
+ .mask = BIT(6),
},
{
.name = "led_2",
.port = 0x49,
- .mask = BIT(6),
+ .mask = BIT(5),
},
{
.name = "led_3",
.port = 0x49,
- .mask = BIT(5),
+ .mask = BIT(4),
},
{
.name = "led_4",
.port = 0x49,
- .mask = BIT(4),
+ .mask = BIT(3),
},
{
.name = "led_5",
.port = 0x49,
- .mask = BIT(3),
+ .mask = BIT(2),
},
{
.name = "led_6",
.port = 0x49,
- .mask = BIT(2),
+ .mask = BIT(1),
},
{
.name = "led_7",
.port = 0x49,
- .mask = BIT(1),
+ .mask = BIT(0),
}
};
diff --git a/drivers/lguest/page_tables.c b/drivers/lguest/page_tables.c
index 3b62be160a6e..864baabaee25 100644
--- a/drivers/lguest/page_tables.c
+++ b/drivers/lguest/page_tables.c
@@ -686,7 +686,7 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
* We pick one entry at random to throw out. Choosing the Least
* Recently Used might be better, but this is easy.
*/
- next = random32() % ARRAY_SIZE(cpu->lg->pgdirs);
+ next = prandom_u32() % ARRAY_SIZE(cpu->lg->pgdirs);
/* If it's never been allocated at all before, try now. */
if (!cpu->lg->pgdirs[next].pgdir) {
cpu->lg->pgdirs[next].pgdir =
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 7e469260fe5e..e52880f6b589 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -695,7 +695,7 @@ static void end_clone_bio(struct bio *clone, int error)
* Do not use blk_end_request() here, because it may complete
* the original request before the clone, and break the ordering.
*/
- blk_update_request(tio->orig, 0, nr_bytes);
+ blk_update_request(tio->orig, 0, nr_bytes, NULL);
}
/*
diff --git a/drivers/memstick/host/r592.c b/drivers/memstick/host/r592.c
index a7c5b31c0d50..9718661c1fb6 100644
--- a/drivers/memstick/host/r592.c
+++ b/drivers/memstick/host/r592.c
@@ -847,7 +847,7 @@ static void r592_remove(struct pci_dev *pdev)
dev->dummy_dma_page_physical_address);
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int r592_suspend(struct device *core_dev)
{
struct pci_dev *pdev = to_pci_dev(core_dev);
@@ -870,10 +870,10 @@ static int r592_resume(struct device *core_dev)
r592_update_card_detect(dev);
return 0;
}
-
-SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume);
#endif
+static SIMPLE_DEV_PM_OPS(r592_pm_ops, r592_suspend, r592_resume);
+
MODULE_DEVICE_TABLE(pci, r592_pci_id_tbl);
static struct pci_driver r852_pci_driver = {
@@ -881,9 +881,7 @@ static struct pci_driver r852_pci_driver = {
.id_table = r592_pci_id_tbl,
.probe = r592_probe,
.remove = r592_remove,
-#ifdef CONFIG_PM
.driver.pm = &r592_pm_ops,
-#endif
};
static __init int r592_module_init(void)
diff --git a/drivers/message/i2o/i2o_config.c b/drivers/message/i2o/i2o_config.c
index 5451beff183f..a60c188c2bd9 100644
--- a/drivers/message/i2o/i2o_config.c
+++ b/drivers/message/i2o/i2o_config.c
@@ -687,6 +687,11 @@ static int i2o_cfg_passthru32(struct file *file, unsigned cmnd,
}
size = size >> 16;
size *= 4;
+ if (size > sizeof(rmsg)) {
+ rcode = -EINVAL;
+ goto sg_list_cleanup;
+ }
+
/* Copy in the user's I2O command */
if (copy_from_user(rmsg, user_msg, size)) {
rcode = -EFAULT;
@@ -922,6 +927,11 @@ static int i2o_cfg_passthru(unsigned long arg)
}
size = size >> 16;
size *= 4;
+ if (size > sizeof(rmsg)) {
+ rcode = -EFAULT;
+ goto sg_list_cleanup;
+ }
+
/* Copy in the user's I2O command */
if (copy_from_user(rmsg, user_msg, size)) {
rcode = -EFAULT;
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 08a3cf2a7610..9290bb51a06a 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -120,8 +120,8 @@ static void mmc_should_fail_request(struct mmc_host *host,
!should_fail(&host->fail_mmc_request, data->blksz * data->blocks))
return;
- data->error = data_errors[random32() % ARRAY_SIZE(data_errors)];
- data->bytes_xfered = (random32() % (data->bytes_xfered >> 9)) << 9;
+ data->error = data_errors[prandom_u32() % ARRAY_SIZE(data_errors)];
+ data->bytes_xfered = (prandom_u32() % (data->bytes_xfered >> 9)) << 9;
}
#else /* CONFIG_FAIL_MMC_REQUEST */
diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 149a3a038491..5abdd4894082 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -4085,7 +4085,7 @@ static int cnic_cm_alloc_mem(struct cnic_dev *dev)
if (!cp->csk_tbl)
return -ENOMEM;
- port_id = random32();
+ port_id = prandom_u32();
port_id %= CNIC_LOCAL_PORT_RANGE;
if (cnic_init_id_tbl(&cp->csk_port_tbl, CNIC_LOCAL_PORT_RANGE,
CNIC_LOCAL_PORT_MIN, port_id)) {
@@ -4145,7 +4145,7 @@ static int cnic_cm_init_bnx2_hw(struct cnic_dev *dev)
{
u32 seed;
- seed = random32();
+ seed = prandom_u32();
cnic_ctx_wr(dev, 45, 0, seed);
return 0;
}
diff --git a/drivers/net/hamradio/baycom_epp.c b/drivers/net/hamradio/baycom_epp.c
index 49b8b58fc5c6..484f77ec2ce1 100644
--- a/drivers/net/hamradio/baycom_epp.c
+++ b/drivers/net/hamradio/baycom_epp.c
@@ -449,7 +449,7 @@ static int transmit(struct baycom_state *bc, int cnt, unsigned char stat)
if ((--bc->hdlctx.slotcnt) > 0)
return 0;
bc->hdlctx.slotcnt = bc->ch_params.slottime;
- if ((random32() % 256) > bc->ch_params.ppersist)
+ if ((prandom_u32() % 256) > bc->ch_params.ppersist)
return 0;
}
}
diff --git a/drivers/net/hamradio/hdlcdrv.c b/drivers/net/hamradio/hdlcdrv.c
index a4a3516b6bbf..3169252613fa 100644
--- a/drivers/net/hamradio/hdlcdrv.c
+++ b/drivers/net/hamradio/hdlcdrv.c
@@ -389,7 +389,7 @@ void hdlcdrv_arbitrate(struct net_device *dev, struct hdlcdrv_state *s)
if ((--s->hdlctx.slotcnt) > 0)
return;
s->hdlctx.slotcnt = s->ch_params.slottime;
- if ((random32() % 256) > s->ch_params.ppersist)
+ if ((prandom_u32() % 256) > s->ch_params.ppersist)
return;
start_tx(dev, s);
}
diff --git a/drivers/net/hamradio/yam.c b/drivers/net/hamradio/yam.c
index 4cf8f1017aad..ae3feb074f00 100644
--- a/drivers/net/hamradio/yam.c
+++ b/drivers/net/hamradio/yam.c
@@ -638,7 +638,7 @@ static void yam_arbitrate(struct net_device *dev)
yp->slotcnt = yp->slot / 10;
/* is random > persist ? */
- if ((random32() % 256) > yp->pers)
+ if ((prandom_u32() % 256) > yp->pers)
return;
yam_start_tx(dev, yp);
diff --git a/drivers/net/team/team_mode_random.c b/drivers/net/team/team_mode_random.c
index 9eabfaa22f3e..5ca14d463ba7 100644
--- a/drivers/net/team/team_mode_random.c
+++ b/drivers/net/team/team_mode_random.c
@@ -18,7 +18,7 @@
static u32 random_N(unsigned int N)
{
- return reciprocal_divide(random32(), N);
+ return reciprocal_divide(prandom_u32(), N);
}
static bool rnd_transmit(struct team *team, struct sk_buff *skb)
diff --git a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
index 4166e642068b..bca31a855875 100644
--- a/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/brcm80211/brcmfmac/p2p.c
@@ -1118,7 +1118,7 @@ static void brcmf_p2p_afx_handler(struct work_struct *work)
if (afx_hdl->is_listen && afx_hdl->my_listen_chan)
/* 100ms ~ 300ms */
err = brcmf_p2p_discover_listen(p2p, afx_hdl->my_listen_chan,
- 100 * (1 + (random32() % 3)));
+ 100 * (1 + (prandom_u32() % 3)));
else
err = brcmf_p2p_act_frm_search(p2p, afx_hdl->peer_listen_chan);
diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c
index dbf5b1289516..aebf66cdd71f 100644
--- a/drivers/net/wireless/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/mwifiex/cfg80211.c
@@ -216,7 +216,7 @@ mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
mwifiex_form_mgmt_frame(skb, buf, len);
mwifiex_queue_tx_pkt(priv, skb);
- *cookie = random32() | 1;
+ *cookie = prandom_u32() | 1;
cfg80211_mgmt_tx_status(wdev, *cookie, buf, len, true, GFP_ATOMIC);
wiphy_dbg(wiphy, "info: management frame transmitted\n");
@@ -271,7 +271,7 @@ mwifiex_cfg80211_remain_on_channel(struct wiphy *wiphy,
duration);
if (!ret) {
- *cookie = random32() | 1;
+ *cookie = prandom_u32() | 1;
priv->roc_cfg.cookie = *cookie;
priv->roc_cfg.chan = *chan;
diff --git a/drivers/platform/x86/thinkpad_acpi.c b/drivers/platform/x86/thinkpad_acpi.c
index 9a907567f41e..edec135b1685 100644
--- a/drivers/platform/x86/thinkpad_acpi.c
+++ b/drivers/platform/x86/thinkpad_acpi.c
@@ -1964,9 +1964,6 @@ struct tp_nvram_state {
/* kthread for the hotkey poller */
static struct task_struct *tpacpi_hotkey_task;
-/* Acquired while the poller kthread is running, use to sync start/stop */
-static struct mutex hotkey_thread_mutex;
-
/*
* Acquire mutex to write poller control variables as an
* atomic block.
@@ -2462,8 +2459,6 @@ static int hotkey_kthread(void *data)
unsigned int poll_freq;
bool was_frozen;
- mutex_lock(&hotkey_thread_mutex);
-
if (tpacpi_lifecycle == TPACPI_LIFE_EXITING)
goto exit;
@@ -2523,7 +2518,6 @@ static int hotkey_kthread(void *data)
}
exit:
- mutex_unlock(&hotkey_thread_mutex);
return 0;
}
@@ -2533,9 +2527,6 @@ static void hotkey_poll_stop_sync(void)
if (tpacpi_hotkey_task) {
kthread_stop(tpacpi_hotkey_task);
tpacpi_hotkey_task = NULL;
- mutex_lock(&hotkey_thread_mutex);
- /* at this point, the thread did exit */
- mutex_unlock(&hotkey_thread_mutex);
}
}
@@ -3234,7 +3225,6 @@ static int __init hotkey_init(struct ibm_init_struct *iibm)
mutex_init(&hotkey_mutex);
#ifdef CONFIG_THINKPAD_ACPI_HOTKEY_POLL
- mutex_init(&hotkey_thread_mutex);
mutex_init(&hotkey_thread_data_mutex);
#endif
diff --git a/drivers/rtc/class.c b/drivers/rtc/class.c
index 9b742d3ffb94..66385402d20e 100644
--- a/drivers/rtc/class.c
+++ b/drivers/rtc/class.c
@@ -259,6 +259,76 @@ void rtc_device_unregister(struct rtc_device *rtc)
}
EXPORT_SYMBOL_GPL(rtc_device_unregister);
+static void devm_rtc_device_release(struct device *dev, void *res)
+{
+ struct rtc_device *rtc = *(struct rtc_device **)res;
+
+ rtc_device_unregister(rtc);
+}
+
+static int devm_rtc_device_match(struct device *dev, void *res, void *data)
+{
+ struct rtc **r = res;
+
+ return *r == data;
+}
+
+/**
+ * devm_rtc_device_register - resource managed rtc_device_register()
+ * @dev: the device to register
+ * @name: the name of the device
+ * @ops: the rtc operations structure
+ * @owner: the module owner
+ *
+ * @return a struct rtc on success, or an ERR_PTR on error
+ *
+ * Managed rtc_device_register(). The rtc_device returned from this function
+ * are automatically freed on driver detach. See rtc_device_register()
+ * for more information.
+ */
+
+struct rtc_device *devm_rtc_device_register(struct device *dev,
+ const char *name,
+ const struct rtc_class_ops *ops,
+ struct module *owner)
+{
+ struct rtc_device **ptr, *rtc;
+
+ ptr = devres_alloc(devm_rtc_device_release, sizeof(*ptr), GFP_KERNEL);
+ if (!ptr)
+ return ERR_PTR(-ENOMEM);
+
+ rtc = rtc_device_register(name, dev, ops, owner);
+ if (!IS_ERR(rtc)) {
+ *ptr = rtc;
+ devres_add(dev, ptr);
+ } else {
+ devres_free(ptr);
+ }
+
+ return rtc;
+}
+EXPORT_SYMBOL_GPL(devm_rtc_device_register);
+
+/**
+ * devm_rtc_device_unregister - resource managed devm_rtc_device_unregister()
+ * @dev: the device to unregister
+ * @rtc: the RTC class device to unregister
+ *
+ * Deallocated a rtc allocated with devm_rtc_device_register(). Normally this
+ * function will not need to be called and the resource management code will
+ * ensure that the resource is freed.
+ */
+void devm_rtc_device_unregister(struct device *dev, struct rtc_device *rtc)
+{
+ int rc;
+
+ rc = devres_release(dev, devm_rtc_device_release,
+ devm_rtc_device_match, rtc);
+ WARN_ON(rc);
+}
+EXPORT_SYMBOL_GPL(devm_rtc_device_unregister);
+
static int __init rtc_init(void)
{
rtc_class = class_create(THIS_MODULE, "rtc");
diff --git a/drivers/rtc/rtc-88pm80x.c b/drivers/rtc/rtc-88pm80x.c
index 63b17ebe90e8..76f9505ff7c5 100644
--- a/drivers/rtc/rtc-88pm80x.c
+++ b/drivers/rtc/rtc-88pm80x.c
@@ -312,7 +312,7 @@ static int pm80x_rtc_probe(struct platform_device *pdev)
}
rtc_tm_to_time(&tm, &ticks);
- info->rtc_dev = rtc_device_register("88pm80x-rtc", &pdev->dev,
+ info->rtc_dev = devm_rtc_device_register(&pdev->dev, "88pm80x-rtc",
&pm80x_rtc_ops, THIS_MODULE);
if (IS_ERR(info->rtc_dev)) {
ret = PTR_ERR(info->rtc_dev);
@@ -346,7 +346,6 @@ static int pm80x_rtc_remove(struct platform_device *pdev)
{
struct pm80x_rtc_info *info = platform_get_drvdata(pdev);
platform_set_drvdata(pdev, NULL);
- rtc_device_unregister(info->rtc_dev);
pm80x_free_irq(info->chip, info->irq, info);
return 0;
}
diff --git a/drivers/rtc/rtc-ab3100.c b/drivers/rtc/rtc-ab3100.c
index 261a07e0fb24..47a4f2c4d30e 100644
--- a/drivers/rtc/rtc-ab3100.c
+++ b/drivers/rtc/rtc-ab3100.c
@@ -229,8 +229,8 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev)
/* Ignore any error on this write */
}
- rtc = rtc_device_register("ab3100-rtc", &pdev->dev, &ab3100_rtc_ops,
- THIS_MODULE);
+ rtc = devm_rtc_device_register(&pdev->dev, "ab3100-rtc",
+ &ab3100_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc)) {
err = PTR_ERR(rtc);
return err;
@@ -242,9 +242,6 @@ static int __init ab3100_rtc_probe(struct platform_device *pdev)
static int __exit ab3100_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc);
platform_set_drvdata(pdev, NULL);
return 0;
}
@@ -257,19 +254,7 @@ static struct platform_driver ab3100_rtc_driver = {
.remove = __exit_p(ab3100_rtc_remove),
};
-static int __init ab3100_rtc_init(void)
-{
- return platform_driver_probe(&ab3100_rtc_driver,
- ab3100_rtc_probe);
-}
-
-static void __exit ab3100_rtc_exit(void)
-{
- platform_driver_unregister(&ab3100_rtc_driver);
-}
-
-module_init(ab3100_rtc_init);
-module_exit(ab3100_rtc_exit);
+module_platform_driver_probe(ab3100_rtc_driver, ab3100_rtc_probe);
MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
MODULE_DESCRIPTION("AB3100 RTC Driver");
diff --git a/drivers/rtc/rtc-at32ap700x.c b/drivers/rtc/rtc-at32ap700x.c
index 8dd08305aae1..619c8877f2f1 100644
--- a/drivers/rtc/rtc-at32ap700x.c
+++ b/drivers/rtc/rtc-at32ap700x.c
@@ -302,17 +302,7 @@ static struct platform_driver at32_rtc_driver = {
},
};
-static int __init at32_rtc_init(void)
-{
- return platform_driver_probe(&at32_rtc_driver, at32_rtc_probe);
-}
-module_init(at32_rtc_init);
-
-static void __exit at32_rtc_exit(void)
-{
- platform_driver_unregister(&at32_rtc_driver);
-}
-module_exit(at32_rtc_exit);
+module_platform_driver_probe(at32_rtc_driver, at32_rtc_probe);
MODULE_AUTHOR("Hans-Christian Egtvedt <hcegtvedt@atmel.com>");
MODULE_DESCRIPTION("Real time clock for AVR32 AT32AP700x");
diff --git a/drivers/rtc/rtc-at91rm9200.c b/drivers/rtc/rtc-at91rm9200.c
index 434ebc3a99dc..f07bd318260c 100644
--- a/drivers/rtc/rtc-at91rm9200.c
+++ b/drivers/rtc/rtc-at91rm9200.c
@@ -337,7 +337,7 @@ static int __exit at91_rtc_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
/* AT91RM9200 RTC Power management control */
@@ -369,39 +369,20 @@ static int at91_rtc_resume(struct device *dev)
}
return 0;
}
-
-static const struct dev_pm_ops at91_rtc_pm = {
- .suspend = at91_rtc_suspend,
- .resume = at91_rtc_resume,
-};
-
-#define at91_rtc_pm_ptr &at91_rtc_pm
-
-#else
-#define at91_rtc_pm_ptr NULL
#endif
+static SIMPLE_DEV_PM_OPS(at91_rtc_pm_ops, at91_rtc_suspend, at91_rtc_resume);
+
static struct platform_driver at91_rtc_driver = {
.remove = __exit_p(at91_rtc_remove),
.driver = {
.name = "at91_rtc",
.owner = THIS_MODULE,
- .pm = at91_rtc_pm_ptr,
+ .pm = &at91_rtc_pm_ops,
},
};
-static int __init at91_rtc_init(void)
-{
- return platform_driver_probe(&at91_rtc_driver, at91_rtc_probe);
-}
-
-static void __exit at91_rtc_exit(void)
-{
- platform_driver_unregister(&at91_rtc_driver);
-}
-
-module_init(at91_rtc_init);
-module_exit(at91_rtc_exit);
+module_platform_driver_probe(at91_rtc_driver, at91_rtc_probe);
MODULE_AUTHOR("Rick Bronson");
MODULE_DESCRIPTION("RTC driver for Atmel AT91RM9200");
diff --git a/drivers/rtc/rtc-au1xxx.c b/drivers/rtc/rtc-au1xxx.c
index b309da4ec745..7995abc391fc 100644
--- a/drivers/rtc/rtc-au1xxx.c
+++ b/drivers/rtc/rtc-au1xxx.c
@@ -101,7 +101,7 @@ static int au1xtoy_rtc_probe(struct platform_device *pdev)
while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_C0S)
msleep(1);
- rtcdev = rtc_device_register("rtc-au1xxx", &pdev->dev,
+ rtcdev = devm_rtc_device_register(&pdev->dev, "rtc-au1xxx",
&au1xtoy_rtc_ops, THIS_MODULE);
if (IS_ERR(rtcdev)) {
ret = PTR_ERR(rtcdev);
@@ -118,9 +118,6 @@ out_err:
static int au1xtoy_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtcdev = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtcdev);
platform_set_drvdata(pdev, NULL);
return 0;
@@ -134,18 +131,7 @@ static struct platform_driver au1xrtc_driver = {
.remove = au1xtoy_rtc_remove,
};
-static int __init au1xtoy_rtc_init(void)
-{
- return platform_driver_probe(&au1xrtc_driver, au1xtoy_rtc_probe);
-}
-
-static void __exit au1xtoy_rtc_exit(void)
-{
- platform_driver_unregister(&au1xrtc_driver);
-}
-
-module_init(au1xtoy_rtc_init);
-module_exit(au1xtoy_rtc_exit);
+module_platform_driver_probe(au1xrtc_driver, au1xtoy_rtc_probe);
MODULE_DESCRIPTION("Au1xxx TOY-counter-based RTC driver");
MODULE_AUTHOR("Manuel Lauss <manuel.lauss@gmail.com>");
diff --git a/drivers/rtc/rtc-bq32k.c b/drivers/rtc/rtc-bq32k.c
index 036cb89f8188..fea78bc713ca 100644
--- a/drivers/rtc/rtc-bq32k.c
+++ b/drivers/rtc/rtc-bq32k.c
@@ -153,7 +153,7 @@ static int bq32k_probe(struct i2c_client *client,
if (error)
return error;
- rtc = rtc_device_register(bq32k_driver.driver.name, &client->dev,
+ rtc = devm_rtc_device_register(&client->dev, bq32k_driver.driver.name,
&bq32k_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -165,9 +165,6 @@ static int bq32k_probe(struct i2c_client *client,
static int bq32k_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- rtc_device_unregister(rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-coh901331.c b/drivers/rtc/rtc-coh901331.c
index 2d28ec1aa1cd..bf0387f80d2d 100644
--- a/drivers/rtc/rtc-coh901331.c
+++ b/drivers/rtc/rtc-coh901331.c
@@ -155,7 +155,6 @@ static int __exit coh901331_remove(struct platform_device *pdev)
struct coh901331_port *rtap = dev_get_drvdata(&pdev->dev);
if (rtap) {
- rtc_device_unregister(rtap->rtc);
clk_unprepare(rtap->clk);
platform_set_drvdata(pdev, NULL);
}
@@ -211,8 +210,8 @@ static int __init coh901331_probe(struct platform_device *pdev)
clk_disable(rtap->clk);
platform_set_drvdata(pdev, rtap);
- rtap->rtc = rtc_device_register("coh901331", &pdev->dev, &coh901331_ops,
- THIS_MODULE);
+ rtap->rtc = devm_rtc_device_register(&pdev->dev, "coh901331",
+ &coh901331_ops, THIS_MODULE);
if (IS_ERR(rtap->rtc)) {
ret = PTR_ERR(rtap->rtc);
goto out_no_rtc;
@@ -287,18 +286,7 @@ static struct platform_driver coh901331_driver = {
.shutdown = coh901331_shutdown,
};
-static int __init coh901331_init(void)
-{
- return platform_driver_probe(&coh901331_driver, coh901331_probe);
-}
-
-static void __exit coh901331_exit(void)
-{
- platform_driver_unregister(&coh901331_driver);
-}
-
-module_init(coh901331_init);
-module_exit(coh901331_exit);
+module_platform_driver_probe(coh901331_driver, coh901331_probe);
MODULE_AUTHOR("Linus Walleij <linus.walleij@stericsson.com>");
MODULE_DESCRIPTION("ST-Ericsson AB COH 901 331 RTC Driver");
diff --git a/drivers/rtc/rtc-da9052.c b/drivers/rtc/rtc-da9052.c
index 0dde688ca09b..7286b279cf2d 100644
--- a/drivers/rtc/rtc-da9052.c
+++ b/drivers/rtc/rtc-da9052.c
@@ -239,17 +239,15 @@ static int da9052_rtc_probe(struct platform_device *pdev)
rtc->da9052 = dev_get_drvdata(pdev->dev.parent);
platform_set_drvdata(pdev, rtc);
- rtc->irq = platform_get_irq_byname(pdev, "ALM");
- ret = devm_request_threaded_irq(&pdev->dev, rtc->irq, NULL,
- da9052_rtc_irq,
- IRQF_TRIGGER_LOW | IRQF_ONESHOT,
- "ALM", rtc);
+ rtc->irq = DA9052_IRQ_ALARM;
+ ret = da9052_request_irq(rtc->da9052, rtc->irq, "ALM",
+ da9052_rtc_irq, rtc);
if (ret != 0) {
rtc_err(rtc->da9052, "irq registration failed: %d\n", ret);
return ret;
}
- rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&da9052_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc))
return PTR_ERR(rtc->rtc);
@@ -259,9 +257,6 @@ static int da9052_rtc_probe(struct platform_device *pdev)
static int da9052_rtc_remove(struct platform_device *pdev)
{
- struct da9052_rtc *rtc = pdev->dev.platform_data;
-
- rtc_device_unregister(rtc->rtc);
platform_set_drvdata(pdev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-da9055.c b/drivers/rtc/rtc-da9055.c
index 8f0dcfedb83c..73858ca9709a 100644
--- a/drivers/rtc/rtc-da9055.c
+++ b/drivers/rtc/rtc-da9055.c
@@ -294,7 +294,7 @@ static int da9055_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&da9055_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc)) {
ret = PTR_ERR(rtc->rtc);
@@ -317,9 +317,6 @@ err_rtc:
static int da9055_rtc_remove(struct platform_device *pdev)
{
- struct da9055_rtc *rtc = pdev->dev.platform_data;
-
- rtc_device_unregister(rtc->rtc);
platform_set_drvdata(pdev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-davinci.c b/drivers/rtc/rtc-davinci.c
index 56b73089bb29..a55048c3e26f 100644
--- a/drivers/rtc/rtc-davinci.c
+++ b/drivers/rtc/rtc-davinci.c
@@ -523,7 +523,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, davinci_rtc);
- davinci_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ davinci_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&davinci_rtc_ops, THIS_MODULE);
if (IS_ERR(davinci_rtc->rtc)) {
ret = PTR_ERR(davinci_rtc->rtc);
@@ -543,7 +543,7 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
0, "davinci_rtc", davinci_rtc);
if (ret < 0) {
dev_err(dev, "unable to register davinci RTC interrupt\n");
- goto fail2;
+ goto fail1;
}
/* Enable interrupts */
@@ -557,14 +557,12 @@ static int __init davinci_rtc_probe(struct platform_device *pdev)
return 0;
-fail2:
- rtc_device_unregister(davinci_rtc->rtc);
fail1:
platform_set_drvdata(pdev, NULL);
return ret;
}
-static int davinci_rtc_remove(struct platform_device *pdev)
+static int __exit davinci_rtc_remove(struct platform_device *pdev)
{
struct davinci_rtc *davinci_rtc = platform_get_drvdata(pdev);
@@ -572,8 +570,6 @@ static int davinci_rtc_remove(struct platform_device *pdev)
rtcif_write(davinci_rtc, 0, PRTCIF_INTEN);
- rtc_device_unregister(davinci_rtc->rtc);
-
platform_set_drvdata(pdev, NULL);
return 0;
@@ -581,24 +577,14 @@ static int davinci_rtc_remove(struct platform_device *pdev)
static struct platform_driver davinci_rtc_driver = {
.probe = davinci_rtc_probe,
- .remove = davinci_rtc_remove,
+ .remove = __exit_p(davinci_rtc_remove),
.driver = {
.name = "rtc_davinci",
.owner = THIS_MODULE,
},
};
-static int __init rtc_init(void)
-{
- return platform_driver_probe(&davinci_rtc_driver, davinci_rtc_probe);
-}
-module_init(rtc_init);
-
-static void __exit rtc_exit(void)
-{
- platform_driver_unregister(&davinci_rtc_driver);
-}
-module_exit(rtc_exit);
+module_platform_driver_probe(davinci_rtc_driver, davinci_rtc_probe);
MODULE_AUTHOR("Miguel Aguilar <miguel.aguilar@ridgerun.com>");
MODULE_DESCRIPTION("Texas Instruments DaVinci PRTC Driver");
diff --git a/drivers/rtc/rtc-dm355evm.c b/drivers/rtc/rtc-dm355evm.c
index b2ed2c94b081..1e1ca63d58a9 100644
--- a/drivers/rtc/rtc-dm355evm.c
+++ b/drivers/rtc/rtc-dm355evm.c
@@ -127,8 +127,8 @@ static int dm355evm_rtc_probe(struct platform_device *pdev)
{
struct rtc_device *rtc;
- rtc = rtc_device_register(pdev->name,
- &pdev->dev, &dm355evm_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+ &dm355evm_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc)) {
dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
PTR_ERR(rtc));
@@ -141,9 +141,6 @@ static int dm355evm_rtc_probe(struct platform_device *pdev)
static int dm355evm_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc);
platform_set_drvdata(pdev, NULL);
return 0;
}
diff --git a/drivers/rtc/rtc-ds1286.c b/drivers/rtc/rtc-ds1286.c
index d989412a348a..d120cb8bfcbe 100644
--- a/drivers/rtc/rtc-ds1286.c
+++ b/drivers/rtc/rtc-ds1286.c
@@ -270,7 +270,6 @@ static int ds1286_set_time(struct device *dev, struct rtc_time *tm)
static int ds1286_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
{
struct ds1286_priv *priv = dev_get_drvdata(dev);
- unsigned char cmd;
unsigned long flags;
/*
@@ -281,7 +280,7 @@ static int ds1286_read_alarm(struct device *dev, struct rtc_wkalrm *alm)
alm->time.tm_min = ds1286_rtc_read(priv, RTC_MINUTES_ALARM) & 0x7f;
alm->time.tm_hour = ds1286_rtc_read(priv, RTC_HOURS_ALARM) & 0x1f;
alm->time.tm_wday = ds1286_rtc_read(priv, RTC_DAY_ALARM) & 0x07;
- cmd = ds1286_rtc_read(priv, RTC_CMD);
+ ds1286_rtc_read(priv, RTC_CMD);
spin_unlock_irqrestore(&priv->lock, flags);
alm->time.tm_min = bcd2bin(alm->time.tm_min);
diff --git a/drivers/rtc/rtc-ds1302.c b/drivers/rtc/rtc-ds1302.c
index fdbcdb289d60..d13954346286 100644
--- a/drivers/rtc/rtc-ds1302.c
+++ b/drivers/rtc/rtc-ds1302.c
@@ -224,7 +224,7 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev)
return -ENODEV;
}
- rtc = rtc_device_register("ds1302", &pdev->dev,
+ rtc = devm_rtc_device_register(&pdev->dev, "ds1302",
&ds1302_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -234,11 +234,8 @@ static int __init ds1302_rtc_probe(struct platform_device *pdev)
return 0;
}
-static int ds1302_rtc_remove(struct platform_device *pdev)
+static int __exit ds1302_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc);
platform_set_drvdata(pdev, NULL);
return 0;
@@ -249,21 +246,10 @@ static struct platform_driver ds1302_platform_driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
},
- .remove = ds1302_rtc_remove,
+ .remove = __exit_p(ds1302_rtc_remove),
};
-static int __init ds1302_rtc_init(void)
-{
- return platform_driver_probe(&ds1302_platform_driver, ds1302_rtc_probe);
-}
-
-static void __exit ds1302_rtc_exit(void)
-{
- platform_driver_unregister(&ds1302_platform_driver);
-}
-
-module_init(ds1302_rtc_init);
-module_exit(ds1302_rtc_exit);
+module_platform_driver_probe(ds1302_platform_driver, ds1302_rtc_probe);
MODULE_DESCRIPTION("Dallas DS1302 RTC driver");
MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/rtc/rtc-ds1307.c b/drivers/rtc/rtc-ds1307.c
index 970a236b147a..b859d3c67f11 100644
--- a/drivers/rtc/rtc-ds1307.c
+++ b/drivers/rtc/rtc-ds1307.c
@@ -4,6 +4,7 @@
* Copyright (C) 2005 James Chapman (ds1337 core)
* Copyright (C) 2006 David Brownell
* Copyright (C) 2009 Matthias Fuchs (rx8025 support)
+ * Copyright (C) 2012 Bertrand Achard (nvram access fixes)
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -196,7 +197,7 @@ static s32 ds1307_read_block_data_once(const struct i2c_client *client,
static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command,
u8 length, u8 *values)
{
- u8 oldvalues[I2C_SMBUS_BLOCK_MAX];
+ u8 oldvalues[255];
s32 ret;
int tries = 0;
@@ -222,7 +223,7 @@ static s32 ds1307_read_block_data(const struct i2c_client *client, u8 command,
static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command,
u8 length, const u8 *values)
{
- u8 currvalues[I2C_SMBUS_BLOCK_MAX];
+ u8 currvalues[255];
int tries = 0;
dev_dbg(&client->dev, "ds1307_write_block_data (length=%d)\n", length);
@@ -250,6 +251,57 @@ static s32 ds1307_write_block_data(const struct i2c_client *client, u8 command,
/*----------------------------------------------------------------------*/
+/* These RTC devices are not designed to be connected to a SMbus adapter.
+ SMbus limits block operations length to 32 bytes, whereas it's not
+ limited on I2C buses. As a result, accesses may exceed 32 bytes;
+ in that case, split them into smaller blocks */
+
+static s32 ds1307_native_smbus_write_block_data(const struct i2c_client *client,
+ u8 command, u8 length, const u8 *values)
+{
+ u8 suboffset = 0;
+
+ if (length <= I2C_SMBUS_BLOCK_MAX)
+ return i2c_smbus_write_i2c_block_data(client,
+ command, length, values);
+
+ while (suboffset < length) {
+ s32 retval = i2c_smbus_write_i2c_block_data(client,
+ command + suboffset,
+ min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
+ values + suboffset);
+ if (retval < 0)
+ return retval;
+
+ suboffset += I2C_SMBUS_BLOCK_MAX;
+ }
+ return length;
+}
+
+static s32 ds1307_native_smbus_read_block_data(const struct i2c_client *client,
+ u8 command, u8 length, u8 *values)
+{
+ u8 suboffset = 0;
+
+ if (length <= I2C_SMBUS_BLOCK_MAX)
+ return i2c_smbus_read_i2c_block_data(client,
+ command, length, values);
+
+ while (suboffset < length) {
+ s32 retval = i2c_smbus_read_i2c_block_data(client,
+ command + suboffset,
+ min(I2C_SMBUS_BLOCK_MAX, length - suboffset),
+ values + suboffset);
+ if (retval < 0)
+ return retval;
+
+ suboffset += I2C_SMBUS_BLOCK_MAX;
+ }
+ return length;
+}
+
+/*----------------------------------------------------------------------*/
+
/*
* The IRQ logic includes a "real" handler running in IRQ context just
* long enough to schedule this workqueue entry. We need a task context
@@ -646,8 +698,8 @@ static int ds1307_probe(struct i2c_client *client,
buf = ds1307->regs;
if (i2c_check_functionality(adapter, I2C_FUNC_SMBUS_I2C_BLOCK)) {
- ds1307->read_block_data = i2c_smbus_read_i2c_block_data;
- ds1307->write_block_data = i2c_smbus_write_i2c_block_data;
+ ds1307->read_block_data = ds1307_native_smbus_read_block_data;
+ ds1307->write_block_data = ds1307_native_smbus_write_block_data;
} else {
ds1307->read_block_data = ds1307_read_block_data;
ds1307->write_block_data = ds1307_write_block_data;
@@ -661,7 +713,7 @@ static int ds1307_probe(struct i2c_client *client,
tmp = ds1307->read_block_data(ds1307->client,
DS1337_REG_CONTROL, 2, buf);
if (tmp != 2) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
@@ -700,7 +752,7 @@ static int ds1307_probe(struct i2c_client *client,
tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
if (tmp != 2) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
@@ -744,7 +796,7 @@ static int ds1307_probe(struct i2c_client *client,
tmp = i2c_smbus_read_i2c_block_data(ds1307->client,
RX8025_REG_CTRL1 << 4 | 0x08, 2, buf);
if (tmp != 2) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
@@ -772,7 +824,7 @@ read_rtc:
/* read RTC registers */
tmp = ds1307->read_block_data(ds1307->client, ds1307->offset, 8, buf);
if (tmp != 8) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
@@ -814,7 +866,7 @@ read_rtc:
tmp = i2c_smbus_read_byte_data(client, DS1340_REG_FLAG);
if (tmp < 0) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
index 6a3fcfe3b0e7..6ce8a997cf51 100644
--- a/drivers/rtc/rtc-ds1511.c
+++ b/drivers/rtc/rtc-ds1511.c
@@ -538,15 +538,14 @@ static int ds1511_rtc_probe(struct platform_device *pdev)
}
}
- rtc = rtc_device_register(pdev->name, &pdev->dev, &ds1511_rtc_ops,
- THIS_MODULE);
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &ds1511_rtc_ops,
+ THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
pdata->rtc = rtc;
ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr);
- if (ret)
- rtc_device_unregister(pdata->rtc);
+
return ret;
}
@@ -555,7 +554,6 @@ static int ds1511_rtc_remove(struct platform_device *pdev)
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
sysfs_remove_bin_file(&pdev->dev.kobj, &ds1511_nvram_attr);
- rtc_device_unregister(pdata->rtc);
if (pdata->irq > 0) {
/*
* disable the alarm interrupt
diff --git a/drivers/rtc/rtc-ds1553.c b/drivers/rtc/rtc-ds1553.c
index 25ce0621ade9..8c6c952e90b1 100644
--- a/drivers/rtc/rtc-ds1553.c
+++ b/drivers/rtc/rtc-ds1553.c
@@ -326,15 +326,14 @@ static int ds1553_rtc_probe(struct platform_device *pdev)
}
}
- rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&ds1553_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
pdata->rtc = rtc;
ret = sysfs_create_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
- if (ret)
- rtc_device_unregister(rtc);
+
return ret;
}
@@ -343,7 +342,6 @@ static int ds1553_rtc_remove(struct platform_device *pdev)
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
sysfs_remove_bin_file(&pdev->dev.kobj, &ds1553_nvram_attr);
- rtc_device_unregister(pdata->rtc);
if (pdata->irq > 0)
writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
return 0;
diff --git a/drivers/rtc/rtc-ds1672.c b/drivers/rtc/rtc-ds1672.c
index 45d65c0b3a85..3fc2a4738027 100644
--- a/drivers/rtc/rtc-ds1672.c
+++ b/drivers/rtc/rtc-ds1672.c
@@ -155,11 +155,6 @@ static const struct rtc_class_ops ds1672_rtc_ops = {
static int ds1672_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
return 0;
}
@@ -177,7 +172,7 @@ static int ds1672_probe(struct i2c_client *client,
dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
- rtc = rtc_device_register(ds1672_driver.driver.name, &client->dev,
+ rtc = devm_rtc_device_register(&client->dev, ds1672_driver.driver.name,
&ds1672_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
@@ -202,7 +197,6 @@ static int ds1672_probe(struct i2c_client *client,
return 0;
exit_devreg:
- rtc_device_unregister(rtc);
return err;
}
diff --git a/drivers/rtc/rtc-ds1742.c b/drivers/rtc/rtc-ds1742.c
index 609c870e2cc5..eccdc62ae1c0 100644
--- a/drivers/rtc/rtc-ds1742.c
+++ b/drivers/rtc/rtc-ds1742.c
@@ -208,17 +208,14 @@ static int ds1742_rtc_probe(struct platform_device *pdev)
pdata->last_jiffies = jiffies;
platform_set_drvdata(pdev, pdata);
- rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&ds1742_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
pdata->rtc = rtc;
ret = sysfs_create_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
- if (ret) {
- dev_err(&pdev->dev, "creating nvram file in sysfs failed\n");
- rtc_device_unregister(rtc);
- }
+
return ret;
}
@@ -227,7 +224,6 @@ static int ds1742_rtc_remove(struct platform_device *pdev)
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
sysfs_remove_bin_file(&pdev->dev.kobj, &pdata->nvram_attr);
- rtc_device_unregister(pdata->rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-ds3234.c b/drivers/rtc/rtc-ds3234.c
index 7a4495ef1c39..a66efd49430e 100644
--- a/drivers/rtc/rtc-ds3234.c
+++ b/drivers/rtc/rtc-ds3234.c
@@ -146,8 +146,8 @@ static int ds3234_probe(struct spi_device *spi)
ds3234_get_reg(&spi->dev, DS3234_REG_CONT_STAT, &tmp);
dev_info(&spi->dev, "Ctrl/Stat Reg: 0x%02x\n", tmp);
- rtc = rtc_device_register("ds3234",
- &spi->dev, &ds3234_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, "ds3234",
+ &ds3234_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -158,9 +158,6 @@ static int ds3234_probe(struct spi_device *spi)
static int ds3234_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = spi_get_drvdata(spi);
-
- rtc_device_unregister(rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-efi.c b/drivers/rtc/rtc-efi.c
index 1a0c37c9152b..b3c8c0b1709d 100644
--- a/drivers/rtc/rtc-efi.c
+++ b/drivers/rtc/rtc-efi.c
@@ -191,7 +191,7 @@ static int __init efi_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
- rtc = rtc_device_register("rtc-efi", &dev->dev, &efi_rtc_ops,
+ rtc = devm_rtc_device_register(&dev->dev, "rtc-efi", &efi_rtc_ops,
THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -203,10 +203,6 @@ static int __init efi_rtc_probe(struct platform_device *dev)
static int __exit efi_rtc_remove(struct platform_device *dev)
{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
- rtc_device_unregister(rtc);
-
return 0;
}
@@ -218,18 +214,7 @@ static struct platform_driver efi_rtc_driver = {
.remove = __exit_p(efi_rtc_remove),
};
-static int __init efi_rtc_init(void)
-{
- return platform_driver_probe(&efi_rtc_driver, efi_rtc_probe);
-}
-
-static void __exit efi_rtc_exit(void)
-{
- platform_driver_unregister(&efi_rtc_driver);
-}
-
-module_init(efi_rtc_init);
-module_exit(efi_rtc_exit);
+module_platform_driver_probe(efi_rtc_driver, efi_rtc_probe);
MODULE_AUTHOR("dann frazier <dannf@hp.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-em3027.c b/drivers/rtc/rtc-em3027.c
index f6c24ce35d36..3f9eb57d0486 100644
--- a/drivers/rtc/rtc-em3027.c
+++ b/drivers/rtc/rtc-em3027.c
@@ -121,7 +121,7 @@ static int em3027_probe(struct i2c_client *client,
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C))
return -ENODEV;
- rtc = rtc_device_register(em3027_driver.driver.name, &client->dev,
+ rtc = devm_rtc_device_register(&client->dev, em3027_driver.driver.name,
&em3027_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -133,11 +133,6 @@ static int em3027_probe(struct i2c_client *client,
static int em3027_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-ep93xx.c b/drivers/rtc/rtc-ep93xx.c
index 1a4e5e4a70cd..5807b77c444a 100644
--- a/drivers/rtc/rtc-ep93xx.c
+++ b/drivers/rtc/rtc-ep93xx.c
@@ -153,8 +153,8 @@ static int ep93xx_rtc_probe(struct platform_device *pdev)
pdev->dev.platform_data = ep93xx_rtc;
platform_set_drvdata(pdev, ep93xx_rtc);
- ep93xx_rtc->rtc = rtc_device_register(pdev->name,
- &pdev->dev, &ep93xx_rtc_ops, THIS_MODULE);
+ ep93xx_rtc->rtc = devm_rtc_device_register(&pdev->dev,
+ pdev->name, &ep93xx_rtc_ops, THIS_MODULE);
if (IS_ERR(ep93xx_rtc->rtc)) {
err = PTR_ERR(ep93xx_rtc->rtc);
goto exit;
@@ -162,12 +162,10 @@ static int ep93xx_rtc_probe(struct platform_device *pdev)
err = sysfs_create_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files);
if (err)
- goto fail;
+ goto exit;
return 0;
-fail:
- rtc_device_unregister(ep93xx_rtc->rtc);
exit:
platform_set_drvdata(pdev, NULL);
pdev->dev.platform_data = NULL;
@@ -176,11 +174,8 @@ exit:
static int ep93xx_rtc_remove(struct platform_device *pdev)
{
- struct ep93xx_rtc *ep93xx_rtc = platform_get_drvdata(pdev);
-
sysfs_remove_group(&pdev->dev.kobj, &ep93xx_rtc_sysfs_files);
platform_set_drvdata(pdev, NULL);
- rtc_device_unregister(ep93xx_rtc->rtc);
pdev->dev.platform_data = NULL;
return 0;
diff --git a/drivers/rtc/rtc-fm3130.c b/drivers/rtc/rtc-fm3130.c
index bff3cdc5140e..4d4ad3fcb5e9 100644
--- a/drivers/rtc/rtc-fm3130.c
+++ b/drivers/rtc/rtc-fm3130.c
@@ -395,7 +395,7 @@ static int fm3130_probe(struct i2c_client *client,
tmp = i2c_transfer(adapter, fm3130->msg, 4);
if (tmp != 4) {
- pr_debug("read error %d\n", tmp);
+ dev_dbg(&client->dev, "read error %d\n", tmp);
err = -EIO;
goto exit_free;
}
diff --git a/drivers/rtc/rtc-generic.c b/drivers/rtc/rtc-generic.c
index 98322004ad2e..06279ce6bff2 100644
--- a/drivers/rtc/rtc-generic.c
+++ b/drivers/rtc/rtc-generic.c
@@ -38,8 +38,8 @@ static int __init generic_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
- rtc = rtc_device_register("rtc-generic", &dev->dev, &generic_rtc_ops,
- THIS_MODULE);
+ rtc = devm_rtc_device_register(&dev->dev, "rtc-generic",
+ &generic_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -50,10 +50,6 @@ static int __init generic_rtc_probe(struct platform_device *dev)
static int __exit generic_rtc_remove(struct platform_device *dev)
{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
- rtc_device_unregister(rtc);
-
return 0;
}
@@ -65,18 +61,7 @@ static struct platform_driver generic_rtc_driver = {
.remove = __exit_p(generic_rtc_remove),
};
-static int __init generic_rtc_init(void)
-{
- return platform_driver_probe(&generic_rtc_driver, generic_rtc_probe);
-}
-
-static void __exit generic_rtc_fini(void)
-{
- platform_driver_unregister(&generic_rtc_driver);
-}
-
-module_init(generic_rtc_init);
-module_exit(generic_rtc_fini);
+module_platform_driver_probe(generic_rtc_driver, generic_rtc_probe);
MODULE_AUTHOR("Kyle McMartin <kyle@mcmartin.ca>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-hid-sensor-time.c b/drivers/rtc/rtc-hid-sensor-time.c
index 31c5728ef629..63024505dddc 100644
--- a/drivers/rtc/rtc-hid-sensor-time.c
+++ b/drivers/rtc/rtc-hid-sensor-time.c
@@ -255,8 +255,9 @@ static int hid_time_probe(struct platform_device *pdev)
return ret;
}
- time_state->rtc = rtc_device_register("hid-sensor-time",
- &pdev->dev, &hid_time_rtc_ops, THIS_MODULE);
+ time_state->rtc = devm_rtc_device_register(&pdev->dev,
+ "hid-sensor-time", &hid_time_rtc_ops,
+ THIS_MODULE);
if (IS_ERR(time_state->rtc)) {
dev_err(&pdev->dev, "rtc device register failed!\n");
@@ -269,9 +270,7 @@ static int hid_time_probe(struct platform_device *pdev)
static int hid_time_remove(struct platform_device *pdev)
{
struct hid_sensor_hub_device *hsdev = pdev->dev.platform_data;
- struct hid_time_state *time_state = platform_get_drvdata(pdev);
- rtc_device_unregister(time_state->rtc);
sensor_hub_remove_callback(hsdev, HID_USAGE_SENSOR_TIME);
return 0;
diff --git a/drivers/rtc/rtc-imxdi.c b/drivers/rtc/rtc-imxdi.c
index 82aad695979e..d3a8c8e255de 100644
--- a/drivers/rtc/rtc-imxdi.c
+++ b/drivers/rtc/rtc-imxdi.c
@@ -369,7 +369,7 @@ static void dryice_work(struct work_struct *work)
/*
* probe for dryice rtc device
*/
-static int dryice_rtc_probe(struct platform_device *pdev)
+static int __init dryice_rtc_probe(struct platform_device *pdev)
{
struct resource *res;
struct imxdi_dev *imxdi;
@@ -464,7 +464,7 @@ static int dryice_rtc_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, imxdi);
- imxdi->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ imxdi->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&dryice_rtc_ops, THIS_MODULE);
if (IS_ERR(imxdi->rtc)) {
rc = PTR_ERR(imxdi->rtc);
@@ -479,7 +479,7 @@ err:
return rc;
}
-static int dryice_rtc_remove(struct platform_device *pdev)
+static int __exit dryice_rtc_remove(struct platform_device *pdev)
{
struct imxdi_dev *imxdi = platform_get_drvdata(pdev);
@@ -488,8 +488,6 @@ static int dryice_rtc_remove(struct platform_device *pdev)
/* mask all interrupts */
__raw_writel(0, imxdi->ioaddr + DIER);
- rtc_device_unregister(imxdi->rtc);
-
clk_disable_unprepare(imxdi->clk);
return 0;
@@ -510,21 +508,10 @@ static struct platform_driver dryice_rtc_driver = {
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(dryice_dt_ids),
},
- .remove = dryice_rtc_remove,
+ .remove = __exit_p(dryice_rtc_remove),
};
-static int __init dryice_rtc_init(void)
-{
- return platform_driver_probe(&dryice_rtc_driver, dryice_rtc_probe);
-}
-
-static void __exit dryice_rtc_exit(void)
-{
- platform_driver_unregister(&dryice_rtc_driver);
-}
-
-module_init(dryice_rtc_init);
-module_exit(dryice_rtc_exit);
+module_platform_driver_probe(dryice_rtc_driver, dryice_rtc_probe);
MODULE_AUTHOR("Freescale Semiconductor, Inc.");
MODULE_AUTHOR("Baruch Siach <baruch@tkos.co.il>");
diff --git a/drivers/rtc/rtc-lp8788.c b/drivers/rtc/rtc-lp8788.c
index 9a4631218f41..9853ac15b296 100644
--- a/drivers/rtc/rtc-lp8788.c
+++ b/drivers/rtc/rtc-lp8788.c
@@ -299,7 +299,7 @@ static int lp8788_rtc_probe(struct platform_device *pdev)
device_init_wakeup(dev, 1);
- rtc->rdev = rtc_device_register("lp8788_rtc", dev,
+ rtc->rdev = devm_rtc_device_register(dev, "lp8788_rtc",
&lp8788_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rdev)) {
dev_err(dev, "can not register rtc device\n");
@@ -314,9 +314,6 @@ static int lp8788_rtc_probe(struct platform_device *pdev)
static int lp8788_rtc_remove(struct platform_device *pdev)
{
- struct lp8788_rtc *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc->rdev);
platform_set_drvdata(pdev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-lpc32xx.c b/drivers/rtc/rtc-lpc32xx.c
index 40a598332bac..787550d756e9 100644
--- a/drivers/rtc/rtc-lpc32xx.c
+++ b/drivers/rtc/rtc-lpc32xx.c
@@ -273,8 +273,8 @@ static int lpc32xx_rtc_probe(struct platform_device *pdev)
platform_set_drvdata(pdev, rtc);
- rtc->rtc = rtc_device_register(RTC_NAME, &pdev->dev, &lpc32xx_rtc_ops,
- THIS_MODULE);
+ rtc->rtc = devm_rtc_device_register(&pdev->dev, RTC_NAME,
+ &lpc32xx_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc)) {
dev_err(&pdev->dev, "Can't get RTC\n");
platform_set_drvdata(pdev, NULL);
@@ -307,7 +307,6 @@ static int lpc32xx_rtc_remove(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 0);
platform_set_drvdata(pdev, NULL);
- rtc_device_unregister(rtc->rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-ls1x.c b/drivers/rtc/rtc-ls1x.c
index f59b6349551a..db82f91f4562 100644
--- a/drivers/rtc/rtc-ls1x.c
+++ b/drivers/rtc/rtc-ls1x.c
@@ -172,7 +172,7 @@ static int ls1x_rtc_probe(struct platform_device *pdev)
while (readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_TTS)
usleep_range(1000, 3000);
- rtcdev = rtc_device_register("ls1x-rtc", &pdev->dev,
+ rtcdev = devm_rtc_device_register(&pdev->dev, "ls1x-rtc",
&ls1x_rtc_ops , THIS_MODULE);
if (IS_ERR(rtcdev)) {
ret = PTR_ERR(rtcdev);
@@ -187,9 +187,6 @@ err:
static int ls1x_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtcdev = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtcdev);
platform_set_drvdata(pdev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-m41t93.c b/drivers/rtc/rtc-m41t93.c
index 49169680786e..cfc21a1608ab 100644
--- a/drivers/rtc/rtc-m41t93.c
+++ b/drivers/rtc/rtc-m41t93.c
@@ -184,8 +184,8 @@ static int m41t93_probe(struct spi_device *spi)
return -ENODEV;
}
- rtc = rtc_device_register(m41t93_driver.driver.name,
- &spi->dev, &m41t93_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, m41t93_driver.driver.name,
+ &m41t93_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -197,11 +197,6 @@ static int m41t93_probe(struct spi_device *spi)
static int m41t93_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = spi_get_drvdata(spi);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-m41t94.c b/drivers/rtc/rtc-m41t94.c
index 89266c6764bc..cf655a91e385 100644
--- a/drivers/rtc/rtc-m41t94.c
+++ b/drivers/rtc/rtc-m41t94.c
@@ -124,8 +124,8 @@ static int m41t94_probe(struct spi_device *spi)
return res;
}
- rtc = rtc_device_register(m41t94_driver.driver.name,
- &spi->dev, &m41t94_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, m41t94_driver.driver.name,
+ &m41t94_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -136,11 +136,6 @@ static int m41t94_probe(struct spi_device *spi)
static int m41t94_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = spi_get_drvdata(spi);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-m48t86.c b/drivers/rtc/rtc-m48t86.c
index 2ffbcacd2439..33a91c484533 100644
--- a/drivers/rtc/rtc-m48t86.c
+++ b/drivers/rtc/rtc-m48t86.c
@@ -148,8 +148,10 @@ static int m48t86_rtc_probe(struct platform_device *dev)
{
unsigned char reg;
struct m48t86_ops *ops = dev->dev.platform_data;
- struct rtc_device *rtc = rtc_device_register("m48t86",
- &dev->dev, &m48t86_rtc_ops, THIS_MODULE);
+ struct rtc_device *rtc;
+
+ rtc = devm_rtc_device_register(&dev->dev, "m48t86",
+ &m48t86_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -166,11 +168,6 @@ static int m48t86_rtc_probe(struct platform_device *dev)
static int m48t86_rtc_remove(struct platform_device *dev)
{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
platform_set_drvdata(dev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-max6900.c b/drivers/rtc/rtc-max6900.c
index a00e33204b91..8669d6d09a00 100644
--- a/drivers/rtc/rtc-max6900.c
+++ b/drivers/rtc/rtc-max6900.c
@@ -214,11 +214,6 @@ static int max6900_rtc_set_time(struct device *dev, struct rtc_time *tm)
static int max6900_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
return 0;
}
@@ -237,8 +232,8 @@ max6900_probe(struct i2c_client *client, const struct i2c_device_id *id)
dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
- rtc = rtc_device_register(max6900_driver.driver.name,
- &client->dev, &max6900_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&client->dev, max6900_driver.driver.name,
+ &max6900_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
diff --git a/drivers/rtc/rtc-max6902.c b/drivers/rtc/rtc-max6902.c
index 7d0bf698b79e..7e4491b1f90b 100644
--- a/drivers/rtc/rtc-max6902.c
+++ b/drivers/rtc/rtc-max6902.c
@@ -134,8 +134,8 @@ static int max6902_probe(struct spi_device *spi)
if (res != 0)
return res;
- rtc = rtc_device_register("max6902",
- &spi->dev, &max6902_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, "max6902",
+ &max6902_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -145,9 +145,6 @@ static int max6902_probe(struct spi_device *spi)
static int max6902_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = dev_get_drvdata(&spi->dev);
-
- rtc_device_unregister(rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-max77686.c b/drivers/rtc/rtc-max77686.c
index 6b1337f9baf4..5a12b32f77ec 100644
--- a/drivers/rtc/rtc-max77686.c
+++ b/drivers/rtc/rtc-max77686.c
@@ -24,7 +24,7 @@
/* RTC Control Register */
#define BCD_EN_SHIFT 0
-#define BCD_EN_MASK (1 << BCD_EN_SHIFT)
+#define BCD_EN_MASK (1 << BCD_EN_SHIFT)
#define MODEL24_SHIFT 1
#define MODEL24_MASK (1 << MODEL24_SHIFT)
/* RTC Update Register1 */
@@ -33,12 +33,12 @@
#define RTC_RBUDR_SHIFT 4
#define RTC_RBUDR_MASK (1 << RTC_RBUDR_SHIFT)
/* WTSR and SMPL Register */
-#define WTSRT_SHIFT 0
-#define SMPLT_SHIFT 2
+#define WTSRT_SHIFT 0
+#define SMPLT_SHIFT 2
#define WTSR_EN_SHIFT 6
#define SMPL_EN_SHIFT 7
-#define WTSRT_MASK (3 << WTSRT_SHIFT)
-#define SMPLT_MASK (3 << SMPLT_SHIFT)
+#define WTSRT_MASK (3 << WTSRT_SHIFT)
+#define SMPLT_MASK (3 << SMPLT_SHIFT)
#define WTSR_EN_MASK (1 << WTSR_EN_SHIFT)
#define SMPL_EN_MASK (1 << SMPL_EN_SHIFT)
/* RTC Hour register */
@@ -466,7 +466,7 @@ static void max77686_rtc_enable_smpl(struct max77686_rtc_info *info, bool enable
val = 0;
regmap_read(info->max77686->rtc_regmap, MAX77686_WTSR_SMPL_CNTL, &val);
- pr_info("%s: WTSR_SMPL(0x%02x)\n", __func__, val);
+ dev_info(info->dev, "%s: WTSR_SMPL(0x%02x)\n", __func__, val);
}
#endif /* MAX77686_RTC_WTSR_SMPL */
@@ -505,7 +505,8 @@ static int max77686_rtc_probe(struct platform_device *pdev)
dev_info(&pdev->dev, "%s\n", __func__);
- info = kzalloc(sizeof(struct max77686_rtc_info), GFP_KERNEL);
+ info = devm_kzalloc(&pdev->dev, sizeof(struct max77686_rtc_info),
+ GFP_KERNEL);
if (!info)
return -ENOMEM;
@@ -519,7 +520,6 @@ static int max77686_rtc_probe(struct platform_device *pdev)
ret = PTR_ERR(info->max77686->rtc_regmap);
dev_err(info->max77686->dev, "Failed to allocate register map: %d\n",
ret);
- kfree(info);
return ret;
}
platform_set_drvdata(pdev, info);
@@ -538,8 +538,8 @@ static int max77686_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- info->rtc_dev = rtc_device_register("max77686-rtc", &pdev->dev,
- &max77686_rtc_ops, THIS_MODULE);
+ info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max77686-rtc",
+ &max77686_rtc_ops, THIS_MODULE);
if (IS_ERR(info->rtc_dev)) {
dev_info(&pdev->dev, "%s: fail\n", __func__);
@@ -555,32 +555,20 @@ static int max77686_rtc_probe(struct platform_device *pdev)
goto err_rtc;
info->virq = virq;
- ret = request_threaded_irq(virq, NULL, max77686_rtc_alarm_irq, 0,
- "rtc-alarm0", info);
+ ret = devm_request_threaded_irq(&pdev->dev, virq, NULL,
+ max77686_rtc_alarm_irq, 0, "rtc-alarm0", info);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
info->virq, ret);
goto err_rtc;
}
- goto out;
err_rtc:
- kfree(info);
- return ret;
-out:
return ret;
}
static int max77686_rtc_remove(struct platform_device *pdev)
{
- struct max77686_rtc_info *info = platform_get_drvdata(pdev);
-
- if (info) {
- free_irq(info->virq, info);
- rtc_device_unregister(info->rtc_dev);
- kfree(info);
- }
-
return 0;
}
@@ -594,11 +582,14 @@ static void max77686_rtc_shutdown(struct platform_device *pdev)
for (i = 0; i < 3; i++) {
max77686_rtc_enable_wtsr(info, false);
regmap_read(info->max77686->rtc_regmap, MAX77686_WTSR_SMPL_CNTL, &val);
- pr_info("%s: WTSR_SMPL reg(0x%02x)\n", __func__, val);
- if (val & WTSR_EN_MASK)
- pr_emerg("%s: fail to disable WTSR\n", __func__);
- else {
- pr_info("%s: success to disable WTSR\n", __func__);
+ dev_info(info->dev, "%s: WTSR_SMPL reg(0x%02x)\n", __func__,
+ val);
+ if (val & WTSR_EN_MASK) {
+ dev_emerg(info->dev, "%s: fail to disable WTSR\n",
+ __func__);
+ } else {
+ dev_info(info->dev, "%s: success to disable WTSR\n",
+ __func__);
break;
}
}
@@ -624,18 +615,8 @@ static struct platform_driver max77686_rtc_driver = {
.id_table = rtc_id,
};
-static int __init max77686_rtc_init(void)
-{
- return platform_driver_register(&max77686_rtc_driver);
-}
-module_init(max77686_rtc_init);
-
-static void __exit max77686_rtc_exit(void)
-{
- platform_driver_unregister(&max77686_rtc_driver);
-}
-module_exit(max77686_rtc_exit);
+module_platform_driver(max77686_rtc_driver);
MODULE_DESCRIPTION("Maxim MAX77686 RTC driver");
-MODULE_AUTHOR("<woong.byun@samsung.com>");
+MODULE_AUTHOR("Chiwoong Byun <woong.byun@samsung.com>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-max8907.c b/drivers/rtc/rtc-max8907.c
index 31ca8faf9f05..9d62cdb83d11 100644
--- a/drivers/rtc/rtc-max8907.c
+++ b/drivers/rtc/rtc-max8907.c
@@ -190,7 +190,7 @@ static int max8907_rtc_probe(struct platform_device *pdev)
rtc->max8907 = max8907;
rtc->regmap = max8907->regmap_rtc;
- rtc->rtc_dev = rtc_device_register("max8907-rtc", &pdev->dev,
+ rtc->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8907-rtc",
&max8907_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc_dev)) {
ret = PTR_ERR(rtc->rtc_dev);
@@ -217,16 +217,11 @@ static int max8907_rtc_probe(struct platform_device *pdev)
return 0;
err_unregister:
- rtc_device_unregister(rtc->rtc_dev);
return ret;
}
static int max8907_rtc_remove(struct platform_device *pdev)
{
- struct max8907_rtc *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc->rtc_dev);
-
return 0;
}
diff --git a/drivers/rtc/rtc-max8997.c b/drivers/rtc/rtc-max8997.c
index 00e505b6bee3..d12acc49c822 100644
--- a/drivers/rtc/rtc-max8997.c
+++ b/drivers/rtc/rtc-max8997.c
@@ -479,8 +479,8 @@ static int max8997_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- info->rtc_dev = rtc_device_register("max8997-rtc", &pdev->dev,
- &max8997_rtc_ops, THIS_MODULE);
+ info->rtc_dev = devm_rtc_device_register(&pdev->dev, "max8997-rtc",
+ &max8997_rtc_ops, THIS_MODULE);
if (IS_ERR(info->rtc_dev)) {
ret = PTR_ERR(info->rtc_dev);
@@ -507,17 +507,11 @@ static int max8997_rtc_probe(struct platform_device *pdev)
return ret;
err_out:
- rtc_device_unregister(info->rtc_dev);
return ret;
}
static int max8997_rtc_remove(struct platform_device *pdev)
{
- struct max8997_rtc_info *info = platform_get_drvdata(pdev);
-
- if (info)
- rtc_device_unregister(info->rtc_dev);
-
return 0;
}
diff --git a/drivers/rtc/rtc-mc13xxx.c b/drivers/rtc/rtc-mc13xxx.c
index 2643d8874925..5391b154b43c 100644
--- a/drivers/rtc/rtc-mc13xxx.c
+++ b/drivers/rtc/rtc-mc13xxx.c
@@ -420,17 +420,7 @@ static struct platform_driver mc13xxx_rtc_driver = {
},
};
-static int __init mc13xxx_rtc_init(void)
-{
- return platform_driver_probe(&mc13xxx_rtc_driver, &mc13xxx_rtc_probe);
-}
-module_init(mc13xxx_rtc_init);
-
-static void __exit mc13xxx_rtc_exit(void)
-{
- platform_driver_unregister(&mc13xxx_rtc_driver);
-}
-module_exit(mc13xxx_rtc_exit);
+module_platform_driver_probe(mc13xxx_rtc_driver, &mc13xxx_rtc_probe);
MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
MODULE_DESCRIPTION("RTC driver for Freescale MC13XXX PMIC");
diff --git a/drivers/rtc/rtc-msm6242.c b/drivers/rtc/rtc-msm6242.c
index fcb113c11122..3ac1e8eca89d 100644
--- a/drivers/rtc/rtc-msm6242.c
+++ b/drivers/rtc/rtc-msm6242.c
@@ -252,18 +252,7 @@ static struct platform_driver msm6242_rtc_driver = {
.remove = __exit_p(msm6242_rtc_remove),
};
-static int __init msm6242_rtc_init(void)
-{
- return platform_driver_probe(&msm6242_rtc_driver, msm6242_rtc_probe);
-}
-
-static void __exit msm6242_rtc_fini(void)
-{
- platform_driver_unregister(&msm6242_rtc_driver);
-}
-
-module_init(msm6242_rtc_init);
-module_exit(msm6242_rtc_fini);
+module_platform_driver_probe(msm6242_rtc_driver, msm6242_rtc_probe);
MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-mv.c b/drivers/rtc/rtc-mv.c
index 8f87fec27ce7..baab802f2153 100644
--- a/drivers/rtc/rtc-mv.c
+++ b/drivers/rtc/rtc-mv.c
@@ -217,7 +217,7 @@ static const struct rtc_class_ops mv_rtc_alarm_ops = {
.alarm_irq_enable = mv_rtc_alarm_irq_enable,
};
-static int mv_rtc_probe(struct platform_device *pdev)
+static int __init mv_rtc_probe(struct platform_device *pdev)
{
struct resource *res;
struct rtc_plat_data *pdata;
@@ -272,12 +272,13 @@ static int mv_rtc_probe(struct platform_device *pdev)
if (pdata->irq >= 0) {
device_init_wakeup(&pdev->dev, 1);
- pdata->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&mv_rtc_alarm_ops,
THIS_MODULE);
- } else
- pdata->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ } else {
+ pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&mv_rtc_ops, THIS_MODULE);
+ }
if (IS_ERR(pdata->rtc)) {
ret = PTR_ERR(pdata->rtc);
goto out;
@@ -308,7 +309,6 @@ static int __exit mv_rtc_remove(struct platform_device *pdev)
if (pdata->irq >= 0)
device_init_wakeup(&pdev->dev, 0);
- rtc_device_unregister(pdata->rtc);
if (!IS_ERR(pdata->clk))
clk_disable_unprepare(pdata->clk);
@@ -331,18 +331,7 @@ static struct platform_driver mv_rtc_driver = {
},
};
-static __init int mv_init(void)
-{
- return platform_driver_probe(&mv_rtc_driver, mv_rtc_probe);
-}
-
-static __exit void mv_exit(void)
-{
- platform_driver_unregister(&mv_rtc_driver);
-}
-
-module_init(mv_init);
-module_exit(mv_exit);
+module_platform_driver_probe(mv_rtc_driver, mv_rtc_probe);
MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
MODULE_DESCRIPTION("Marvell RTC driver");
diff --git a/drivers/rtc/rtc-mxc.c b/drivers/rtc/rtc-mxc.c
index 1c3ef7289565..9a3895bc4f4d 100644
--- a/drivers/rtc/rtc-mxc.c
+++ b/drivers/rtc/rtc-mxc.c
@@ -439,7 +439,7 @@ static int mxc_rtc_probe(struct platform_device *pdev)
if (pdata->irq >=0)
device_init_wakeup(&pdev->dev, 1);
- rtc = rtc_device_register(pdev->name, &pdev->dev, &mxc_rtc_ops,
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name, &mxc_rtc_ops,
THIS_MODULE);
if (IS_ERR(rtc)) {
ret = PTR_ERR(rtc);
@@ -464,15 +464,13 @@ static int mxc_rtc_remove(struct platform_device *pdev)
{
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
- rtc_device_unregister(pdata->rtc);
-
clk_disable_unprepare(pdata->clk);
platform_set_drvdata(pdev, NULL);
return 0;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int mxc_rtc_suspend(struct device *dev)
{
struct rtc_plat_data *pdata = dev_get_drvdata(dev);
@@ -492,19 +490,14 @@ static int mxc_rtc_resume(struct device *dev)
return 0;
}
-
-static struct dev_pm_ops mxc_rtc_pm_ops = {
- .suspend = mxc_rtc_suspend,
- .resume = mxc_rtc_resume,
-};
#endif
+static SIMPLE_DEV_PM_OPS(mxc_rtc_pm_ops, mxc_rtc_suspend, mxc_rtc_resume);
+
static struct platform_driver mxc_rtc_driver = {
.driver = {
.name = "mxc_rtc",
-#ifdef CONFIG_PM
.pm = &mxc_rtc_pm_ops,
-#endif
.owner = THIS_MODULE,
},
.id_table = imx_rtc_devtype,
diff --git a/drivers/rtc/rtc-nuc900.c b/drivers/rtc/rtc-nuc900.c
index a63680850fef..4d9525cc1cf4 100644
--- a/drivers/rtc/rtc-nuc900.c
+++ b/drivers/rtc/rtc-nuc900.c
@@ -222,7 +222,7 @@ static struct rtc_class_ops nuc900_rtc_ops = {
.alarm_irq_enable = nuc900_alarm_irq_enable,
};
-static int nuc900_rtc_probe(struct platform_device *pdev)
+static int __init nuc900_rtc_probe(struct platform_device *pdev)
{
struct resource *res;
struct nuc900_rtc *nuc900_rtc;
@@ -284,7 +284,7 @@ fail1: kfree(nuc900_rtc);
return err;
}
-static int nuc900_rtc_remove(struct platform_device *pdev)
+static int __exit nuc900_rtc_remove(struct platform_device *pdev)
{
struct nuc900_rtc *nuc900_rtc = platform_get_drvdata(pdev);
struct resource *res;
@@ -304,25 +304,14 @@ static int nuc900_rtc_remove(struct platform_device *pdev)
}
static struct platform_driver nuc900_rtc_driver = {
- .remove = nuc900_rtc_remove,
+ .remove = __exit_p(nuc900_rtc_remove),
.driver = {
.name = "nuc900-rtc",
.owner = THIS_MODULE,
},
};
-static int __init nuc900_rtc_init(void)
-{
- return platform_driver_probe(&nuc900_rtc_driver, nuc900_rtc_probe);
-}
-
-static void __exit nuc900_rtc_exit(void)
-{
- platform_driver_unregister(&nuc900_rtc_driver);
-}
-
-module_init(nuc900_rtc_init);
-module_exit(nuc900_rtc_exit);
+module_platform_driver_probe(nuc900_rtc_driver, nuc900_rtc_probe);
MODULE_AUTHOR("Wan ZongShun <mcuos.com@gmail.com>");
MODULE_DESCRIPTION("nuc910/nuc920 RTC driver");
diff --git a/drivers/rtc/rtc-omap.c b/drivers/rtc/rtc-omap.c
index 600971407aac..172cc5ca7489 100644
--- a/drivers/rtc/rtc-omap.c
+++ b/drivers/rtc/rtc-omap.c
@@ -550,17 +550,7 @@ static struct platform_driver omap_rtc_driver = {
.id_table = omap_rtc_devtype,
};
-static int __init rtc_init(void)
-{
- return platform_driver_probe(&omap_rtc_driver, omap_rtc_probe);
-}
-module_init(rtc_init);
-
-static void __exit rtc_exit(void)
-{
- platform_driver_unregister(&omap_rtc_driver);
-}
-module_exit(rtc_exit);
+module_platform_driver_probe(omap_rtc_driver, omap_rtc_probe);
MODULE_AUTHOR("George G. Davis (and others)");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-palmas.c b/drivers/rtc/rtc-palmas.c
index 59c42986254e..bbc3b9efdeb2 100644
--- a/drivers/rtc/rtc-palmas.c
+++ b/drivers/rtc/rtc-palmas.c
@@ -264,7 +264,7 @@ static int palmas_rtc_probe(struct platform_device *pdev)
palmas_rtc->irq = platform_get_irq(pdev, 0);
- palmas_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ palmas_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&palmas_rtc_ops, THIS_MODULE);
if (IS_ERR(palmas_rtc->rtc)) {
ret = PTR_ERR(palmas_rtc->rtc);
@@ -272,14 +272,13 @@ static int palmas_rtc_probe(struct platform_device *pdev)
return ret;
}
- ret = request_threaded_irq(palmas_rtc->irq, NULL,
+ ret = devm_request_threaded_irq(&pdev->dev, palmas_rtc->irq, NULL,
palmas_rtc_interrupt,
IRQF_TRIGGER_LOW | IRQF_ONESHOT |
IRQF_EARLY_RESUME,
dev_name(&pdev->dev), palmas_rtc);
if (ret < 0) {
dev_err(&pdev->dev, "IRQ request failed, err = %d\n", ret);
- rtc_device_unregister(palmas_rtc->rtc);
return ret;
}
@@ -289,11 +288,7 @@ static int palmas_rtc_probe(struct platform_device *pdev)
static int palmas_rtc_remove(struct platform_device *pdev)
{
- struct palmas_rtc *palmas_rtc = platform_get_drvdata(pdev);
-
palmas_rtc_alarm_irq_enable(&pdev->dev, 0);
- free_irq(palmas_rtc->irq, palmas_rtc);
- rtc_device_unregister(palmas_rtc->rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-pcap.c b/drivers/rtc/rtc-pcap.c
index e0019cd0bf71..ce0982490e8c 100644
--- a/drivers/rtc/rtc-pcap.c
+++ b/drivers/rtc/rtc-pcap.c
@@ -139,7 +139,7 @@ static const struct rtc_class_ops pcap_rtc_ops = {
.alarm_irq_enable = pcap_rtc_alarm_irq_enable,
};
-static int pcap_rtc_probe(struct platform_device *pdev)
+static int __init pcap_rtc_probe(struct platform_device *pdev)
{
struct pcap_rtc *pcap_rtc;
int timer_irq, alarm_irq;
@@ -183,7 +183,7 @@ fail_rtc:
return err;
}
-static int pcap_rtc_remove(struct platform_device *pdev)
+static int __exit pcap_rtc_remove(struct platform_device *pdev)
{
struct pcap_rtc *pcap_rtc = platform_get_drvdata(pdev);
@@ -196,25 +196,14 @@ static int pcap_rtc_remove(struct platform_device *pdev)
}
static struct platform_driver pcap_rtc_driver = {
- .remove = pcap_rtc_remove,
+ .remove = __exit_p(pcap_rtc_remove),
.driver = {
.name = "pcap-rtc",
.owner = THIS_MODULE,
},
};
-static int __init rtc_pcap_init(void)
-{
- return platform_driver_probe(&pcap_rtc_driver, pcap_rtc_probe);
-}
-
-static void __exit rtc_pcap_exit(void)
-{
- platform_driver_unregister(&pcap_rtc_driver);
-}
-
-module_init(rtc_pcap_init);
-module_exit(rtc_pcap_exit);
+module_platform_driver_probe(pcap_rtc_driver, pcap_rtc_probe);
MODULE_DESCRIPTION("Motorola pcap rtc driver");
MODULE_AUTHOR("guiming zhuo <gmzhuo@gmail.com>");
diff --git a/drivers/rtc/rtc-pcf8523.c b/drivers/rtc/rtc-pcf8523.c
index 889e3160e701..305c9515e5bb 100644
--- a/drivers/rtc/rtc-pcf8523.c
+++ b/drivers/rtc/rtc-pcf8523.c
@@ -307,7 +307,7 @@ static int pcf8523_probe(struct i2c_client *client,
if (err < 0)
return err;
- pcf->rtc = rtc_device_register(DRIVER_NAME, &client->dev,
+ pcf->rtc = devm_rtc_device_register(&client->dev, DRIVER_NAME,
&pcf8523_rtc_ops, THIS_MODULE);
if (IS_ERR(pcf->rtc))
return PTR_ERR(pcf->rtc);
@@ -319,10 +319,6 @@ static int pcf8523_probe(struct i2c_client *client,
static int pcf8523_remove(struct i2c_client *client)
{
- struct pcf8523 *pcf = i2c_get_clientdata(client);
-
- rtc_device_unregister(pcf->rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-ps3.c b/drivers/rtc/rtc-ps3.c
index 968133ce1ee8..4bb825bb5804 100644
--- a/drivers/rtc/rtc-ps3.c
+++ b/drivers/rtc/rtc-ps3.c
@@ -62,7 +62,7 @@ static int __init ps3_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
- rtc = rtc_device_register("rtc-ps3", &dev->dev, &ps3_rtc_ops,
+ rtc = devm_rtc_device_register(&dev->dev, "rtc-ps3", &ps3_rtc_ops,
THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -73,7 +73,6 @@ static int __init ps3_rtc_probe(struct platform_device *dev)
static int __exit ps3_rtc_remove(struct platform_device *dev)
{
- rtc_device_unregister(platform_get_drvdata(dev));
return 0;
}
@@ -85,18 +84,7 @@ static struct platform_driver ps3_rtc_driver = {
.remove = __exit_p(ps3_rtc_remove),
};
-static int __init ps3_rtc_init(void)
-{
- return platform_driver_probe(&ps3_rtc_driver, ps3_rtc_probe);
-}
-
-static void __exit ps3_rtc_fini(void)
-{
- platform_driver_unregister(&ps3_rtc_driver);
-}
-
-module_init(ps3_rtc_init);
-module_exit(ps3_rtc_fini);
+module_platform_driver_probe(ps3_rtc_driver, ps3_rtc_probe);
MODULE_AUTHOR("Sony Corporation");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-pxa.c b/drivers/rtc/rtc-pxa.c
index 03c85ee719a7..a2c75c1ed4cc 100644
--- a/drivers/rtc/rtc-pxa.c
+++ b/drivers/rtc/rtc-pxa.c
@@ -19,6 +19,7 @@
*
*/
+#include <linux/delay.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
@@ -80,22 +81,29 @@
#define RYAR1 0x1c
#define RTCPICR 0x34
#define PIAR 0x38
+#define PSBR_RTC 0x00
#define rtc_readl(pxa_rtc, reg) \
__raw_readl((pxa_rtc)->base + (reg))
#define rtc_writel(pxa_rtc, reg, value) \
__raw_writel((value), (pxa_rtc)->base + (reg))
+#define rtc_readl_psbr(pxa_rtc, reg) \
+ __raw_readl((pxa_rtc)->base_psbr + (reg))
+#define rtc_writel_psbr(pxa_rtc, reg, value) \
+ __raw_writel((value), (pxa_rtc)->base_psbr + (reg))
struct pxa_rtc {
struct resource *ress;
+ struct resource *ress_psbr;
void __iomem *base;
+ void __iomem *base_psbr;
int irq_1Hz;
int irq_Alrm;
struct rtc_device *rtc;
spinlock_t lock; /* Protects this structure */
};
-
+static struct pxa_rtc *rtc_info;
static u32 ryxr_calc(struct rtc_time *tm)
{
return ((tm->tm_year + 1900) << RYxR_YEAR_S)
@@ -117,7 +125,7 @@ static void tm_calc(u32 rycr, u32 rdcr, struct rtc_time *tm)
tm->tm_year = ((rycr & RYxR_YEAR_MASK) >> RYxR_YEAR_S) - 1900;
tm->tm_mon = (((rycr & RYxR_MONTH_MASK) >> RYxR_MONTH_S)) - 1;
tm->tm_mday = (rycr & RYxR_DAY_MASK);
- tm->tm_wday = ((rycr & RDxR_DOW_MASK) >> RDxR_DOW_S) - 1;
+ tm->tm_wday = ((rdcr & RDxR_DOW_MASK) >> RDxR_DOW_S) - 1;
tm->tm_hour = (rdcr & RDxR_HOUR_MASK) >> RDxR_HOUR_S;
tm->tm_min = (rdcr & RDxR_MIN_MASK) >> RDxR_MIN_S;
tm->tm_sec = rdcr & RDxR_SEC_MASK;
@@ -175,7 +183,6 @@ static irqreturn_t pxa_rtc_irq(int irq, void *dev_id)
/* enable back rtc interrupts */
rtc_writel(pxa_rtc, RTSR, rtsr & ~RTSR_TRIG_MASK);
-
spin_unlock(&pxa_rtc->lock);
return IRQ_HANDLED;
}
@@ -250,12 +257,45 @@ static int pxa_rtc_read_time(struct device *dev, struct rtc_time *tm)
static int pxa_rtc_set_time(struct device *dev, struct rtc_time *tm)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
-
+ /* sequence to wirte pxa rtc register RCNR RDCR RYCR is
+ *1. set PSBR[RWE] bit, take 2x32-khz to complete
+ *2. write to RTC register,take 2x32-khz to complete
+ *3. clear PSBR[RWE] bit,take 2x32-khz to complete
+ */
+ if ((tm->tm_year < 70) || (tm->tm_year > 138))
+ return -EINVAL;
+ rtc_writel_psbr(rtc_info, PSBR_RTC, 0x01);
+ udelay(100);
rtc_writel(pxa_rtc, RYCR, ryxr_calc(tm));
rtc_writel(pxa_rtc, RDCR, rdxr_calc(tm));
+ udelay(100);
+ rtc_writel_psbr(rtc_info, PSBR_RTC, 0x00);
+ udelay(100);
+ pxa_rtc_read_time(dev, tm);
+ dev_info(dev, "tm.year = %d, tm.month = %d, tm.day = %d\n",
+ tm->tm_year + 1900, tm->tm_mon, tm->tm_mday);
+ return 0;
+}
+int pxa_rtc_sync_time(unsigned int ticks)
+{
+ /* sequence to wirte pxa rtc register RCNR RDCR RYCR is
+ *1. set PSBR[RWE] bit, take 2x32-khz to complete
+ *2. write to RTC register,take 2x32-khz to complete
+ *3. clear PSBR[RWE] bit,take 2x32-khz to complete
+ */
+ struct rtc_time tm;
+ rtc_time_to_tm(ticks, &tm);
+ rtc_writel_psbr(rtc_info, PSBR_RTC, 0x01);
+ udelay(100);
+ rtc_writel(rtc_info, RYCR, ryxr_calc(&tm));
+ rtc_writel(rtc_info, RDCR, rdxr_calc(&tm));
+ udelay(100);
+ rtc_writel_psbr(rtc_info, PSBR_RTC, 0x00);
+ udelay(100);
return 0;
}
+EXPORT_SYMBOL(pxa_rtc_sync_time);
static int pxa_rtc_read_alarm(struct device *dev, struct rtc_wkalrm *alrm)
{
@@ -327,7 +367,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
pxa_rtc = kzalloc(sizeof(struct pxa_rtc), GFP_KERNEL);
if (!pxa_rtc)
return -ENOMEM;
-
+ rtc_info = pxa_rtc;
spin_lock_init(&pxa_rtc->lock);
platform_set_drvdata(pdev, pxa_rtc);
@@ -337,6 +377,11 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
dev_err(dev, "No I/O memory resource defined\n");
goto err_ress;
}
+ pxa_rtc->ress_psbr = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!pxa_rtc->ress_psbr) {
+ dev_err(dev, "No I/O memory resource defined\n");
+ goto err_ress;
+ }
pxa_rtc->irq_1Hz = platform_get_irq(pdev, 0);
if (pxa_rtc->irq_1Hz < 0) {
@@ -348,7 +393,6 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
dev_err(dev, "No alarm IRQ resource defined\n");
goto err_ress;
}
- pxa_rtc_open(dev);
ret = -ENOMEM;
pxa_rtc->base = ioremap(pxa_rtc->ress->start,
resource_size(pxa_rtc->ress));
@@ -357,6 +401,12 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
goto err_map;
}
+ pxa_rtc->base_psbr = ioremap(pxa_rtc->ress_psbr->start,
+ resource_size(pxa_rtc->ress_psbr));
+ if (!pxa_rtc->base_psbr) {
+ dev_err(&pdev->dev, "Unable to map pxa RTC PSBR I/O memory\n");
+ goto err_map;
+ }
/*
* If the clock divider is uninitialized then reset it to the
* default value to get the 1Hz clock.
@@ -379,7 +429,7 @@ static int __init pxa_rtc_probe(struct platform_device *pdev)
}
device_init_wakeup(dev, 1);
-
+ pxa_rtc_open(dev);
return 0;
err_rtc_reg:
@@ -416,7 +466,7 @@ static struct of_device_id pxa_rtc_dt_ids[] = {
MODULE_DEVICE_TABLE(of, pxa_rtc_dt_ids);
#endif
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int pxa_rtc_suspend(struct device *dev)
{
struct pxa_rtc *pxa_rtc = dev_get_drvdata(dev);
@@ -434,36 +484,20 @@ static int pxa_rtc_resume(struct device *dev)
disable_irq_wake(pxa_rtc->irq_Alrm);
return 0;
}
-
-static const struct dev_pm_ops pxa_rtc_pm_ops = {
- .suspend = pxa_rtc_suspend,
- .resume = pxa_rtc_resume,
-};
#endif
+static SIMPLE_DEV_PM_OPS(pxa_rtc_pm_ops, pxa_rtc_suspend, pxa_rtc_resume);
+
static struct platform_driver pxa_rtc_driver = {
.remove = __exit_p(pxa_rtc_remove),
.driver = {
.name = "pxa-rtc",
.of_match_table = of_match_ptr(pxa_rtc_dt_ids),
-#ifdef CONFIG_PM
.pm = &pxa_rtc_pm_ops,
-#endif
},
};
-static int __init pxa_rtc_init(void)
-{
- return platform_driver_probe(&pxa_rtc_driver, pxa_rtc_probe);
-}
-
-static void __exit pxa_rtc_exit(void)
-{
- platform_driver_unregister(&pxa_rtc_driver);
-}
-
-module_init(pxa_rtc_init);
-module_exit(pxa_rtc_exit);
+module_platform_driver_probe(pxa_rtc_driver, pxa_rtc_probe);
MODULE_AUTHOR("Robert Jarzmik <robert.jarzmik@free.fr>");
MODULE_DESCRIPTION("PXA27x/PXA3xx Realtime Clock Driver (RTC)");
diff --git a/drivers/rtc/rtc-r9701.c b/drivers/rtc/rtc-r9701.c
index 7726f4a4f2d0..0adf5b528129 100644
--- a/drivers/rtc/rtc-r9701.c
+++ b/drivers/rtc/rtc-r9701.c
@@ -154,8 +154,8 @@ static int r9701_probe(struct spi_device *spi)
}
}
- rtc = rtc_device_register("r9701",
- &spi->dev, &r9701_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, "r9701",
+ &r9701_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -166,9 +166,6 @@ static int r9701_probe(struct spi_device *spi)
static int r9701_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = dev_get_drvdata(&spi->dev);
-
- rtc_device_unregister(rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-rc5t583.c b/drivers/rtc/rtc-rc5t583.c
index eb3194d664a8..8eabcf51b35a 100644
--- a/drivers/rtc/rtc-rc5t583.c
+++ b/drivers/rtc/rtc-rc5t583.c
@@ -256,7 +256,7 @@ static int rc5t583_rtc_probe(struct platform_device *pdev)
}
device_init_wakeup(&pdev->dev, 1);
- ricoh_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ ricoh_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&rc5t583_rtc_ops, THIS_MODULE);
if (IS_ERR(ricoh_rtc->rtc)) {
ret = PTR_ERR(ricoh_rtc->rtc);
@@ -276,13 +276,10 @@ static int rc5t583_rtc_remove(struct platform_device *pdev)
struct rc5t583_rtc *rc5t583_rtc = dev_get_drvdata(&pdev->dev);
rc5t583_rtc_alarm_irq_enable(&rc5t583_rtc->rtc->dev, 0);
-
- rtc_device_unregister(rc5t583_rtc->rtc);
return 0;
}
#ifdef CONFIG_PM_SLEEP
-
static int rc5t583_rtc_suspend(struct device *dev)
{
struct rc5t583 *rc5t583 = dev_get_drvdata(dev->parent);
@@ -304,24 +301,18 @@ static int rc5t583_rtc_resume(struct device *dev)
return regmap_write(rc5t583->regmap, RC5T583_RTC_CTL1,
rc5t583_rtc->irqen);
}
-
-static const struct dev_pm_ops rc5t583_rtc_pm_ops = {
- .suspend = rc5t583_rtc_suspend,
- .resume = rc5t583_rtc_resume,
-};
-
-#define DEV_PM_OPS (&rc5t583_rtc_pm_ops)
-#else
-#define DEV_PM_OPS NULL
#endif
+static SIMPLE_DEV_PM_OPS(rc5t583_rtc_pm_ops, rc5t583_rtc_suspend,
+ rc5t583_rtc_resume);
+
static struct platform_driver rc5t583_rtc_driver = {
.probe = rc5t583_rtc_probe,
.remove = rc5t583_rtc_remove,
.driver = {
.owner = THIS_MODULE,
.name = "rtc-rc5t583",
- .pm = DEV_PM_OPS,
+ .pm = &rc5t583_rtc_pm_ops,
},
};
diff --git a/drivers/rtc/rtc-rp5c01.c b/drivers/rtc/rtc-rp5c01.c
index 359da6d020b9..d25d2f6c0cad 100644
--- a/drivers/rtc/rtc-rp5c01.c
+++ b/drivers/rtc/rtc-rp5c01.c
@@ -294,18 +294,7 @@ static struct platform_driver rp5c01_rtc_driver = {
.remove = __exit_p(rp5c01_rtc_remove),
};
-static int __init rp5c01_rtc_init(void)
-{
- return platform_driver_probe(&rp5c01_rtc_driver, rp5c01_rtc_probe);
-}
-
-static void __exit rp5c01_rtc_fini(void)
-{
- platform_driver_unregister(&rp5c01_rtc_driver);
-}
-
-module_init(rp5c01_rtc_init);
-module_exit(rp5c01_rtc_fini);
+module_platform_driver_probe(rp5c01_rtc_driver, rp5c01_rtc_probe);
MODULE_AUTHOR("Geert Uytterhoeven <geert@linux-m68k.org>");
MODULE_LICENSE("GPL");
diff --git a/drivers/rtc/rtc-rs5c313.c b/drivers/rtc/rtc-rs5c313.c
index d98ea5b759c8..8089fc63e403 100644
--- a/drivers/rtc/rtc-rs5c313.c
+++ b/drivers/rtc/rtc-rs5c313.c
@@ -367,7 +367,7 @@ static const struct rtc_class_ops rs5c313_rtc_ops = {
static int rs5c313_rtc_probe(struct platform_device *pdev)
{
- struct rtc_device *rtc = rtc_device_register("rs5c313", &pdev->dev,
+ struct rtc_device *rtc = devm_rtc_device_register(&pdev->dev, "rs5c313",
&rs5c313_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
@@ -380,10 +380,6 @@ static int rs5c313_rtc_probe(struct platform_device *pdev)
static int rs5c313_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata( pdev );
-
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-rv3029c2.c b/drivers/rtc/rtc-rv3029c2.c
index f8ee8ad7825e..5032c24ec159 100644
--- a/drivers/rtc/rtc-rv3029c2.c
+++ b/drivers/rtc/rtc-rv3029c2.c
@@ -395,9 +395,8 @@ static int rv3029c2_probe(struct i2c_client *client,
if (!i2c_check_functionality(client->adapter, I2C_FUNC_SMBUS_EMUL))
return -ENODEV;
- rtc = rtc_device_register(client->name,
- &client->dev, &rv3029c2_rtc_ops,
- THIS_MODULE);
+ rtc = devm_rtc_device_register(&client->dev, client->name,
+ &rv3029c2_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -407,23 +406,14 @@ static int rv3029c2_probe(struct i2c_client *client,
rc = rv3029c2_i2c_get_sr(client, buf);
if (rc < 0) {
dev_err(&client->dev, "reading status failed\n");
- goto exit_unregister;
+ return rc;
}
return 0;
-
-exit_unregister:
- rtc_device_unregister(rtc);
-
- return rc;
}
static int rv3029c2_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-rx4581.c b/drivers/rtc/rtc-rx4581.c
index 599ec73ec886..d1b88dbc5c2c 100644
--- a/drivers/rtc/rtc-rx4581.c
+++ b/drivers/rtc/rtc-rx4581.c
@@ -273,8 +273,8 @@ static int rx4581_probe(struct spi_device *spi)
if (res != 0)
return res;
- rtc = rtc_device_register("rx4581",
- &spi->dev, &rx4581_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&spi->dev, "rx4581",
+ &rx4581_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -284,9 +284,6 @@ static int rx4581_probe(struct spi_device *spi)
static int rx4581_remove(struct spi_device *spi)
{
- struct rtc_device *rtc = dev_get_drvdata(&spi->dev);
-
- rtc_device_unregister(rtc);
return 0;
}
diff --git a/drivers/rtc/rtc-rx8581.c b/drivers/rtc/rtc-rx8581.c
index b0c272658fa2..07f3037b18f4 100644
--- a/drivers/rtc/rtc-rx8581.c
+++ b/drivers/rtc/rtc-rx8581.c
@@ -240,8 +240,8 @@ static int rx8581_probe(struct i2c_client *client,
dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
- rtc = rtc_device_register(rx8581_driver.driver.name,
- &client->dev, &rx8581_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&client->dev, rx8581_driver.driver.name,
+ &rx8581_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -253,10 +253,6 @@ static int rx8581_probe(struct i2c_client *client,
static int rx8581_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- rtc_device_unregister(rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-s3c.c b/drivers/rtc/rtc-s3c.c
index fb994e9ddc15..653a4dcbfe7d 100644
--- a/drivers/rtc/rtc-s3c.c
+++ b/drivers/rtc/rtc-s3c.c
@@ -51,7 +51,6 @@ static struct clk *rtc_clk;
static void __iomem *s3c_rtc_base;
static int s3c_rtc_alarmno = NO_IRQ;
static int s3c_rtc_tickno = NO_IRQ;
-static bool wake_en;
static enum s3c_cpu_type s3c_rtc_cpu_type;
static DEFINE_SPINLOCK(s3c_rtc_pie_lock);
@@ -423,10 +422,7 @@ static void s3c_rtc_enable(struct platform_device *pdev, int en)
static int s3c_rtc_remove(struct platform_device *dev)
{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
platform_set_drvdata(dev, NULL);
- rtc_device_unregister(rtc);
s3c_rtc_setaie(&dev->dev, 0);
@@ -511,7 +507,7 @@ static int s3c_rtc_probe(struct platform_device *pdev)
/* register RTC and exit */
- rtc = rtc_device_register("s3c", &pdev->dev, &s3c_rtcops,
+ rtc = devm_rtc_device_register(&pdev->dev, "s3c", &s3c_rtcops,
THIS_MODULE);
if (IS_ERR(rtc)) {
@@ -574,7 +570,6 @@ static int s3c_rtc_probe(struct platform_device *pdev)
err_alarm_irq:
platform_set_drvdata(pdev, NULL);
- rtc_device_unregister(rtc);
err_nortc:
s3c_rtc_enable(pdev, 0);
@@ -583,14 +578,16 @@ static int s3c_rtc_probe(struct platform_device *pdev)
return ret;
}
-#ifdef CONFIG_PM
-
+#ifdef CONFIG_PM_SLEEP
/* RTC Power management control */
static int ticnt_save, ticnt_en_save;
+static bool wake_en;
-static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state)
+static int s3c_rtc_suspend(struct device *dev)
{
+ struct platform_device *pdev = to_platform_device(dev);
+
clk_enable(rtc_clk);
/* save TICNT for anyone using periodic interrupts */
ticnt_save = readb(s3c_rtc_base + S3C2410_TICNT);
@@ -600,19 +597,20 @@ static int s3c_rtc_suspend(struct platform_device *pdev, pm_message_t state)
}
s3c_rtc_enable(pdev, 0);
- if (device_may_wakeup(&pdev->dev) && !wake_en) {
+ if (device_may_wakeup(dev) && !wake_en) {
if (enable_irq_wake(s3c_rtc_alarmno) == 0)
wake_en = true;
else
- dev_err(&pdev->dev, "enable_irq_wake failed\n");
+ dev_err(dev, "enable_irq_wake failed\n");
}
clk_disable(rtc_clk);
return 0;
}
-static int s3c_rtc_resume(struct platform_device *pdev)
+static int s3c_rtc_resume(struct device *dev)
{
+ struct platform_device *pdev = to_platform_device(dev);
unsigned int tmp;
clk_enable(rtc_clk);
@@ -623,7 +621,7 @@ static int s3c_rtc_resume(struct platform_device *pdev)
writew(tmp | ticnt_en_save, s3c_rtc_base + S3C2410_RTCCON);
}
- if (device_may_wakeup(&pdev->dev) && wake_en) {
+ if (device_may_wakeup(dev) && wake_en) {
disable_irq_wake(s3c_rtc_alarmno);
wake_en = false;
}
@@ -631,11 +629,10 @@ static int s3c_rtc_resume(struct platform_device *pdev)
return 0;
}
-#else
-#define s3c_rtc_suspend NULL
-#define s3c_rtc_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(s3c_rtc_pm_ops, s3c_rtc_suspend, s3c_rtc_resume);
+
#ifdef CONFIG_OF
static struct s3c_rtc_drv_data s3c_rtc_drv_data_array[] = {
[TYPE_S3C2410] = { TYPE_S3C2410 },
@@ -685,12 +682,11 @@ MODULE_DEVICE_TABLE(platform, s3c_rtc_driver_ids);
static struct platform_driver s3c_rtc_driver = {
.probe = s3c_rtc_probe,
.remove = s3c_rtc_remove,
- .suspend = s3c_rtc_suspend,
- .resume = s3c_rtc_resume,
.id_table = s3c_rtc_driver_ids,
.driver = {
.name = "s3c-rtc",
.owner = THIS_MODULE,
+ .pm = &s3c_rtc_pm_ops,
.of_match_table = of_match_ptr(s3c_rtc_dt_match),
},
};
diff --git a/drivers/rtc/rtc-sa1100.c b/drivers/rtc/rtc-sa1100.c
index 5ec5036df0bc..7e0a0887c256 100644
--- a/drivers/rtc/rtc-sa1100.c
+++ b/drivers/rtc/rtc-sa1100.c
@@ -327,7 +327,7 @@ static int sa1100_rtc_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int sa1100_rtc_suspend(struct device *dev)
{
struct sa1100_rtc *info = dev_get_drvdata(dev);
@@ -343,13 +343,11 @@ static int sa1100_rtc_resume(struct device *dev)
disable_irq_wake(info->irq_alarm);
return 0;
}
-
-static const struct dev_pm_ops sa1100_rtc_pm_ops = {
- .suspend = sa1100_rtc_suspend,
- .resume = sa1100_rtc_resume,
-};
#endif
+static SIMPLE_DEV_PM_OPS(sa1100_rtc_pm_ops, sa1100_rtc_suspend,
+ sa1100_rtc_resume);
+
#ifdef CONFIG_OF
static struct of_device_id sa1100_rtc_dt_ids[] = {
{ .compatible = "mrvl,sa1100-rtc", },
@@ -364,9 +362,7 @@ static struct platform_driver sa1100_rtc_driver = {
.remove = sa1100_rtc_remove,
.driver = {
.name = "sa1100-rtc",
-#ifdef CONFIG_PM
.pm = &sa1100_rtc_pm_ops,
-#endif
.of_match_table = of_match_ptr(sa1100_rtc_dt_ids),
},
};
diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c
index e55a7635ae5f..8d5bd2e36776 100644
--- a/drivers/rtc/rtc-sh.c
+++ b/drivers/rtc/rtc-sh.c
@@ -790,6 +790,7 @@ static void sh_rtc_set_irq_wake(struct device *dev, int enabled)
}
}
+#ifdef CONFIG_PM_SLEEP
static int sh_rtc_suspend(struct device *dev)
{
if (device_may_wakeup(dev))
@@ -805,33 +806,20 @@ static int sh_rtc_resume(struct device *dev)
return 0;
}
+#endif
-static const struct dev_pm_ops sh_rtc_dev_pm_ops = {
- .suspend = sh_rtc_suspend,
- .resume = sh_rtc_resume,
-};
+static SIMPLE_DEV_PM_OPS(sh_rtc_pm_ops, sh_rtc_suspend, sh_rtc_resume);
static struct platform_driver sh_rtc_platform_driver = {
.driver = {
.name = DRV_NAME,
.owner = THIS_MODULE,
- .pm = &sh_rtc_dev_pm_ops,
+ .pm = &sh_rtc_pm_ops,
},
.remove = __exit_p(sh_rtc_remove),
};
-static int __init sh_rtc_init(void)
-{
- return platform_driver_probe(&sh_rtc_platform_driver, sh_rtc_probe);
-}
-
-static void __exit sh_rtc_exit(void)
-{
- platform_driver_unregister(&sh_rtc_platform_driver);
-}
-
-module_init(sh_rtc_init);
-module_exit(sh_rtc_exit);
+module_platform_driver_probe(sh_rtc_platform_driver, sh_rtc_probe);
MODULE_DESCRIPTION("SuperH on-chip RTC driver");
MODULE_VERSION(DRV_VERSION);
diff --git a/drivers/rtc/rtc-snvs.c b/drivers/rtc/rtc-snvs.c
index f7d90703db5e..b04f09a1df2a 100644
--- a/drivers/rtc/rtc-snvs.c
+++ b/drivers/rtc/rtc-snvs.c
@@ -283,7 +283,7 @@ static int snvs_rtc_probe(struct platform_device *pdev)
return ret;
}
- data->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ data->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&snvs_rtc_ops, THIS_MODULE);
if (IS_ERR(data->rtc)) {
ret = PTR_ERR(data->rtc);
@@ -296,10 +296,6 @@ static int snvs_rtc_probe(struct platform_device *pdev)
static int snvs_rtc_remove(struct platform_device *pdev)
{
- struct snvs_rtc_data *data = platform_get_drvdata(pdev);
-
- rtc_device_unregister(data->rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-spear.c b/drivers/rtc/rtc-spear.c
index a18c3192ed40..db3ef610dd7c 100644
--- a/drivers/rtc/rtc-spear.c
+++ b/drivers/rtc/rtc-spear.c
@@ -400,8 +400,8 @@ static int spear_rtc_probe(struct platform_device *pdev)
spin_lock_init(&config->lock);
platform_set_drvdata(pdev, config);
- config->rtc = rtc_device_register(pdev->name, &pdev->dev,
- &spear_rtc_ops, THIS_MODULE);
+ config->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
+ &spear_rtc_ops, THIS_MODULE);
if (IS_ERR(config->rtc)) {
dev_err(&pdev->dev, "can't register RTC device, err %ld\n",
PTR_ERR(config->rtc));
@@ -427,7 +427,6 @@ static int spear_rtc_remove(struct platform_device *pdev)
{
struct spear_rtc_config *config = platform_get_drvdata(pdev);
- rtc_device_unregister(config->rtc);
spear_rtc_disable_interrupt(config);
clk_disable_unprepare(config->clk);
device_init_wakeup(&pdev->dev, 0);
diff --git a/drivers/rtc/rtc-starfire.c b/drivers/rtc/rtc-starfire.c
index 5be98bfd7ed3..987b5ec0ae56 100644
--- a/drivers/rtc/rtc-starfire.c
+++ b/drivers/rtc/rtc-starfire.c
@@ -39,8 +39,10 @@ static const struct rtc_class_ops starfire_rtc_ops = {
static int __init starfire_rtc_probe(struct platform_device *pdev)
{
- struct rtc_device *rtc = rtc_device_register("starfire", &pdev->dev,
- &starfire_rtc_ops, THIS_MODULE);
+ struct rtc_device *rtc;
+
+ rtc = devm_rtc_device_register(&pdev->dev, "starfire",
+ &starfire_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -51,10 +53,6 @@ static int __init starfire_rtc_probe(struct platform_device *pdev)
static int __exit starfire_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc);
-
return 0;
}
@@ -66,15 +64,4 @@ static struct platform_driver starfire_rtc_driver = {
.remove = __exit_p(starfire_rtc_remove),
};
-static int __init starfire_rtc_init(void)
-{
- return platform_driver_probe(&starfire_rtc_driver, starfire_rtc_probe);
-}
-
-static void __exit starfire_rtc_exit(void)
-{
- platform_driver_unregister(&starfire_rtc_driver);
-}
-
-module_init(starfire_rtc_init);
-module_exit(starfire_rtc_exit);
+module_platform_driver_probe(starfire_rtc_driver, starfire_rtc_probe);
diff --git a/drivers/rtc/rtc-stk17ta8.c b/drivers/rtc/rtc-stk17ta8.c
index 7e4a6f65cb91..af5e97e3f272 100644
--- a/drivers/rtc/rtc-stk17ta8.c
+++ b/drivers/rtc/rtc-stk17ta8.c
@@ -336,14 +336,13 @@ static int stk17ta8_rtc_probe(struct platform_device *pdev)
}
}
- pdata->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ pdata->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&stk17ta8_rtc_ops, THIS_MODULE);
if (IS_ERR(pdata->rtc))
return PTR_ERR(pdata->rtc);
ret = sysfs_create_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
- if (ret)
- rtc_device_unregister(pdata->rtc);
+
return ret;
}
@@ -352,7 +351,6 @@ static int stk17ta8_rtc_remove(struct platform_device *pdev)
struct rtc_plat_data *pdata = platform_get_drvdata(pdev);
sysfs_remove_bin_file(&pdev->dev.kobj, &stk17ta8_nvram_attr);
- rtc_device_unregister(pdata->rtc);
if (pdata->irq > 0)
writeb(0, pdata->ioaddr + RTC_INTERRUPTS);
return 0;
diff --git a/drivers/rtc/rtc-sun4v.c b/drivers/rtc/rtc-sun4v.c
index 59b5c2dcb58c..ce42e5fa9e09 100644
--- a/drivers/rtc/rtc-sun4v.c
+++ b/drivers/rtc/rtc-sun4v.c
@@ -81,8 +81,10 @@ static const struct rtc_class_ops sun4v_rtc_ops = {
static int __init sun4v_rtc_probe(struct platform_device *pdev)
{
- struct rtc_device *rtc = rtc_device_register("sun4v", &pdev->dev,
- &sun4v_rtc_ops, THIS_MODULE);
+ struct rtc_device *rtc;
+
+ rtc = devm_rtc_device_register(&pdev->dev, "sun4v",
+ &sun4v_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -92,9 +94,6 @@ static int __init sun4v_rtc_probe(struct platform_device *pdev)
static int __exit sun4v_rtc_remove(struct platform_device *pdev)
{
- struct rtc_device *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc);
return 0;
}
@@ -106,18 +105,7 @@ static struct platform_driver sun4v_rtc_driver = {
.remove = __exit_p(sun4v_rtc_remove),
};
-static int __init sun4v_rtc_init(void)
-{
- return platform_driver_probe(&sun4v_rtc_driver, sun4v_rtc_probe);
-}
-
-static void __exit sun4v_rtc_exit(void)
-{
- platform_driver_unregister(&sun4v_rtc_driver);
-}
-
-module_init(sun4v_rtc_init);
-module_exit(sun4v_rtc_exit);
+module_platform_driver_probe(sun4v_rtc_driver, sun4v_rtc_probe);
MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
MODULE_DESCRIPTION("SUN4V RTC driver");
diff --git a/drivers/rtc/rtc-tegra.c b/drivers/rtc/rtc-tegra.c
index 7c033756d6b5..7cb3da0e879a 100644
--- a/drivers/rtc/rtc-tegra.c
+++ b/drivers/rtc/rtc-tegra.c
@@ -26,6 +26,7 @@
#include <linux/delay.h>
#include <linux/rtc.h>
#include <linux/platform_device.h>
+#include <linux/pm.h>
/* set to 1 = busy every eight 32kHz clocks during copy of sec+msec to AHB */
#define TEGRA_RTC_REG_BUSY 0x004
@@ -309,7 +310,7 @@ static const struct of_device_id tegra_rtc_dt_match[] = {
};
MODULE_DEVICE_TABLE(of, tegra_rtc_dt_match);
-static int tegra_rtc_probe(struct platform_device *pdev)
+static int __init tegra_rtc_probe(struct platform_device *pdev)
{
struct tegra_rtc_info *info;
struct resource *res;
@@ -348,53 +349,34 @@ static int tegra_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- info->rtc_dev = rtc_device_register(
- pdev->name, &pdev->dev, &tegra_rtc_ops, THIS_MODULE);
+ info->rtc_dev = devm_rtc_device_register(dev_name(&pdev->dev),
+ &pdev->dev, &tegra_rtc_ops, THIS_MODULE);
if (IS_ERR(info->rtc_dev)) {
ret = PTR_ERR(info->rtc_dev);
- info->rtc_dev = NULL;
- dev_err(&pdev->dev,
- "Unable to register device (err=%d).\n",
+ dev_err(&pdev->dev, "Unable to register device (err=%d).\n",
ret);
return ret;
}
ret = devm_request_irq(&pdev->dev, info->tegra_rtc_irq,
tegra_rtc_irq_handler, IRQF_TRIGGER_HIGH,
- "rtc alarm", &pdev->dev);
+ dev_name(&pdev->dev), &pdev->dev);
if (ret) {
dev_err(&pdev->dev,
"Unable to request interrupt for device (err=%d).\n",
ret);
- goto err_dev_unreg;
+ return ret;
}
dev_notice(&pdev->dev, "Tegra internal Real Time Clock\n");
return 0;
-
-err_dev_unreg:
- rtc_device_unregister(info->rtc_dev);
-
- return ret;
}
-static int tegra_rtc_remove(struct platform_device *pdev)
+#ifdef CONFIG_PM_SLEEP
+static int tegra_rtc_suspend(struct device *dev)
{
- struct tegra_rtc_info *info = platform_get_drvdata(pdev);
-
- rtc_device_unregister(info->rtc_dev);
-
- platform_set_drvdata(pdev, NULL);
-
- return 0;
-}
-
-#ifdef CONFIG_PM
-static int tegra_rtc_suspend(struct platform_device *pdev, pm_message_t state)
-{
- struct device *dev = &pdev->dev;
- struct tegra_rtc_info *info = platform_get_drvdata(pdev);
+ struct tegra_rtc_info *info = dev_get_drvdata(dev);
tegra_rtc_wait_while_busy(dev);
@@ -416,10 +398,9 @@ static int tegra_rtc_suspend(struct platform_device *pdev, pm_message_t state)
return 0;
}
-static int tegra_rtc_resume(struct platform_device *pdev)
+static int tegra_rtc_resume(struct device *dev)
{
- struct device *dev = &pdev->dev;
- struct tegra_rtc_info *info = platform_get_drvdata(pdev);
+ struct tegra_rtc_info *info = dev_get_drvdata(dev);
dev_vdbg(dev, "Resume (device_may_wakeup=%d)\n",
device_may_wakeup(dev));
@@ -431,6 +412,8 @@ static int tegra_rtc_resume(struct platform_device *pdev)
}
#endif
+static SIMPLE_DEV_PM_OPS(tegra_rtc_pm_ops, tegra_rtc_suspend, tegra_rtc_resume);
+
static void tegra_rtc_shutdown(struct platform_device *pdev)
{
dev_vdbg(&pdev->dev, "disabling interrupts.\n");
@@ -439,30 +422,16 @@ static void tegra_rtc_shutdown(struct platform_device *pdev)
MODULE_ALIAS("platform:tegra_rtc");
static struct platform_driver tegra_rtc_driver = {
- .remove = tegra_rtc_remove,
.shutdown = tegra_rtc_shutdown,
.driver = {
.name = "tegra_rtc",
.owner = THIS_MODULE,
.of_match_table = tegra_rtc_dt_match,
+ .pm = &tegra_rtc_pm_ops,
},
-#ifdef CONFIG_PM
- .suspend = tegra_rtc_suspend,
- .resume = tegra_rtc_resume,
-#endif
};
-static int __init tegra_rtc_init(void)
-{
- return platform_driver_probe(&tegra_rtc_driver, tegra_rtc_probe);
-}
-module_init(tegra_rtc_init);
-
-static void __exit tegra_rtc_exit(void)
-{
- platform_driver_unregister(&tegra_rtc_driver);
-}
-module_exit(tegra_rtc_exit);
+module_platform_driver_probe(tegra_rtc_driver, tegra_rtc_probe);
MODULE_AUTHOR("Jon Mayo <jmayo@nvidia.com>");
MODULE_DESCRIPTION("driver for Tegra internal RTC");
diff --git a/drivers/rtc/rtc-test.c b/drivers/rtc/rtc-test.c
index b92e0f6383e6..7746e65b93f2 100644
--- a/drivers/rtc/rtc-test.c
+++ b/drivers/rtc/rtc-test.c
@@ -99,8 +99,10 @@ static DEVICE_ATTR(irq, S_IRUGO | S_IWUSR, test_irq_show, test_irq_store);
static int test_probe(struct platform_device *plat_dev)
{
int err;
- struct rtc_device *rtc = rtc_device_register("test", &plat_dev->dev,
- &test_rtc_ops, THIS_MODULE);
+ struct rtc_device *rtc;
+
+ rtc = devm_rtc_device_register(&plat_dev->dev, "test",
+ &test_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc)) {
err = PTR_ERR(rtc);
return err;
@@ -115,15 +117,11 @@ static int test_probe(struct platform_device *plat_dev)
return 0;
err:
- rtc_device_unregister(rtc);
return err;
}
static int test_remove(struct platform_device *plat_dev)
{
- struct rtc_device *rtc = platform_get_drvdata(plat_dev);
-
- rtc_device_unregister(rtc);
device_remove_file(&plat_dev->dev, &dev_attr_irq);
return 0;
diff --git a/drivers/rtc/rtc-tile.c b/drivers/rtc/rtc-tile.c
index 62db4841078b..249b6531f119 100644
--- a/drivers/rtc/rtc-tile.c
+++ b/drivers/rtc/rtc-tile.c
@@ -80,8 +80,8 @@ static int tile_rtc_probe(struct platform_device *dev)
{
struct rtc_device *rtc;
- rtc = rtc_device_register("tile",
- &dev->dev, &tile_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&dev->dev, "tile",
+ &tile_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -96,11 +96,6 @@ static int tile_rtc_probe(struct platform_device *dev)
*/
static int tile_rtc_remove(struct platform_device *dev)
{
- struct rtc_device *rtc = platform_get_drvdata(dev);
-
- if (rtc)
- rtc_device_unregister(rtc);
-
platform_set_drvdata(dev, NULL);
return 0;
diff --git a/drivers/rtc/rtc-tps6586x.c b/drivers/rtc/rtc-tps6586x.c
index aab4e8c93622..badfea491a93 100644
--- a/drivers/rtc/rtc-tps6586x.c
+++ b/drivers/rtc/rtc-tps6586x.c
@@ -274,7 +274,7 @@ static int tps6586x_rtc_probe(struct platform_device *pdev)
}
platform_set_drvdata(pdev, rtc);
- rtc->rtc = rtc_device_register(dev_name(&pdev->dev), &pdev->dev,
+ rtc->rtc = devm_rtc_device_register(&pdev->dev, dev_name(&pdev->dev),
&tps6586x_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc)) {
ret = PTR_ERR(rtc->rtc);
@@ -306,12 +306,10 @@ fail_rtc_register:
static int tps6586x_rtc_remove(struct platform_device *pdev)
{
- struct tps6586x_rtc *rtc = platform_get_drvdata(pdev);
struct device *tps_dev = to_tps6586x_dev(&pdev->dev);
tps6586x_update(tps_dev, RTC_CTRL, 0,
RTC_ENABLE | OSC_SRC_SEL | PRE_BYPASS | CL_SEL_MASK);
- rtc_device_unregister(rtc->rtc);
return 0;
}
@@ -335,9 +333,8 @@ static int tps6586x_rtc_resume(struct device *dev)
}
#endif
-static const struct dev_pm_ops tps6586x_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(tps6586x_rtc_suspend, tps6586x_rtc_resume)
-};
+static SIMPLE_DEV_PM_OPS(tps6586x_pm_ops, tps6586x_rtc_suspend,
+ tps6586x_rtc_resume);
static struct platform_driver tps6586x_rtc_driver = {
.driver = {
diff --git a/drivers/rtc/rtc-tps65910.c b/drivers/rtc/rtc-tps65910.c
index 8bd8115329b5..26b8bd252769 100644
--- a/drivers/rtc/rtc-tps65910.c
+++ b/drivers/rtc/rtc-tps65910.c
@@ -276,7 +276,7 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
tps_rtc->irq = irq;
device_set_wakeup_capable(&pdev->dev, 1);
- tps_rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ tps_rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&tps65910_rtc_ops, THIS_MODULE);
if (IS_ERR(tps_rtc->rtc)) {
ret = PTR_ERR(tps_rtc->rtc);
@@ -295,12 +295,8 @@ static int tps65910_rtc_probe(struct platform_device *pdev)
*/
static int tps65910_rtc_remove(struct platform_device *pdev)
{
- /* leave rtc running, but disable irqs */
- struct tps65910_rtc *tps_rtc = platform_get_drvdata(pdev);
-
tps65910_rtc_alarm_irq_enable(&pdev->dev, 0);
- rtc_device_unregister(tps_rtc->rtc);
return 0;
}
@@ -324,9 +320,8 @@ static int tps65910_rtc_resume(struct device *dev)
}
#endif
-static const struct dev_pm_ops tps65910_rtc_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(tps65910_rtc_suspend, tps65910_rtc_resume)
-};
+static SIMPLE_DEV_PM_OPS(tps65910_rtc_pm_ops, tps65910_rtc_suspend,
+ tps65910_rtc_resume);
static struct platform_driver tps65910_rtc_driver = {
.probe = tps65910_rtc_probe,
diff --git a/drivers/rtc/rtc-tps80031.c b/drivers/rtc/rtc-tps80031.c
index 9aaf8aaebae9..72662eafb938 100644
--- a/drivers/rtc/rtc-tps80031.c
+++ b/drivers/rtc/rtc-tps80031.c
@@ -277,7 +277,7 @@ static int tps80031_rtc_probe(struct platform_device *pdev)
return ret;
}
- rtc->rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc->rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&tps80031_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc->rtc)) {
ret = PTR_ERR(rtc->rtc);
@@ -292,7 +292,6 @@ static int tps80031_rtc_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "request IRQ:%d failed, err = %d\n",
rtc->irq, ret);
- rtc_device_unregister(rtc->rtc);
return ret;
}
device_set_wakeup_capable(&pdev->dev, 1);
@@ -301,9 +300,6 @@ static int tps80031_rtc_probe(struct platform_device *pdev)
static int tps80031_rtc_remove(struct platform_device *pdev)
{
- struct tps80031_rtc *rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(rtc->rtc);
return 0;
}
@@ -327,9 +323,8 @@ static int tps80031_rtc_resume(struct device *dev)
};
#endif
-static const struct dev_pm_ops tps80031_pm_ops = {
- SET_SYSTEM_SLEEP_PM_OPS(tps80031_rtc_suspend, tps80031_rtc_resume)
-};
+static SIMPLE_DEV_PM_OPS(tps80031_pm_ops, tps80031_rtc_suspend,
+ tps80031_rtc_resume);
static struct platform_driver tps80031_rtc_driver = {
.driver = {
diff --git a/drivers/rtc/rtc-tx4939.c b/drivers/rtc/rtc-tx4939.c
index a12bfac49d36..f9a0677e4e3b 100644
--- a/drivers/rtc/rtc-tx4939.c
+++ b/drivers/rtc/rtc-tx4939.c
@@ -268,14 +268,13 @@ static int __init tx4939_rtc_probe(struct platform_device *pdev)
if (devm_request_irq(&pdev->dev, irq, tx4939_rtc_interrupt,
0, pdev->name, &pdev->dev) < 0)
return -EBUSY;
- rtc = rtc_device_register(pdev->name, &pdev->dev,
+ rtc = devm_rtc_device_register(&pdev->dev, pdev->name,
&tx4939_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
pdata->rtc = rtc;
ret = sysfs_create_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
- if (ret)
- rtc_device_unregister(rtc);
+
return ret;
}
@@ -284,7 +283,6 @@ static int __exit tx4939_rtc_remove(struct platform_device *pdev)
struct tx4939rtc_plat_data *pdata = platform_get_drvdata(pdev);
sysfs_remove_bin_file(&pdev->dev.kobj, &tx4939_rtc_nvram_attr);
- rtc_device_unregister(pdata->rtc);
spin_lock_irq(&pdata->lock);
tx4939_rtc_cmd(pdata->rtcreg, TX4939_RTCCTL_COMMAND_NOP);
spin_unlock_irq(&pdata->lock);
@@ -299,18 +297,7 @@ static struct platform_driver tx4939_rtc_driver = {
},
};
-static int __init tx4939rtc_init(void)
-{
- return platform_driver_probe(&tx4939_rtc_driver, tx4939_rtc_probe);
-}
-
-static void __exit tx4939rtc_exit(void)
-{
- platform_driver_unregister(&tx4939_rtc_driver);
-}
-
-module_init(tx4939rtc_init);
-module_exit(tx4939rtc_exit);
+module_platform_driver_probe(tx4939_rtc_driver, tx4939_rtc_probe);
MODULE_AUTHOR("Atsushi Nemoto <anemo@mba.ocn.ne.jp>");
MODULE_DESCRIPTION("TX4939 internal RTC driver");
diff --git a/drivers/rtc/rtc-v3020.c b/drivers/rtc/rtc-v3020.c
index bca5d677bc85..600798cd4d0c 100644
--- a/drivers/rtc/rtc-v3020.c
+++ b/drivers/rtc/rtc-v3020.c
@@ -49,18 +49,13 @@ struct v3020_chip_ops {
#define V3020_RD 2
#define V3020_IO 3
-struct v3020_gpio {
- const char *name;
- unsigned int gpio;
-};
-
struct v3020 {
/* MMIO access */
void __iomem *ioaddress;
int leftshift;
/* GPIO access */
- struct v3020_gpio *gpio;
+ struct gpio *gpio;
struct v3020_chip_ops *ops;
@@ -107,48 +102,40 @@ static struct v3020_chip_ops v3020_mmio_ops = {
.write_bit = v3020_mmio_write_bit,
};
-static struct v3020_gpio v3020_gpio[] = {
- { "RTC CS", 0 },
- { "RTC WR", 0 },
- { "RTC RD", 0 },
- { "RTC IO", 0 },
+static struct gpio v3020_gpio[] = {
+ { 0, GPIOF_OUT_INIT_HIGH, "RTC CS"},
+ { 0, GPIOF_OUT_INIT_HIGH, "RTC WR"},
+ { 0, GPIOF_OUT_INIT_HIGH, "RTC RD"},
+ { 0, GPIOF_OUT_INIT_HIGH, "RTC IO"},
};
static int v3020_gpio_map(struct v3020 *chip, struct platform_device *pdev,
struct v3020_platform_data *pdata)
{
- int i, err;
+ int err;
v3020_gpio[V3020_CS].gpio = pdata->gpio_cs;
v3020_gpio[V3020_WR].gpio = pdata->gpio_wr;
v3020_gpio[V3020_RD].gpio = pdata->gpio_rd;
v3020_gpio[V3020_IO].gpio = pdata->gpio_io;
- for (i = 0; i < ARRAY_SIZE(v3020_gpio); i++) {
- err = gpio_request(v3020_gpio[i].gpio, v3020_gpio[i].name);
- if (err)
- goto err_request;
-
- gpio_direction_output(v3020_gpio[i].gpio, 1);
- }
+ err = gpio_request_array(v3020_gpio, ARRAY_SIZE(v3020_gpio));
+ if (err)
+ goto err_request;
chip->gpio = v3020_gpio;
return 0;
err_request:
- while (--i >= 0)
- gpio_free(v3020_gpio[i].gpio);
+ gpio_free_array(v3020_gpio, ARRAY_SIZE(v3020_gpio));
return err;
}
static void v3020_gpio_unmap(struct v3020 *chip)
{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(v3020_gpio); i++)
- gpio_free(v3020_gpio[i].gpio);
+ gpio_free_array(v3020_gpio, ARRAY_SIZE(v3020_gpio));
}
static void v3020_gpio_write_bit(struct v3020 *chip, unsigned char bit)
diff --git a/drivers/rtc/rtc-vt8500.c b/drivers/rtc/rtc-vt8500.c
index a000bc0a8bff..d89efee6d29e 100644
--- a/drivers/rtc/rtc-vt8500.c
+++ b/drivers/rtc/rtc-vt8500.c
@@ -252,7 +252,7 @@ static int vt8500_rtc_probe(struct platform_device *pdev)
writel(VT8500_RTC_CR_ENABLE,
vt8500_rtc->regbase + VT8500_RTC_CR);
- vt8500_rtc->rtc = rtc_device_register("vt8500-rtc", &pdev->dev,
+ vt8500_rtc->rtc = devm_rtc_device_register(&pdev->dev, "vt8500-rtc",
&vt8500_rtc_ops, THIS_MODULE);
if (IS_ERR(vt8500_rtc->rtc)) {
ret = PTR_ERR(vt8500_rtc->rtc);
@@ -266,13 +266,11 @@ static int vt8500_rtc_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "can't get irq %i, err %d\n",
vt8500_rtc->irq_alarm, ret);
- goto err_unreg;
+ goto err_return;
}
return 0;
-err_unreg:
- rtc_device_unregister(vt8500_rtc->rtc);
err_return:
return ret;
}
@@ -281,8 +279,6 @@ static int vt8500_rtc_remove(struct platform_device *pdev)
{
struct vt8500_rtc *vt8500_rtc = platform_get_drvdata(pdev);
- rtc_device_unregister(vt8500_rtc->rtc);
-
/* Disable alarm matching */
writel(0, vt8500_rtc->regbase + VT8500_RTC_IS);
diff --git a/drivers/rtc/rtc-wm831x.c b/drivers/rtc/rtc-wm831x.c
index 2f0ac7b30a0c..8d65b94e5a7e 100644
--- a/drivers/rtc/rtc-wm831x.c
+++ b/drivers/rtc/rtc-wm831x.c
@@ -436,7 +436,7 @@ static int wm831x_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- wm831x_rtc->rtc = rtc_device_register("wm831x", &pdev->dev,
+ wm831x_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm831x",
&wm831x_rtc_ops, THIS_MODULE);
if (IS_ERR(wm831x_rtc->rtc)) {
ret = PTR_ERR(wm831x_rtc->rtc);
@@ -462,10 +462,6 @@ err:
static int wm831x_rtc_remove(struct platform_device *pdev)
{
- struct wm831x_rtc *wm831x_rtc = platform_get_drvdata(pdev);
-
- rtc_device_unregister(wm831x_rtc->rtc);
-
return 0;
}
diff --git a/drivers/rtc/rtc-wm8350.c b/drivers/rtc/rtc-wm8350.c
index 8ad86ae0d30f..fa247deb9cf4 100644
--- a/drivers/rtc/rtc-wm8350.c
+++ b/drivers/rtc/rtc-wm8350.c
@@ -339,7 +339,7 @@ static const struct rtc_class_ops wm8350_rtc_ops = {
.alarm_irq_enable = wm8350_rtc_alarm_irq_enable,
};
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int wm8350_rtc_suspend(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
@@ -375,10 +375,6 @@ static int wm8350_rtc_resume(struct device *dev)
return 0;
}
-
-#else
-#define wm8350_rtc_suspend NULL
-#define wm8350_rtc_resume NULL
#endif
static int wm8350_rtc_probe(struct platform_device *pdev)
@@ -439,8 +435,8 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
device_init_wakeup(&pdev->dev, 1);
- wm_rtc->rtc = rtc_device_register("wm8350", &pdev->dev,
- &wm8350_rtc_ops, THIS_MODULE);
+ wm_rtc->rtc = devm_rtc_device_register(&pdev->dev, "wm8350",
+ &wm8350_rtc_ops, THIS_MODULE);
if (IS_ERR(wm_rtc->rtc)) {
ret = PTR_ERR(wm_rtc->rtc);
dev_err(&pdev->dev, "failed to register RTC: %d\n", ret);
@@ -462,20 +458,15 @@ static int wm8350_rtc_probe(struct platform_device *pdev)
static int wm8350_rtc_remove(struct platform_device *pdev)
{
struct wm8350 *wm8350 = platform_get_drvdata(pdev);
- struct wm8350_rtc *wm_rtc = &wm8350->rtc;
wm8350_free_irq(wm8350, WM8350_IRQ_RTC_SEC, wm8350);
wm8350_free_irq(wm8350, WM8350_IRQ_RTC_ALM, wm8350);
- rtc_device_unregister(wm_rtc->rtc);
-
return 0;
}
-static struct dev_pm_ops wm8350_rtc_pm_ops = {
- .suspend = wm8350_rtc_suspend,
- .resume = wm8350_rtc_resume,
-};
+static SIMPLE_DEV_PM_OPS(wm8350_rtc_pm_ops, wm8350_rtc_suspend,
+ wm8350_rtc_resume);
static struct platform_driver wm8350_rtc_driver = {
.probe = wm8350_rtc_probe,
diff --git a/drivers/rtc/rtc-x1205.c b/drivers/rtc/rtc-x1205.c
index f36e59c6bc01..fa9b0679fb60 100644
--- a/drivers/rtc/rtc-x1205.c
+++ b/drivers/rtc/rtc-x1205.c
@@ -630,8 +630,8 @@ static int x1205_probe(struct i2c_client *client,
dev_info(&client->dev, "chip found, driver version " DRV_VERSION "\n");
- rtc = rtc_device_register(x1205_driver.driver.name, &client->dev,
- &x1205_rtc_ops, THIS_MODULE);
+ rtc = devm_rtc_device_register(&client->dev, x1205_driver.driver.name,
+ &x1205_rtc_ops, THIS_MODULE);
if (IS_ERR(rtc))
return PTR_ERR(rtc);
@@ -653,21 +653,13 @@ static int x1205_probe(struct i2c_client *client,
err = x1205_sysfs_register(&client->dev);
if (err)
- goto exit_devreg;
+ return err;
return 0;
-
-exit_devreg:
- rtc_device_unregister(rtc);
-
- return err;
}
static int x1205_remove(struct i2c_client *client)
{
- struct rtc_device *rtc = i2c_get_clientdata(client);
-
- rtc_device_unregister(rtc);
x1205_sysfs_unregister(&client->dev);
return 0;
}
diff --git a/drivers/scsi/fcoe/fcoe_ctlr.c b/drivers/scsi/fcoe/fcoe_ctlr.c
index 08c3bc398da2..43bb55827f30 100644
--- a/drivers/scsi/fcoe/fcoe_ctlr.c
+++ b/drivers/scsi/fcoe/fcoe_ctlr.c
@@ -2161,7 +2161,7 @@ static void fcoe_ctlr_vn_restart(struct fcoe_ctlr *fip)
if (fip->probe_tries < FIP_VN_RLIM_COUNT) {
fip->probe_tries++;
- wait = random32() % FIP_VN_PROBE_WAIT;
+ wait = prandom_u32() % FIP_VN_PROBE_WAIT;
} else
wait = FIP_VN_RLIM_INT;
mod_timer(&fip->timer, jiffies + msecs_to_jiffies(wait));
@@ -2794,7 +2794,7 @@ static void fcoe_ctlr_vn_timeout(struct fcoe_ctlr *fip)
fcoe_all_vn2vn, 0);
fip->port_ka_time = jiffies +
msecs_to_jiffies(FIP_VN_BEACON_INT +
- (random32() % FIP_VN_BEACON_FUZZ));
+ (prandom_u32() % FIP_VN_BEACON_FUZZ));
}
if (time_before(fip->port_ka_time, next_time))
next_time = fip->port_ka_time;
diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c
index d7096ad94d3f..bfda18467ee6 100644
--- a/drivers/scsi/lpfc/lpfc_hbadisc.c
+++ b/drivers/scsi/lpfc/lpfc_hbadisc.c
@@ -1732,7 +1732,7 @@ lpfc_check_pending_fcoe_event(struct lpfc_hba *phba, uint8_t unreg_fcf)
* use through a sequence of @fcf_cnt eligible FCF records with equal
* probability. To perform integer manunipulation of random numbers with
* size unit32_t, the lower 16 bits of the 32-bit random number returned
- * from random32() are taken as the random random number generated.
+ * from prandom_u32() are taken as the random random number generated.
*
* Returns true when outcome is for the newly read FCF record should be
* chosen; otherwise, return false when outcome is for keeping the previously
@@ -1744,7 +1744,7 @@ lpfc_sli4_new_fcf_random_select(struct lpfc_hba *phba, uint32_t fcf_cnt)
uint32_t rand_num;
/* Get 16-bit uniform random number */
- rand_num = (0xFFFF & random32());
+ rand_num = 0xFFFF & prandom_u32();
/* Decision with probability 1/fcf_cnt */
if ((fcf_cnt * rand_num) < 0xFFFF)
@@ -2380,7 +2380,7 @@ lpfc_mbx_cmpl_fcf_scan_read_fcf_rec(struct lpfc_hba *phba, LPFC_MBOXQ_t *mboxq)
phba->fcf.eligible_fcf_cnt = 1;
/* Seeding the random number generator for random selection */
seed = (uint32_t)(0xFFFFFFFF & jiffies);
- srandom32(seed);
+ prandom_seed(seed);
}
spin_unlock_irq(&phba->hbalock);
goto read_next_fcf;
diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
index 1d82eef4e1eb..e44d47e23fe8 100644
--- a/drivers/scsi/qla2xxx/qla_attr.c
+++ b/drivers/scsi/qla2xxx/qla_attr.c
@@ -1939,10 +1939,13 @@ qla24xx_vport_delete(struct fc_vport *fc_vport)
}
/* No pending activities shall be there on the vha now */
- if (ql2xextended_error_logging & ql_dbg_user)
- msleep(random32()%10); /* Just to see if something falls on
- * the net we have placed below */
-
+ if (ql2xextended_error_logging & ql_dbg_user) {
+ /*
+ * Just to see if something falls on the net we have placed
+ * below
+ */
+ msleep(prandom_u32() % 10);
+ }
BUG_ON(atomic_read(&vha->vref_count));
qla2x00_free_fcports(vha);
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 9f0c46547459..df5e961484e1 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -35,6 +35,7 @@ static int sg_version_num = 30534; /* 2 digits for each component */
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/mm.h>
+#include <linux/aio.h>
#include <linux/errno.h>
#include <linux/mtio.h>
#include <linux/ioctl.h>
diff --git a/drivers/staging/android/logger.c b/drivers/staging/android/logger.c
index b14a55742559..b040200a5a55 100644
--- a/drivers/staging/android/logger.c
+++ b/drivers/staging/android/logger.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/vmalloc.h>
+#include <linux/aio.h>
#include "logger.h"
#include <asm/ioctls.h>
diff --git a/drivers/staging/zcache/Kconfig b/drivers/staging/zcache/Kconfig
index c3b8a10e170f..d30722fe5730 100644
--- a/drivers/staging/zcache/Kconfig
+++ b/drivers/staging/zcache/Kconfig
@@ -1,5 +1,5 @@
config ZCACHE
- bool "Dynamic compression of swap pages and clean pagecache pages"
+ tristate "Dynamic compression of swap pages and clean pagecache pages"
depends on CRYPTO=y && SWAP=y && CLEANCACHE && FRONTSWAP
select CRYPTO_LZO
default n
@@ -19,8 +19,8 @@ config ZCACHE_DEBUG
how zcache is doing. You probably want to set this to 'N'.
config RAMSTER
- bool "Cross-machine RAM capacity sharing, aka peer-to-peer tmem"
- depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE=y
+ tristate "Cross-machine RAM capacity sharing, aka peer-to-peer tmem"
+ depends on CONFIGFS_FS=y && SYSFS=y && !HIGHMEM && ZCACHE
depends on NET
# must ensure struct page is 8-byte aligned
select HAVE_ALIGNED_STRUCT_PAGE if !64BIT
diff --git a/drivers/staging/zcache/ramster.h b/drivers/staging/zcache/ramster.h
index 1b71aea2ff62..e1f91d5a0f6a 100644
--- a/drivers/staging/zcache/ramster.h
+++ b/drivers/staging/zcache/ramster.h
@@ -11,10 +11,14 @@
#ifndef _ZCACHE_RAMSTER_H_
#define _ZCACHE_RAMSTER_H_
+#ifdef CONFIG_RAMSTER_MODULE
+#define CONFIG_RAMSTER
+#endif
+
#ifdef CONFIG_RAMSTER
#include "ramster/ramster.h"
#else
-static inline void ramster_init(bool x, bool y, bool z)
+static inline void ramster_init(bool x, bool y, bool z, bool w)
{
}
diff --git a/drivers/staging/zcache/ramster/nodemanager.c b/drivers/staging/zcache/ramster/nodemanager.c
index c0f48158735d..2cfe93342c0d 100644
--- a/drivers/staging/zcache/ramster/nodemanager.c
+++ b/drivers/staging/zcache/ramster/nodemanager.c
@@ -949,7 +949,7 @@ static void __exit exit_r2nm(void)
r2hb_exit();
}
-static int __init init_r2nm(void)
+int r2nm_init(void)
{
int ret = -1;
@@ -986,10 +986,11 @@ out_r2hb:
out:
return ret;
}
+EXPORT_SYMBOL_GPL(r2nm_init);
MODULE_AUTHOR("Oracle");
MODULE_LICENSE("GPL");
-/* module_init(init_r2nm) */
-late_initcall(init_r2nm);
-/* module_exit(exit_r2nm) */
+#ifndef CONFIG_RAMSTER_MODULE
+late_initcall(r2nm_init);
+#endif
diff --git a/drivers/staging/zcache/ramster/ramster.c b/drivers/staging/zcache/ramster/ramster.c
index bf96a1cbf7c1..4f715c791188 100644
--- a/drivers/staging/zcache/ramster/ramster.c
+++ b/drivers/staging/zcache/ramster/ramster.c
@@ -92,7 +92,7 @@ static ssize_t ramster_remote_page_flushes_failed;
#include <linux/debugfs.h>
#define zdfs debugfs_create_size_t
#define zdfs64 debugfs_create_u64
-static int __init ramster_debugfs_init(void)
+static int ramster_debugfs_init(void)
{
struct dentry *root = debugfs_create_dir("ramster", NULL);
if (root == NULL)
@@ -191,6 +191,7 @@ int ramster_do_preload_flnode(struct tmem_pool *pool)
kmem_cache_free(ramster_flnode_cache, flnode);
return ret;
}
+EXPORT_SYMBOL_GPL(ramster_do_preload_flnode);
/*
* Called by the message handler after a (still compressed) page has been
@@ -458,6 +459,7 @@ void *ramster_pampd_free(void *pampd, struct tmem_pool *pool,
}
return local_pampd;
}
+EXPORT_SYMBOL_GPL(ramster_pampd_free);
void ramster_count_foreign_pages(bool eph, int count)
{
@@ -489,6 +491,7 @@ void ramster_count_foreign_pages(bool eph, int count)
ramster_foreign_pers_pages = c;
}
}
+EXPORT_SYMBOL_GPL(ramster_count_foreign_pages);
/*
* For now, just push over a few pages every few seconds to
@@ -674,7 +677,7 @@ requeue:
ramster_remotify_queue_delayed_work(HZ);
}
-void __init ramster_remotify_init(void)
+void ramster_remotify_init(void)
{
unsigned long n = 60UL;
ramster_remotify_workqueue =
@@ -849,8 +852,10 @@ static bool frontswap_selfshrinking __read_mostly;
static void selfshrink_process(struct work_struct *work);
static DECLARE_DELAYED_WORK(selfshrink_worker, selfshrink_process);
+#ifndef CONFIG_RAMSTER_MODULE
/* Enable/disable with kernel boot option. */
static bool use_frontswap_selfshrink __initdata = true;
+#endif
/*
* The default values for the following parameters were deemed reasonable
@@ -905,6 +910,7 @@ static void frontswap_selfshrink(void)
frontswap_shrink(tgt_frontswap_pages);
}
+#ifndef CONFIG_RAMSTER_MODULE
static int __init ramster_nofrontswap_selfshrink_setup(char *s)
{
use_frontswap_selfshrink = false;
@@ -912,6 +918,7 @@ static int __init ramster_nofrontswap_selfshrink_setup(char *s)
}
__setup("noselfshrink", ramster_nofrontswap_selfshrink_setup);
+#endif
static void selfshrink_process(struct work_struct *work)
{
@@ -930,6 +937,7 @@ void ramster_cpu_up(int cpu)
per_cpu(ramster_remoteputmem1, cpu) = p1;
per_cpu(ramster_remoteputmem2, cpu) = p2;
}
+EXPORT_SYMBOL_GPL(ramster_cpu_up);
void ramster_cpu_down(int cpu)
{
@@ -945,6 +953,7 @@ void ramster_cpu_down(int cpu)
kp->flnode = NULL;
}
}
+EXPORT_SYMBOL_GPL(ramster_cpu_down);
void ramster_register_pamops(struct tmem_pamops *pamops)
{
@@ -955,9 +964,11 @@ void ramster_register_pamops(struct tmem_pamops *pamops)
pamops->repatriate = ramster_pampd_repatriate;
pamops->repatriate_preload = ramster_pampd_repatriate_preload;
}
+EXPORT_SYMBOL_GPL(ramster_register_pamops);
-void __init ramster_init(bool cleancache, bool frontswap,
- bool frontswap_exclusive_gets)
+void ramster_init(bool cleancache, bool frontswap,
+ bool frontswap_exclusive_gets,
+ bool frontswap_selfshrink)
{
int ret = 0;
@@ -972,10 +983,17 @@ void __init ramster_init(bool cleancache, bool frontswap,
if (ret)
pr_err("ramster: can't create sysfs for ramster\n");
(void)r2net_register_handlers();
+#ifdef CONFIG_RAMSTER_MODULE
+ ret = r2nm_init();
+ if (ret)
+ pr_err("ramster: can't init r2net\n");
+ frontswap_selfshrinking = frontswap_selfshrink;
+#else
+ frontswap_selfshrinking = use_frontswap_selfshrink;
+#endif
INIT_LIST_HEAD(&ramster_rem_op_list);
ramster_flnode_cache = kmem_cache_create("ramster_flnode",
sizeof(struct flushlist_node), 0, 0, NULL);
- frontswap_selfshrinking = use_frontswap_selfshrink;
if (frontswap_selfshrinking) {
pr_info("ramster: Initializing frontswap selfshrink driver.\n");
schedule_delayed_work(&selfshrink_worker,
@@ -983,3 +1001,4 @@ void __init ramster_init(bool cleancache, bool frontswap,
}
ramster_remotify_init();
}
+EXPORT_SYMBOL_GPL(ramster_init);
diff --git a/drivers/staging/zcache/ramster/ramster.h b/drivers/staging/zcache/ramster/ramster.h
index 12ae56f09ca4..6d41a7a772e3 100644
--- a/drivers/staging/zcache/ramster/ramster.h
+++ b/drivers/staging/zcache/ramster/ramster.h
@@ -147,7 +147,7 @@ extern int r2net_register_handlers(void);
extern int r2net_remote_target_node_set(int);
extern int ramster_remotify_pageframe(bool);
-extern void ramster_init(bool, bool, bool);
+extern void ramster_init(bool, bool, bool, bool);
extern void ramster_register_pamops(struct tmem_pamops *);
extern int ramster_localify(int, struct tmem_oid *oidp, uint32_t, char *,
unsigned int, void *);
diff --git a/drivers/staging/zcache/ramster/ramster_nodemanager.h b/drivers/staging/zcache/ramster/ramster_nodemanager.h
index 49f879d943ab..dbaae34ea613 100644
--- a/drivers/staging/zcache/ramster/ramster_nodemanager.h
+++ b/drivers/staging/zcache/ramster/ramster_nodemanager.h
@@ -36,4 +36,6 @@
/* host name, group name, cluster name all 64 bytes */
#define R2NM_MAX_NAME_LEN 64 /* __NEW_UTS_LEN */
+extern int r2nm_init(void);
+
#endif /* _RAMSTER_NODEMANAGER_H */
diff --git a/drivers/staging/zcache/tmem.c b/drivers/staging/zcache/tmem.c
index a2b7e03b6062..d7e51e4152eb 100644
--- a/drivers/staging/zcache/tmem.c
+++ b/drivers/staging/zcache/tmem.c
@@ -35,7 +35,8 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/atomic.h>
-#ifdef CONFIG_RAMSTER
+#include <linux/export.h>
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
#include <linux/delay.h>
#endif
@@ -641,6 +642,7 @@ void *tmem_localify_get_pampd(struct tmem_pool *pool, struct tmem_oid *oidp,
/* note, hashbucket remains locked */
return pampd;
}
+EXPORT_SYMBOL_GPL(tmem_localify_get_pampd);
void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
void *pampd, void *saved_hb, bool delete)
@@ -658,6 +660,7 @@ void tmem_localify_finish(struct tmem_obj *obj, uint32_t index,
}
spin_unlock(&hb->lock);
}
+EXPORT_SYMBOL_GPL(tmem_localify_finish);
/*
* For ramster only. Helper function to support asynchronous tmem_get.
@@ -719,6 +722,7 @@ out:
spin_unlock(&hb->lock);
return ret;
}
+EXPORT_SYMBOL_GPL(tmem_replace);
#endif
/*
diff --git a/drivers/staging/zcache/tmem.h b/drivers/staging/zcache/tmem.h
index adbe5a8f28aa..d128ce290f1f 100644
--- a/drivers/staging/zcache/tmem.h
+++ b/drivers/staging/zcache/tmem.h
@@ -126,7 +126,7 @@ static inline unsigned tmem_oid_hash(struct tmem_oid *oidp)
TMEM_HASH_BUCKET_BITS);
}
-#ifdef CONFIG_RAMSTER
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
struct tmem_xhandle {
uint8_t client_id;
uint8_t xh_data_cksum;
@@ -171,7 +171,7 @@ struct tmem_obj {
unsigned int objnode_tree_height;
unsigned long objnode_count;
long pampd_count;
-#ifdef CONFIG_RAMSTER
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
/*
* for current design of ramster, all pages belonging to
* an object reside on the same remotenode and extra is
@@ -215,7 +215,7 @@ struct tmem_pamops {
uint32_t);
void (*free)(void *, struct tmem_pool *,
struct tmem_oid *, uint32_t, bool);
-#ifdef CONFIG_RAMSTER
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
void (*new_obj)(struct tmem_obj *);
void (*free_obj)(struct tmem_pool *, struct tmem_obj *, bool);
void *(*repatriate_preload)(void *, struct tmem_pool *,
@@ -247,7 +247,7 @@ extern int tmem_flush_page(struct tmem_pool *, struct tmem_oid *,
extern int tmem_flush_object(struct tmem_pool *, struct tmem_oid *);
extern int tmem_destroy_pool(struct tmem_pool *);
extern void tmem_new_pool(struct tmem_pool *, uint32_t);
-#ifdef CONFIG_RAMSTER
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
extern int tmem_replace(struct tmem_pool *, struct tmem_oid *, uint32_t index,
void *);
extern void *tmem_localify_get_pampd(struct tmem_pool *, struct tmem_oid *,
diff --git a/drivers/staging/zcache/zcache-main.c b/drivers/staging/zcache/zcache-main.c
index 86ead8d96796..00c3e9aad74f 100644
--- a/drivers/staging/zcache/zcache-main.c
+++ b/drivers/staging/zcache/zcache-main.c
@@ -37,8 +37,10 @@
#include "debug.h"
#ifdef CONFIG_RAMSTER
static bool ramster_enabled __read_mostly;
+static int disable_frontswap_selfshrink;
#else
#define ramster_enabled false
+#define disable_frontswap_selfshrink 0
#endif
#ifndef __PG_WAS_ACTIVE
@@ -75,8 +77,12 @@ static char *namestr __read_mostly = "zcache";
(__GFP_FS | __GFP_NORETRY | __GFP_NOWARN | __GFP_NOMEMALLOC)
/* crypto API for zcache */
+#ifdef CONFIG_ZCACHE_MODULE
+static char *zcache_comp_name = "lzo";
+#else
#define ZCACHE_COMP_NAME_SZ CRYPTO_MAX_ALG_NAME
static char zcache_comp_name[ZCACHE_COMP_NAME_SZ] __read_mostly;
+#endif
static struct crypto_comp * __percpu *zcache_comp_pcpu_tfms __read_mostly;
enum comp_op {
@@ -1509,9 +1515,9 @@ static struct cleancache_ops zcache_cleancache_ops = {
.init_fs = zcache_cleancache_init_fs
};
-struct cleancache_ops zcache_cleancache_register_ops(void)
+struct cleancache_ops *zcache_cleancache_register_ops(void)
{
- struct cleancache_ops old_ops =
+ struct cleancache_ops *old_ops =
cleancache_register_ops(&zcache_cleancache_ops);
return old_ops;
@@ -1640,9 +1646,9 @@ static struct frontswap_ops zcache_frontswap_ops = {
.init = zcache_frontswap_init
};
-struct frontswap_ops zcache_frontswap_register_ops(void)
+struct frontswap_ops *zcache_frontswap_register_ops(void)
{
- struct frontswap_ops old_ops =
+ struct frontswap_ops *old_ops =
frontswap_register_ops(&zcache_frontswap_ops);
return old_ops;
@@ -1654,6 +1660,7 @@ struct frontswap_ops zcache_frontswap_register_ops(void)
* OR NOTHING HAPPENS!
*/
+#ifndef CONFIG_ZCACHE_MODULE
static int __init enable_zcache(char *s)
{
zcache_enabled = true;
@@ -1720,18 +1727,27 @@ static int __init enable_zcache_compressor(char *s)
return 1;
}
__setup("zcache=", enable_zcache_compressor);
+#endif
-static int __init zcache_comp_init(void)
+static int zcache_comp_init(void)
{
int ret = 0;
/* check crypto algorithm */
+#ifdef CONFIG_ZCACHE_MODULE
+ ret = crypto_has_comp(zcache_comp_name, 0, 0);
+ if (!ret) {
+ ret = -1;
+ goto out;
+ }
+#else
if (*zcache_comp_name != '\0') {
ret = crypto_has_comp(zcache_comp_name, 0, 0);
if (!ret)
pr_info("zcache: %s not supported\n",
zcache_comp_name);
+ goto out;
}
if (!ret)
strcpy(zcache_comp_name, "lzo");
@@ -1740,6 +1756,7 @@ static int __init zcache_comp_init(void)
ret = 1;
goto out;
}
+#endif
pr_info("zcache: using %s compressor\n", zcache_comp_name);
/* alloc percpu transforms */
@@ -1751,10 +1768,13 @@ out:
return ret;
}
-static int __init zcache_init(void)
+static int zcache_init(void)
{
int ret = 0;
+#ifdef CONFIG_ZCACHE_MODULE
+ zcache_enabled = 1;
+#endif
if (ramster_enabled) {
namestr = "ramster";
ramster_register_pamops(&zcache_pamops);
@@ -1795,7 +1815,7 @@ static int __init zcache_init(void)
}
zbud_init();
if (zcache_enabled && !disable_cleancache) {
- struct cleancache_ops old_ops;
+ struct cleancache_ops *old_ops;
register_shrinker(&zcache_shrinker);
old_ops = zcache_cleancache_register_ops();
@@ -1805,11 +1825,11 @@ static int __init zcache_init(void)
pr_info("%s: cleancache: ignorenonactive = %d\n",
namestr, !disable_cleancache_ignore_nonactive);
#endif
- if (old_ops.init_fs != NULL)
+ if (old_ops != NULL)
pr_warn("%s: cleancache_ops overridden\n", namestr);
}
if (zcache_enabled && !disable_frontswap) {
- struct frontswap_ops old_ops;
+ struct frontswap_ops *old_ops;
old_ops = zcache_frontswap_register_ops();
if (frontswap_has_exclusive_gets)
@@ -1821,14 +1841,36 @@ static int __init zcache_init(void)
namestr, frontswap_has_exclusive_gets,
!disable_frontswap_ignore_nonactive);
#endif
- if (old_ops.init != NULL)
+ if (IS_ERR(old_ops) || old_ops) {
+ if (IS_ERR(old_ops))
+ return PTR_RET(old_ops);
pr_warn("%s: frontswap_ops overridden\n", namestr);
+ }
}
if (ramster_enabled)
ramster_init(!disable_cleancache, !disable_frontswap,
- frontswap_has_exclusive_gets);
+ frontswap_has_exclusive_gets,
+ !disable_frontswap_selfshrink);
out:
return ret;
}
+#ifdef CONFIG_ZCACHE_MODULE
+#ifdef CONFIG_RAMSTER
+module_param(ramster_enabled, int, S_IRUGO);
+module_param(disable_frontswap_selfshrink, int, S_IRUGO);
+#endif
+module_param(disable_cleancache, int, S_IRUGO);
+module_param(disable_frontswap, int, S_IRUGO);
+#ifdef FRONTSWAP_HAS_EXCLUSIVE_GETS
+module_param(frontswap_has_exclusive_gets, bool, S_IRUGO);
+#endif
+module_param(disable_frontswap_ignore_nonactive, int, S_IRUGO);
+module_param(zcache_comp_name, charp, S_IRUGO);
+module_init(zcache_init);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>");
+MODULE_DESCRIPTION("In-kernel compression of cleancache/frontswap pages");
+#else
late_initcall(zcache_init);
+#endif
diff --git a/drivers/staging/zcache/zcache.h b/drivers/staging/zcache/zcache.h
index 81722b33b087..849120095e79 100644
--- a/drivers/staging/zcache/zcache.h
+++ b/drivers/staging/zcache/zcache.h
@@ -39,7 +39,7 @@ extern int zcache_flush_page(int, int, struct tmem_oid *, uint32_t);
extern int zcache_flush_object(int, int, struct tmem_oid *);
extern void zcache_decompress_to_page(char *, unsigned int, struct page *);
-#ifdef CONFIG_RAMSTER
+#if defined(CONFIG_RAMSTER) || defined(CONFIG_RAMSTER_MODULE)
extern void *zcache_pampd_create(char *, unsigned int, bool, int,
struct tmem_handle *);
int zcache_autocreate_pool(unsigned int cli_id, unsigned int pool_id, bool eph);
diff --git a/drivers/usb/gadget/amd5536udc.c b/drivers/usb/gadget/amd5536udc.c
index f52dcfe8f545..24bd363ca351 100644
--- a/drivers/usb/gadget/amd5536udc.c
+++ b/drivers/usb/gadget/amd5536udc.c
@@ -3099,7 +3099,7 @@ static int init_dma_pools(struct udc *dev)
}
/* DMA setup */
- dev->data_requests = dma_pool_create("data_requests", NULL,
+ dev->data_requests = dma_pool_create("data_requests", &dev->pdev->dev,
sizeof(struct udc_data_dma), 0, 0);
if (!dev->data_requests) {
DBG(dev, "can't get request data pool\n");
@@ -3111,7 +3111,7 @@ static int init_dma_pools(struct udc *dev)
dev->ep[UDC_EP0IN_IX].dma = &dev->regs->ctl;
/* dma desc for setup data */
- dev->stp_requests = dma_pool_create("setup requests", NULL,
+ dev->stp_requests = dma_pool_create("setup requests", &dev->pdev->dev,
sizeof(struct udc_stp_dma), 0, 0);
if (!dev->stp_requests) {
DBG(dev, "can't get stp request pool\n");
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index e2b2e9cf254a..5cc4e7eed4a9 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -24,6 +24,8 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/poll.h>
+#include <linux/mmu_context.h>
+#include <linux/aio.h>
#include <linux/device.h>
#include <linux/moduleparam.h>
@@ -513,6 +515,9 @@ static long ep_ioctl(struct file *fd, unsigned code, unsigned long value)
struct kiocb_priv {
struct usb_request *req;
struct ep_data *epdata;
+ struct kiocb *iocb;
+ struct mm_struct *mm;
+ struct work_struct work;
void *buf;
const struct iovec *iv;
unsigned long nr_segs;
@@ -528,7 +533,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
local_irq_disable();
epdata = priv->epdata;
// spin_lock(&epdata->dev->lock);
- kiocbSetCancelled(iocb);
if (likely(epdata && epdata->ep && priv->req))
value = usb_ep_dequeue (epdata->ep, priv->req);
else
@@ -540,15 +544,12 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
return value;
}
-static ssize_t ep_aio_read_retry(struct kiocb *iocb)
+static ssize_t ep_copy_to_user(struct kiocb_priv *priv)
{
- struct kiocb_priv *priv = iocb->private;
ssize_t len, total;
void *to_copy;
int i;
- /* we "retry" to get the right mm context for this: */
-
/* copy stuff into user buffers */
total = priv->actual;
len = 0;
@@ -568,9 +569,26 @@ static ssize_t ep_aio_read_retry(struct kiocb *iocb)
if (total == 0)
break;
}
+
+ return len;
+}
+
+static void ep_user_copy_worker(struct work_struct *work)
+{
+ struct kiocb_priv *priv = container_of(work, struct kiocb_priv, work);
+ struct mm_struct *mm = priv->mm;
+ struct kiocb *iocb = priv->iocb;
+ size_t ret;
+
+ use_mm(mm);
+ ret = ep_copy_to_user(priv);
+ unuse_mm(mm);
+
+ /* completing the iocb can drop the ctx and mm, don't touch mm after */
+ aio_complete(iocb, ret, ret);
+
kfree(priv->buf);
kfree(priv);
- return len;
}
static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
@@ -596,14 +614,14 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req)
aio_complete(iocb, req->actual ? req->actual : req->status,
req->status);
} else {
- /* retry() won't report both; so we hide some faults */
+ /* ep_copy_to_user() won't report both; we hide some faults */
if (unlikely(0 != req->status))
DBG(epdata->dev, "%s fault %d len %d\n",
ep->name, req->status, req->actual);
priv->buf = req->buf;
priv->actual = req->actual;
- kick_iocb(iocb);
+ schedule_work(&priv->work);
}
spin_unlock(&epdata->dev->lock);
@@ -633,8 +651,10 @@ fail:
return value;
}
iocb->private = priv;
+ priv->iocb = iocb;
priv->iv = iv;
priv->nr_segs = nr_segs;
+ INIT_WORK(&priv->work, ep_user_copy_worker);
value = get_ready_ep(iocb->ki_filp->f_flags, epdata);
if (unlikely(value < 0)) {
@@ -642,10 +662,11 @@ fail:
goto fail;
}
- iocb->ki_cancel = ep_aio_cancel;
+ kiocb_set_cancel_fn(iocb, ep_aio_cancel);
get_ep(epdata);
priv->epdata = epdata;
priv->actual = 0;
+ priv->mm = current->mm; /* mm teardown waits for iocbs in exit_aio() */
/* each kiocb is coupled to one usb_request, but we can't
* allocate or submit those if the host disconnected.
@@ -674,7 +695,7 @@ fail:
kfree(priv);
put_ep(epdata);
} else
- value = (iv ? -EIOCBRETRY : -EIOCBQUEUED);
+ value = -EIOCBQUEUED;
return value;
}
@@ -692,7 +713,6 @@ ep_aio_read(struct kiocb *iocb, const struct iovec *iov,
if (unlikely(!buf))
return -ENOMEM;
- iocb->ki_retry = ep_aio_read_retry;
return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs);
}
diff --git a/drivers/uwb/rsv.c b/drivers/uwb/rsv.c
index 0b0d8bce842e..f4ae05f78c42 100644
--- a/drivers/uwb/rsv.c
+++ b/drivers/uwb/rsv.c
@@ -231,7 +231,7 @@ void uwb_rsv_backoff_win_increment(struct uwb_rc *rc)
return;
bow->window <<= 1;
- bow->n = random32() & (bow->window - 1);
+ bow->n = prandom_u32() & (bow->window - 1);
dev_dbg(dev, "new_window=%d, n=%d\n: ", bow->window, bow->n);
/* reset the timer associated variables */
@@ -557,7 +557,7 @@ int uwb_rsv_establish(struct uwb_rsv *rsv)
if (ret)
goto out;
- rsv->tiebreaker = random32() & 1;
+ rsv->tiebreaker = prandom_u32() & 1;
/* get available mas bitmap */
uwb_drp_available(rc, &available);
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index e7718fdad1e1..0181a87dc4bc 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -2453,6 +2453,15 @@ config FB_PUV3_UNIGFX
Choose this option if you want to use the Unigfx device as a
framebuffer device. Without the support of PCI & AGP.
+config FB_HYPERV
+ tristate "Microsoft Hyper-V Synthetic Video support"
+ depends on FB && HYPERV
+ select FB_CFB_FILLRECT
+ select FB_CFB_COPYAREA
+ select FB_CFB_IMAGEBLIT
+ help
+ This framebuffer driver supports Microsoft Hyper-V Synthetic Video.
+
source "drivers/video/omap/Kconfig"
source "drivers/video/omap2/Kconfig"
source "drivers/video/exynos/Kconfig"
diff --git a/drivers/video/Makefile b/drivers/video/Makefile
index 9df387334cb7..97f7b6d6a0b2 100644
--- a/drivers/video/Makefile
+++ b/drivers/video/Makefile
@@ -149,6 +149,7 @@ obj-$(CONFIG_FB_MSM) += msm/
obj-$(CONFIG_FB_NUC900) += nuc900fb.o
obj-$(CONFIG_FB_JZ4740) += jz4740_fb.o
obj-$(CONFIG_FB_PUV3_UNIGFX) += fb-puv3.o
+obj-$(CONFIG_FB_HYPERV) += hyperv_fb.o
# Platform or fallback drivers go here
obj-$(CONFIG_FB_UVESA) += uvesafb.o
diff --git a/drivers/video/backlight/Kconfig b/drivers/video/backlight/Kconfig
index db10d0120d2b..2e166c3fc4c3 100644
--- a/drivers/video/backlight/Kconfig
+++ b/drivers/video/backlight/Kconfig
@@ -59,6 +59,13 @@ config LCD_LTV350QV
The LTV350QV panel is present on all ATSTK1000 boards.
+config LCD_ILI922X
+ tristate "ILI Technology ILI9221/ILI9222 support"
+ depends on SPI
+ help
+ If you have a panel based on the ILI9221/9222 controller
+ chip then say y to include a driver for it.
+
config LCD_ILI9320
tristate "ILI Technology ILI9320 controller support"
depends on SPI
@@ -161,7 +168,7 @@ if BACKLIGHT_CLASS_DEVICE
config BACKLIGHT_ATMEL_LCDC
bool "Atmel LCDC Contrast-as-Backlight control"
depends on FB_ATMEL
- default y if MACH_SAM9261EK || MACH_SAM9G10EK || MACH_SAM9263EK
+ default y if MACH_AT91SAM9261EK || MACH_AT91SAM9G10EK || MACH_AT91SAM9263EK
help
This provides a backlight control internal to the Atmel LCDC
driver. If the LCD "contrast control" on your board is wired
diff --git a/drivers/video/backlight/Makefile b/drivers/video/backlight/Makefile
index 96c4d620c5ce..92711fe60464 100644
--- a/drivers/video/backlight/Makefile
+++ b/drivers/video/backlight/Makefile
@@ -5,6 +5,7 @@ obj-$(CONFIG_LCD_CLASS_DEVICE) += lcd.o
obj-$(CONFIG_LCD_CORGI) += corgi_lcd.o
obj-$(CONFIG_LCD_HP700) += jornada720_lcd.o
obj-$(CONFIG_LCD_HX8357) += hx8357.o
+obj-$(CONFIG_LCD_ILI922X) += ili922x.o
obj-$(CONFIG_LCD_ILI9320) += ili9320.o
obj-$(CONFIG_LCD_L4F00242T03) += l4f00242t03.o
obj-$(CONFIG_LCD_LD9040) += ld9040.o
diff --git a/drivers/video/backlight/adp5520_bl.c b/drivers/video/backlight/adp5520_bl.c
index a1e41d4faa71..c84701b7ca6e 100644
--- a/drivers/video/backlight/adp5520_bl.c
+++ b/drivers/video/backlight/adp5520_bl.c
@@ -143,13 +143,16 @@ static int adp5520_bl_setup(struct backlight_device *bl)
static ssize_t adp5520_show(struct device *dev, char *buf, int reg)
{
struct adp5520_bl *data = dev_get_drvdata(dev);
- int error;
+ int ret;
uint8_t reg_val;
mutex_lock(&data->lock);
- error = adp5520_read(data->master, reg, &reg_val);
+ ret = adp5520_read(data->master, reg, &reg_val);
mutex_unlock(&data->lock);
+ if (ret < 0)
+ return ret;
+
return sprintf(buf, "%u\n", reg_val);
}
@@ -349,35 +352,34 @@ static int adp5520_bl_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int adp5520_bl_suspend(struct platform_device *pdev,
- pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int adp5520_bl_suspend(struct device *dev)
{
- struct backlight_device *bl = platform_get_drvdata(pdev);
+ struct backlight_device *bl = dev_get_drvdata(dev);
+
return adp5520_bl_set(bl, 0);
}
-static int adp5520_bl_resume(struct platform_device *pdev)
+static int adp5520_bl_resume(struct device *dev)
{
- struct backlight_device *bl = platform_get_drvdata(pdev);
+ struct backlight_device *bl = dev_get_drvdata(dev);
backlight_update_status(bl);
return 0;
}
-#else
-#define adp5520_bl_suspend NULL
-#define adp5520_bl_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(adp5520_bl_pm_ops, adp5520_bl_suspend,
+ adp5520_bl_resume);
+
static struct platform_driver adp5520_bl_driver = {
.driver = {
.name = "adp5520-backlight",
.owner = THIS_MODULE,
+ .pm = &adp5520_bl_pm_ops,
},
.probe = adp5520_bl_probe,
.remove = adp5520_bl_remove,
- .suspend = adp5520_bl_suspend,
- .resume = adp5520_bl_resume,
};
module_platform_driver(adp5520_bl_driver);
diff --git a/drivers/video/backlight/adp8860_bl.c b/drivers/video/backlight/adp8860_bl.c
index a77c9cad3320..6bb7f3644b3d 100644
--- a/drivers/video/backlight/adp8860_bl.c
+++ b/drivers/video/backlight/adp8860_bl.c
@@ -773,25 +773,29 @@ static int adp8860_remove(struct i2c_client *client)
return 0;
}
-#ifdef CONFIG_PM
-static int adp8860_i2c_suspend(struct i2c_client *client, pm_message_t message)
+#ifdef CONFIG_PM_SLEEP
+static int adp8860_i2c_suspend(struct device *dev)
{
+ struct i2c_client *client = to_i2c_client(dev);
+
adp8860_clr_bits(client, ADP8860_MDCR, NSTBY);
return 0;
}
-static int adp8860_i2c_resume(struct i2c_client *client)
+static int adp8860_i2c_resume(struct device *dev)
{
+ struct i2c_client *client = to_i2c_client(dev);
+
adp8860_set_bits(client, ADP8860_MDCR, NSTBY | BLEN);
return 0;
}
-#else
-#define adp8860_i2c_suspend NULL
-#define adp8860_i2c_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(adp8860_i2c_pm_ops, adp8860_i2c_suspend,
+ adp8860_i2c_resume);
+
static const struct i2c_device_id adp8860_id[] = {
{ "adp8860", adp8860 },
{ "adp8861", adp8861 },
@@ -802,12 +806,11 @@ MODULE_DEVICE_TABLE(i2c, adp8860_id);
static struct i2c_driver adp8860_driver = {
.driver = {
- .name = KBUILD_MODNAME,
+ .name = KBUILD_MODNAME,
+ .pm = &adp8860_i2c_pm_ops,
},
.probe = adp8860_probe,
.remove = adp8860_remove,
- .suspend = adp8860_i2c_suspend,
- .resume = adp8860_i2c_resume,
.id_table = adp8860_id,
};
diff --git a/drivers/video/backlight/adp8870_bl.c b/drivers/video/backlight/adp8870_bl.c
index 712c25a0d8fe..302c8009a7dc 100644
--- a/drivers/video/backlight/adp8870_bl.c
+++ b/drivers/video/backlight/adp8870_bl.c
@@ -895,13 +895,13 @@ static int adp8870_probe(struct i2c_client *client,
data->bl = bl;
- if (pdata->en_ambl_sens)
+ if (pdata->en_ambl_sens) {
ret = sysfs_create_group(&bl->dev.kobj,
&adp8870_bl_attr_group);
-
- if (ret) {
- dev_err(&client->dev, "failed to register sysfs\n");
- goto out1;
+ if (ret) {
+ dev_err(&client->dev, "failed to register sysfs\n");
+ goto out1;
+ }
}
ret = adp8870_bl_setup(bl);
@@ -947,25 +947,29 @@ static int adp8870_remove(struct i2c_client *client)
return 0;
}
-#ifdef CONFIG_PM
-static int adp8870_i2c_suspend(struct i2c_client *client, pm_message_t message)
+#ifdef CONFIG_PM_SLEEP
+static int adp8870_i2c_suspend(struct device *dev)
{
+ struct i2c_client *client = to_i2c_client(dev);
+
adp8870_clr_bits(client, ADP8870_MDCR, NSTBY);
return 0;
}
-static int adp8870_i2c_resume(struct i2c_client *client)
+static int adp8870_i2c_resume(struct device *dev)
{
+ struct i2c_client *client = to_i2c_client(dev);
+
adp8870_set_bits(client, ADP8870_MDCR, NSTBY | BLEN);
return 0;
}
-#else
-#define adp8870_i2c_suspend NULL
-#define adp8870_i2c_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(adp8870_i2c_pm_ops, adp8870_i2c_suspend,
+ adp8870_i2c_resume);
+
static const struct i2c_device_id adp8870_id[] = {
{ "adp8870", 0 },
{ }
@@ -974,12 +978,11 @@ MODULE_DEVICE_TABLE(i2c, adp8870_id);
static struct i2c_driver adp8870_driver = {
.driver = {
- .name = KBUILD_MODNAME,
+ .name = KBUILD_MODNAME,
+ .pm = &adp8870_i2c_pm_ops,
},
.probe = adp8870_probe,
.remove = adp8870_remove,
- .suspend = adp8870_i2c_suspend,
- .resume = adp8870_i2c_resume,
.id_table = adp8870_id,
};
diff --git a/drivers/video/backlight/ams369fg06.c b/drivers/video/backlight/ams369fg06.c
index c02aa2c2575a..319fef6cb422 100644
--- a/drivers/video/backlight/ams369fg06.c
+++ b/drivers/video/backlight/ams369fg06.c
@@ -533,12 +533,12 @@ static int ams369fg06_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_PM)
-static int ams369fg06_suspend(struct spi_device *spi, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int ams369fg06_suspend(struct device *dev)
{
- struct ams369fg06 *lcd = spi_get_drvdata(spi);
+ struct ams369fg06 *lcd = dev_get_drvdata(dev);
- dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power);
+ dev_dbg(dev, "lcd->power = %d\n", lcd->power);
/*
* when lcd panel is suspend, lcd panel becomes off
@@ -547,19 +547,19 @@ static int ams369fg06_suspend(struct spi_device *spi, pm_message_t mesg)
return ams369fg06_power(lcd, FB_BLANK_POWERDOWN);
}
-static int ams369fg06_resume(struct spi_device *spi)
+static int ams369fg06_resume(struct device *dev)
{
- struct ams369fg06 *lcd = spi_get_drvdata(spi);
+ struct ams369fg06 *lcd = dev_get_drvdata(dev);
lcd->power = FB_BLANK_POWERDOWN;
return ams369fg06_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define ams369fg06_suspend NULL
-#define ams369fg06_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(ams369fg06_pm_ops, ams369fg06_suspend,
+ ams369fg06_resume);
+
static void ams369fg06_shutdown(struct spi_device *spi)
{
struct ams369fg06 *lcd = spi_get_drvdata(spi);
@@ -571,12 +571,11 @@ static struct spi_driver ams369fg06_driver = {
.driver = {
.name = "ams369fg06",
.owner = THIS_MODULE,
+ .pm = &ams369fg06_pm_ops,
},
.probe = ams369fg06_probe,
.remove = ams369fg06_remove,
.shutdown = ams369fg06_shutdown,
- .suspend = ams369fg06_suspend,
- .resume = ams369fg06_resume,
};
module_spi_driver(ams369fg06_driver);
diff --git a/drivers/video/backlight/atmel-pwm-bl.c b/drivers/video/backlight/atmel-pwm-bl.c
index de5e5e74e2a7..a60d6afca97c 100644
--- a/drivers/video/backlight/atmel-pwm-bl.c
+++ b/drivers/video/backlight/atmel-pwm-bl.c
@@ -118,7 +118,7 @@ static const struct backlight_ops atmel_pwm_bl_ops = {
.update_status = atmel_pwm_bl_set_intensity,
};
-static int atmel_pwm_bl_probe(struct platform_device *pdev)
+static int __init atmel_pwm_bl_probe(struct platform_device *pdev)
{
struct backlight_properties props;
const struct atmel_pwm_bl_platform_data *pdata;
@@ -225,17 +225,7 @@ static struct platform_driver atmel_pwm_bl_driver = {
.remove = __exit_p(atmel_pwm_bl_remove),
};
-static int __init atmel_pwm_bl_init(void)
-{
- return platform_driver_probe(&atmel_pwm_bl_driver, atmel_pwm_bl_probe);
-}
-module_init(atmel_pwm_bl_init);
-
-static void __exit atmel_pwm_bl_exit(void)
-{
- platform_driver_unregister(&atmel_pwm_bl_driver);
-}
-module_exit(atmel_pwm_bl_exit);
+module_platform_driver_probe(atmel_pwm_bl_driver, atmel_pwm_bl_probe);
MODULE_AUTHOR("Hans-Christian egtvedt <hans-christian.egtvedt@atmel.com>");
MODULE_DESCRIPTION("Atmel PWM backlight driver");
diff --git a/drivers/video/backlight/corgi_lcd.c b/drivers/video/backlight/corgi_lcd.c
index aa782f302983..c97867a717a7 100644
--- a/drivers/video/backlight/corgi_lcd.c
+++ b/drivers/video/backlight/corgi_lcd.c
@@ -457,10 +457,10 @@ static const struct backlight_ops corgi_bl_ops = {
.update_status = corgi_bl_update_status,
};
-#ifdef CONFIG_PM
-static int corgi_lcd_suspend(struct spi_device *spi, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int corgi_lcd_suspend(struct device *dev)
{
- struct corgi_lcd *lcd = spi_get_drvdata(spi);
+ struct corgi_lcd *lcd = dev_get_drvdata(dev);
corgibl_flags |= CORGIBL_SUSPENDED;
corgi_bl_set_intensity(lcd, 0);
@@ -468,20 +468,19 @@ static int corgi_lcd_suspend(struct spi_device *spi, pm_message_t state)
return 0;
}
-static int corgi_lcd_resume(struct spi_device *spi)
+static int corgi_lcd_resume(struct device *dev)
{
- struct corgi_lcd *lcd = spi_get_drvdata(spi);
+ struct corgi_lcd *lcd = dev_get_drvdata(dev);
corgibl_flags &= ~CORGIBL_SUSPENDED;
corgi_lcd_set_power(lcd->lcd_dev, FB_BLANK_UNBLANK);
backlight_update_status(lcd->bl_dev);
return 0;
}
-#else
-#define corgi_lcd_suspend NULL
-#define corgi_lcd_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(corgi_lcd_pm_ops, corgi_lcd_suspend, corgi_lcd_resume);
+
static int setup_gpio_backlight(struct corgi_lcd *lcd,
struct corgi_lcd_platform_data *pdata)
{
@@ -611,11 +610,10 @@ static struct spi_driver corgi_lcd_driver = {
.driver = {
.name = "corgi-lcd",
.owner = THIS_MODULE,
+ .pm = &corgi_lcd_pm_ops,
},
.probe = corgi_lcd_probe,
.remove = corgi_lcd_remove,
- .suspend = corgi_lcd_suspend,
- .resume = corgi_lcd_resume,
};
module_spi_driver(corgi_lcd_driver);
diff --git a/drivers/video/backlight/da903x_bl.c b/drivers/video/backlight/da903x_bl.c
index 8179cef0730f..67cadd30e273 100644
--- a/drivers/video/backlight/da903x_bl.c
+++ b/drivers/video/backlight/da903x_bl.c
@@ -88,16 +88,21 @@ static int da903x_backlight_update_status(struct backlight_device *bl)
if (bl->props.fb_blank != FB_BLANK_UNBLANK)
brightness = 0;
+ if (bl->props.state & BL_CORE_SUSPENDED)
+ brightness = 0;
+
return da903x_backlight_set(bl, brightness);
}
static int da903x_backlight_get_brightness(struct backlight_device *bl)
{
struct da903x_backlight_data *data = bl_get_data(bl);
+
return data->current_brightness;
}
static const struct backlight_ops da903x_backlight_ops = {
+ .options = BL_CORE_SUSPENDRESUME,
.update_status = da903x_backlight_update_status,
.get_brightness = da903x_backlight_get_brightness,
};
@@ -161,35 +166,10 @@ static int da903x_backlight_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int da903x_backlight_suspend(struct device *dev)
-{
- struct backlight_device *bl = dev_get_drvdata(dev);
-
- return da903x_backlight_set(bl, 0);
-}
-
-static int da903x_backlight_resume(struct device *dev)
-{
- struct backlight_device *bl = dev_get_drvdata(dev);
-
- backlight_update_status(bl);
- return 0;
-}
-
-static const struct dev_pm_ops da903x_backlight_pm_ops = {
- .suspend = da903x_backlight_suspend,
- .resume = da903x_backlight_resume,
-};
-#endif
-
static struct platform_driver da903x_backlight_driver = {
.driver = {
.name = "da903x-backlight",
.owner = THIS_MODULE,
-#ifdef CONFIG_PM
- .pm = &da903x_backlight_pm_ops,
-#endif
},
.probe = da903x_backlight_probe,
.remove = da903x_backlight_remove,
diff --git a/drivers/video/backlight/ep93xx_bl.c b/drivers/video/backlight/ep93xx_bl.c
index ef3e21e8f825..33455821dd31 100644
--- a/drivers/video/backlight/ep93xx_bl.c
+++ b/drivers/video/backlight/ep93xx_bl.c
@@ -60,7 +60,7 @@ static const struct backlight_ops ep93xxbl_ops = {
.get_brightness = ep93xxbl_get_brightness,
};
-static int __init ep93xxbl_probe(struct platform_device *dev)
+static int ep93xxbl_probe(struct platform_device *dev)
{
struct ep93xxbl *ep93xxbl;
struct backlight_device *bl;
@@ -115,35 +115,33 @@ static int ep93xxbl_remove(struct platform_device *dev)
return 0;
}
-#ifdef CONFIG_PM
-static int ep93xxbl_suspend(struct platform_device *dev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int ep93xxbl_suspend(struct device *dev)
{
- struct backlight_device *bl = platform_get_drvdata(dev);
+ struct backlight_device *bl = dev_get_drvdata(dev);
return ep93xxbl_set(bl, 0);
}
-static int ep93xxbl_resume(struct platform_device *dev)
+static int ep93xxbl_resume(struct device *dev)
{
- struct backlight_device *bl = platform_get_drvdata(dev);
+ struct backlight_device *bl = dev_get_drvdata(dev);
backlight_update_status(bl);
return 0;
}
-#else
-#define ep93xxbl_suspend NULL
-#define ep93xxbl_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(ep93xxbl_pm_ops, ep93xxbl_suspend, ep93xxbl_resume);
+
static struct platform_driver ep93xxbl_driver = {
.driver = {
.name = "ep93xx-bl",
.owner = THIS_MODULE,
+ .pm = &ep93xxbl_pm_ops,
},
.probe = ep93xxbl_probe,
.remove = ep93xxbl_remove,
- .suspend = ep93xxbl_suspend,
- .resume = ep93xxbl_resume,
};
module_platform_driver(ep93xxbl_driver);
diff --git a/drivers/video/backlight/generic_bl.c b/drivers/video/backlight/generic_bl.c
index 0ae155be9c89..19e393b41438 100644
--- a/drivers/video/backlight/generic_bl.c
+++ b/drivers/video/backlight/generic_bl.c
@@ -9,8 +9,6 @@
*
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -108,7 +106,7 @@ static int genericbl_probe(struct platform_device *pdev)
generic_backlight_device = bd;
- pr_info("Generic Backlight Driver Initialized.\n");
+ dev_info(&pdev->dev, "Generic Backlight Driver Initialized.\n");
return 0;
}
@@ -122,7 +120,7 @@ static int genericbl_remove(struct platform_device *pdev)
backlight_device_unregister(bd);
- pr_info("Generic Backlight Driver Unloaded\n");
+ dev_info(&pdev->dev, "Generic Backlight Driver Unloaded\n");
return 0;
}
diff --git a/drivers/video/backlight/hp680_bl.c b/drivers/video/backlight/hp680_bl.c
index 5cefd73526f8..00076ecfe9b8 100644
--- a/drivers/video/backlight/hp680_bl.c
+++ b/drivers/video/backlight/hp680_bl.c
@@ -64,29 +64,28 @@ static void hp680bl_send_intensity(struct backlight_device *bd)
}
-#ifdef CONFIG_PM
-static int hp680bl_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int hp680bl_suspend(struct device *dev)
{
- struct backlight_device *bd = platform_get_drvdata(pdev);
+ struct backlight_device *bd = dev_get_drvdata(dev);
hp680bl_suspended = 1;
hp680bl_send_intensity(bd);
return 0;
}
-static int hp680bl_resume(struct platform_device *pdev)
+static int hp680bl_resume(struct device *dev)
{
- struct backlight_device *bd = platform_get_drvdata(pdev);
+ struct backlight_device *bd = dev_get_drvdata(dev);
hp680bl_suspended = 0;
hp680bl_send_intensity(bd);
return 0;
}
-#else
-#define hp680bl_suspend NULL
-#define hp680bl_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(hp680bl_pm_ops, hp680bl_suspend, hp680bl_resume);
+
static int hp680bl_set_intensity(struct backlight_device *bd)
{
hp680bl_send_intensity(bd);
@@ -140,10 +139,9 @@ static int hp680bl_remove(struct platform_device *pdev)
static struct platform_driver hp680bl_driver = {
.probe = hp680bl_probe,
.remove = hp680bl_remove,
- .suspend = hp680bl_suspend,
- .resume = hp680bl_resume,
.driver = {
.name = "hp680-bl",
+ .pm = &hp680bl_pm_ops,
},
};
diff --git a/drivers/video/backlight/ili922x.c b/drivers/video/backlight/ili922x.c
new file mode 100644
index 000000000000..8b8465ed0014
--- /dev/null
+++ b/drivers/video/backlight/ili922x.c
@@ -0,0 +1,555 @@
+/*
+ * (C) Copyright 2008
+ * Stefano Babic, DENX Software Engineering, sbabic@denx.de.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of
+ * the License, or (at your option) any later version.
+ *
+ * This driver implements a lcd device for the ILITEK 922x display
+ * controller. The interface to the display is SPI and the display's
+ * memory is cyclically updated over the RGB interface.
+ */
+
+#include <linux/fb.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/lcd.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/slab.h>
+#include <linux/spi/spi.h>
+#include <linux/string.h>
+
+/* Register offset, see manual section 8.2 */
+#define REG_START_OSCILLATION 0x00
+#define REG_DRIVER_CODE_READ 0x00
+#define REG_DRIVER_OUTPUT_CONTROL 0x01
+#define REG_LCD_AC_DRIVEING_CONTROL 0x02
+#define REG_ENTRY_MODE 0x03
+#define REG_COMPARE_1 0x04
+#define REG_COMPARE_2 0x05
+#define REG_DISPLAY_CONTROL_1 0x07
+#define REG_DISPLAY_CONTROL_2 0x08
+#define REG_DISPLAY_CONTROL_3 0x09
+#define REG_FRAME_CYCLE_CONTROL 0x0B
+#define REG_EXT_INTF_CONTROL 0x0C
+#define REG_POWER_CONTROL_1 0x10
+#define REG_POWER_CONTROL_2 0x11
+#define REG_POWER_CONTROL_3 0x12
+#define REG_POWER_CONTROL_4 0x13
+#define REG_RAM_ADDRESS_SET 0x21
+#define REG_WRITE_DATA_TO_GRAM 0x22
+#define REG_RAM_WRITE_MASK1 0x23
+#define REG_RAM_WRITE_MASK2 0x24
+#define REG_GAMMA_CONTROL_1 0x30
+#define REG_GAMMA_CONTROL_2 0x31
+#define REG_GAMMA_CONTROL_3 0x32
+#define REG_GAMMA_CONTROL_4 0x33
+#define REG_GAMMA_CONTROL_5 0x34
+#define REG_GAMMA_CONTROL_6 0x35
+#define REG_GAMMA_CONTROL_7 0x36
+#define REG_GAMMA_CONTROL_8 0x37
+#define REG_GAMMA_CONTROL_9 0x38
+#define REG_GAMMA_CONTROL_10 0x39
+#define REG_GATE_SCAN_CONTROL 0x40
+#define REG_VERT_SCROLL_CONTROL 0x41
+#define REG_FIRST_SCREEN_DRIVE_POS 0x42
+#define REG_SECOND_SCREEN_DRIVE_POS 0x43
+#define REG_RAM_ADDR_POS_H 0x44
+#define REG_RAM_ADDR_POS_V 0x45
+#define REG_OSCILLATOR_CONTROL 0x4F
+#define REG_GPIO 0x60
+#define REG_OTP_VCM_PROGRAMMING 0x61
+#define REG_OTP_VCM_STATUS_ENABLE 0x62
+#define REG_OTP_PROGRAMMING_ID_KEY 0x65
+
+/*
+ * maximum frequency for register access
+ * (not for the GRAM access)
+ */
+#define ILITEK_MAX_FREQ_REG 4000000
+
+/*
+ * Device ID as found in the datasheet (supports 9221 and 9222)
+ */
+#define ILITEK_DEVICE_ID 0x9220
+#define ILITEK_DEVICE_ID_MASK 0xFFF0
+
+/* Last two bits in the START BYTE */
+#define START_RS_INDEX 0
+#define START_RS_REG 1
+#define START_RW_WRITE 0
+#define START_RW_READ 1
+
+/**
+ * START_BYTE(id, rs, rw)
+ *
+ * Set the start byte according to the required operation.
+ * The start byte is defined as:
+ * ----------------------------------
+ * | 0 | 1 | 1 | 1 | 0 | ID | RS | RW |
+ * ----------------------------------
+ * @id: display's id as set by the manufacturer
+ * @rs: operation type bit, one of:
+ * - START_RS_INDEX set the index register
+ * - START_RS_REG write/read registers/GRAM
+ * @rw: read/write operation
+ * - START_RW_WRITE write
+ * - START_RW_READ read
+ */
+#define START_BYTE(id, rs, rw) \
+ (0x70 | (((id) & 0x01) << 2) | (((rs) & 0x01) << 1) | ((rw) & 0x01))
+
+/**
+ * CHECK_FREQ_REG(spi_device s, spi_transfer x) - Check the frequency
+ * for the SPI transfer. According to the datasheet, the controller
+ * accept higher frequency for the GRAM transfer, but it requires
+ * lower frequency when the registers are read/written.
+ * The macro sets the frequency in the spi_transfer structure if
+ * the frequency exceeds the maximum value.
+ */
+#define CHECK_FREQ_REG(s, x) \
+ do { \
+ if (s->max_speed_hz > ILITEK_MAX_FREQ_REG) \
+ ((struct spi_transfer *)x)->speed_hz = \
+ ILITEK_MAX_FREQ_REG; \
+ } while (0)
+
+#define CMD_BUFSIZE 16
+
+#define POWER_IS_ON(pwr) ((pwr) <= FB_BLANK_NORMAL)
+
+#define set_tx_byte(b) (tx_invert ? ~(b) : b)
+
+/**
+ * ili922x_id - id as set by manufacturer
+ */
+static int ili922x_id = 1;
+module_param(ili922x_id, int, 0);
+
+static int tx_invert;
+module_param(tx_invert, int, 0);
+
+/**
+ * driver's private structure
+ */
+struct ili922x {
+ struct spi_device *spi;
+ struct lcd_device *ld;
+ int power;
+};
+
+/**
+ * ili922x_read_status - read status register from display
+ * @spi: spi device
+ * @rs: output value
+ */
+static int ili922x_read_status(struct spi_device *spi, u16 *rs)
+{
+ struct spi_message msg;
+ struct spi_transfer xfer;
+ unsigned char tbuf[CMD_BUFSIZE];
+ unsigned char rbuf[CMD_BUFSIZE];
+ int ret, i;
+
+ memset(&xfer, 0, sizeof(struct spi_transfer));
+ spi_message_init(&msg);
+ xfer.tx_buf = tbuf;
+ xfer.rx_buf = rbuf;
+ xfer.cs_change = 1;
+ CHECK_FREQ_REG(spi, &xfer);
+
+ tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX,
+ START_RW_READ));
+ /*
+ * we need 4-byte xfer here due to invalid dummy byte
+ * received after start byte
+ */
+ for (i = 1; i < 4; i++)
+ tbuf[i] = set_tx_byte(0); /* dummy */
+
+ xfer.bits_per_word = 8;
+ xfer.len = 4;
+ spi_message_add_tail(&xfer, &msg);
+ ret = spi_sync(spi, &msg);
+ if (ret < 0) {
+ dev_dbg(&spi->dev, "Error sending SPI message 0x%x", ret);
+ return ret;
+ }
+
+ *rs = (rbuf[2] << 8) + rbuf[3];
+ return 0;
+}
+
+/**
+ * ili922x_read - read register from display
+ * @spi: spi device
+ * @reg: offset of the register to be read
+ * @rx: output value
+ */
+static int ili922x_read(struct spi_device *spi, u8 reg, u16 *rx)
+{
+ struct spi_message msg;
+ struct spi_transfer xfer_regindex, xfer_regvalue;
+ unsigned char tbuf[CMD_BUFSIZE];
+ unsigned char rbuf[CMD_BUFSIZE];
+ int ret, len = 0, send_bytes;
+
+ memset(&xfer_regindex, 0, sizeof(struct spi_transfer));
+ memset(&xfer_regvalue, 0, sizeof(struct spi_transfer));
+ spi_message_init(&msg);
+ xfer_regindex.tx_buf = tbuf;
+ xfer_regindex.rx_buf = rbuf;
+ xfer_regindex.cs_change = 1;
+ CHECK_FREQ_REG(spi, &xfer_regindex);
+
+ tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX,
+ START_RW_WRITE));
+ tbuf[1] = set_tx_byte(0);
+ tbuf[2] = set_tx_byte(reg);
+ xfer_regindex.bits_per_word = 8;
+ len = xfer_regindex.len = 3;
+ spi_message_add_tail(&xfer_regindex, &msg);
+
+ send_bytes = len;
+
+ tbuf[len++] = set_tx_byte(START_BYTE(ili922x_id, START_RS_REG,
+ START_RW_READ));
+ tbuf[len++] = set_tx_byte(0);
+ tbuf[len] = set_tx_byte(0);
+
+ xfer_regvalue.cs_change = 1;
+ xfer_regvalue.len = 3;
+ xfer_regvalue.tx_buf = &tbuf[send_bytes];
+ xfer_regvalue.rx_buf = &rbuf[send_bytes];
+ CHECK_FREQ_REG(spi, &xfer_regvalue);
+
+ spi_message_add_tail(&xfer_regvalue, &msg);
+ ret = spi_sync(spi, &msg);
+ if (ret < 0) {
+ dev_dbg(&spi->dev, "Error sending SPI message 0x%x", ret);
+ return ret;
+ }
+
+ *rx = (rbuf[1 + send_bytes] << 8) + rbuf[2 + send_bytes];
+ return 0;
+}
+
+/**
+ * ili922x_write - write a controller register
+ * @spi: struct spi_device *
+ * @reg: offset of the register to be written
+ * @value: value to be written
+ */
+static int ili922x_write(struct spi_device *spi, u8 reg, u16 value)
+{
+ struct spi_message msg;
+ struct spi_transfer xfer_regindex, xfer_regvalue;
+ unsigned char tbuf[CMD_BUFSIZE];
+ unsigned char rbuf[CMD_BUFSIZE];
+ int ret, len = 0;
+
+ memset(&xfer_regindex, 0, sizeof(struct spi_transfer));
+ memset(&xfer_regvalue, 0, sizeof(struct spi_transfer));
+
+ spi_message_init(&msg);
+ xfer_regindex.tx_buf = tbuf;
+ xfer_regindex.rx_buf = rbuf;
+ xfer_regindex.cs_change = 1;
+ CHECK_FREQ_REG(spi, &xfer_regindex);
+
+ tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_INDEX,
+ START_RW_WRITE));
+ tbuf[1] = set_tx_byte(0);
+ tbuf[2] = set_tx_byte(reg);
+ xfer_regindex.bits_per_word = 8;
+ xfer_regindex.len = 3;
+ spi_message_add_tail(&xfer_regindex, &msg);
+
+ ret = spi_sync(spi, &msg);
+
+ spi_message_init(&msg);
+ len = 0;
+ tbuf[0] = set_tx_byte(START_BYTE(ili922x_id, START_RS_REG,
+ START_RW_WRITE));
+ tbuf[1] = set_tx_byte((value & 0xFF00) >> 8);
+ tbuf[2] = set_tx_byte(value & 0x00FF);
+
+ xfer_regvalue.cs_change = 1;
+ xfer_regvalue.len = 3;
+ xfer_regvalue.tx_buf = tbuf;
+ xfer_regvalue.rx_buf = rbuf;
+ CHECK_FREQ_REG(spi, &xfer_regvalue);
+
+ spi_message_add_tail(&xfer_regvalue, &msg);
+
+ ret = spi_sync(spi, &msg);
+ if (ret < 0) {
+ dev_err(&spi->dev, "Error sending SPI message 0x%x", ret);
+ return ret;
+ }
+ return 0;
+}
+
+#ifdef DEBUG
+/**
+ * ili922x_reg_dump - dump all registers
+ */
+static void ili922x_reg_dump(struct spi_device *spi)
+{
+ u8 reg;
+ u16 rx;
+
+ dev_dbg(&spi->dev, "ILI922x configuration registers:\n");
+ for (reg = REG_START_OSCILLATION;
+ reg <= REG_OTP_PROGRAMMING_ID_KEY; reg++) {
+ ili922x_read(spi, reg, &rx);
+ dev_dbg(&spi->dev, "reg @ 0x%02X: 0x%04X\n", reg, rx);
+ }
+}
+#else
+static inline void ili922x_reg_dump(struct spi_device *spi) {}
+#endif
+
+/**
+ * set_write_to_gram_reg - initialize the display to write the GRAM
+ * @spi: spi device
+ */
+static void set_write_to_gram_reg(struct spi_device *spi)
+{
+ struct spi_message msg;
+ struct spi_transfer xfer;
+ unsigned char tbuf[CMD_BUFSIZE];
+
+ memset(&xfer, 0, sizeof(struct spi_transfer));
+
+ spi_message_init(&msg);
+ xfer.tx_buf = tbuf;
+ xfer.rx_buf = NULL;
+ xfer.cs_change = 1;
+
+ tbuf[0] = START_BYTE(ili922x_id, START_RS_INDEX, START_RW_WRITE);
+ tbuf[1] = 0;
+ tbuf[2] = REG_WRITE_DATA_TO_GRAM;
+
+ xfer.bits_per_word = 8;
+ xfer.len = 3;
+ spi_message_add_tail(&xfer, &msg);
+ spi_sync(spi, &msg);
+}
+
+/**
+ * ili922x_poweron - turn the display on
+ * @spi: spi device
+ *
+ * The sequence to turn on the display is taken from
+ * the datasheet and/or the example code provided by the
+ * manufacturer.
+ */
+static int ili922x_poweron(struct spi_device *spi)
+{
+ int ret;
+
+ /* Power on */
+ ret = ili922x_write(spi, REG_POWER_CONTROL_1, 0x0000);
+ usleep_range(10000, 10500);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0000);
+ msleep(40);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x0000);
+ msleep(40);
+ /* register 0x56 is not documented in the datasheet */
+ ret += ili922x_write(spi, 0x56, 0x080F);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_1, 0x4240);
+ usleep_range(10000, 10500);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0014);
+ msleep(40);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x1319);
+ msleep(40);
+
+ return ret;
+}
+
+/**
+ * ili922x_poweroff - turn the display off
+ * @spi: spi device
+ */
+static int ili922x_poweroff(struct spi_device *spi)
+{
+ int ret;
+
+ /* Power off */
+ ret = ili922x_write(spi, REG_POWER_CONTROL_1, 0x0000);
+ usleep_range(10000, 10500);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_2, 0x0000);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_3, 0x0000);
+ msleep(40);
+ ret += ili922x_write(spi, REG_POWER_CONTROL_4, 0x0000);
+ msleep(40);
+
+ return ret;
+}
+
+/**
+ * ili922x_display_init - initialize the display by setting
+ * the configuration registers
+ * @spi: spi device
+ */
+static void ili922x_display_init(struct spi_device *spi)
+{
+ ili922x_write(spi, REG_START_OSCILLATION, 1);
+ usleep_range(10000, 10500);
+ ili922x_write(spi, REG_DRIVER_OUTPUT_CONTROL, 0x691B);
+ ili922x_write(spi, REG_LCD_AC_DRIVEING_CONTROL, 0x0700);
+ ili922x_write(spi, REG_ENTRY_MODE, 0x1030);
+ ili922x_write(spi, REG_COMPARE_1, 0x0000);
+ ili922x_write(spi, REG_COMPARE_2, 0x0000);
+ ili922x_write(spi, REG_DISPLAY_CONTROL_1, 0x0037);
+ ili922x_write(spi, REG_DISPLAY_CONTROL_2, 0x0202);
+ ili922x_write(spi, REG_DISPLAY_CONTROL_3, 0x0000);
+ ili922x_write(spi, REG_FRAME_CYCLE_CONTROL, 0x0000);
+
+ /* Set RGB interface */
+ ili922x_write(spi, REG_EXT_INTF_CONTROL, 0x0110);
+
+ ili922x_poweron(spi);
+
+ ili922x_write(spi, REG_GAMMA_CONTROL_1, 0x0302);
+ ili922x_write(spi, REG_GAMMA_CONTROL_2, 0x0407);
+ ili922x_write(spi, REG_GAMMA_CONTROL_3, 0x0304);
+ ili922x_write(spi, REG_GAMMA_CONTROL_4, 0x0203);
+ ili922x_write(spi, REG_GAMMA_CONTROL_5, 0x0706);
+ ili922x_write(spi, REG_GAMMA_CONTROL_6, 0x0407);
+ ili922x_write(spi, REG_GAMMA_CONTROL_7, 0x0706);
+ ili922x_write(spi, REG_GAMMA_CONTROL_8, 0x0000);
+ ili922x_write(spi, REG_GAMMA_CONTROL_9, 0x0C06);
+ ili922x_write(spi, REG_GAMMA_CONTROL_10, 0x0F00);
+ ili922x_write(spi, REG_RAM_ADDRESS_SET, 0x0000);
+ ili922x_write(spi, REG_GATE_SCAN_CONTROL, 0x0000);
+ ili922x_write(spi, REG_VERT_SCROLL_CONTROL, 0x0000);
+ ili922x_write(spi, REG_FIRST_SCREEN_DRIVE_POS, 0xDB00);
+ ili922x_write(spi, REG_SECOND_SCREEN_DRIVE_POS, 0xDB00);
+ ili922x_write(spi, REG_RAM_ADDR_POS_H, 0xAF00);
+ ili922x_write(spi, REG_RAM_ADDR_POS_V, 0xDB00);
+ ili922x_reg_dump(spi);
+ set_write_to_gram_reg(spi);
+}
+
+static int ili922x_lcd_power(struct ili922x *lcd, int power)
+{
+ int ret = 0;
+
+ if (POWER_IS_ON(power) && !POWER_IS_ON(lcd->power))
+ ret = ili922x_poweron(lcd->spi);
+ else if (!POWER_IS_ON(power) && POWER_IS_ON(lcd->power))
+ ret = ili922x_poweroff(lcd->spi);
+
+ if (!ret)
+ lcd->power = power;
+
+ return ret;
+}
+
+static int ili922x_set_power(struct lcd_device *ld, int power)
+{
+ struct ili922x *ili = lcd_get_data(ld);
+
+ return ili922x_lcd_power(ili, power);
+}
+
+static int ili922x_get_power(struct lcd_device *ld)
+{
+ struct ili922x *ili = lcd_get_data(ld);
+
+ return ili->power;
+}
+
+static struct lcd_ops ili922x_ops = {
+ .get_power = ili922x_get_power,
+ .set_power = ili922x_set_power,
+};
+
+static int ili922x_probe(struct spi_device *spi)
+{
+ struct ili922x *ili;
+ struct lcd_device *lcd;
+ int ret;
+ u16 reg = 0;
+
+ ili = devm_kzalloc(&spi->dev, sizeof(*ili), GFP_KERNEL);
+ if (!ili) {
+ dev_err(&spi->dev, "cannot alloc priv data\n");
+ return -ENOMEM;
+ }
+
+ ili->spi = spi;
+ dev_set_drvdata(&spi->dev, ili);
+
+ /* check if the device is connected */
+ ret = ili922x_read(spi, REG_DRIVER_CODE_READ, &reg);
+ if (ret || ((reg & ILITEK_DEVICE_ID_MASK) != ILITEK_DEVICE_ID)) {
+ dev_err(&spi->dev,
+ "no LCD found: Chip ID 0x%x, ret %d\n",
+ reg, ret);
+ return -ENODEV;
+ } else {
+ dev_info(&spi->dev, "ILI%x found, SPI freq %d, mode %d\n",
+ reg, spi->max_speed_hz, spi->mode);
+ }
+
+ ret = ili922x_read_status(spi, &reg);
+ if (ret) {
+ dev_err(&spi->dev, "reading RS failed...\n");
+ return ret;
+ } else
+ dev_dbg(&spi->dev, "status: 0x%x\n", reg);
+
+ ili922x_display_init(spi);
+
+ ili->power = FB_BLANK_POWERDOWN;
+
+ lcd = lcd_device_register("ili922xlcd", &spi->dev, ili,
+ &ili922x_ops);
+ if (IS_ERR(lcd)) {
+ dev_err(&spi->dev, "cannot register LCD\n");
+ return PTR_ERR(lcd);
+ }
+
+ ili->ld = lcd;
+ spi_set_drvdata(spi, ili);
+
+ ili922x_lcd_power(ili, FB_BLANK_UNBLANK);
+
+ return 0;
+}
+
+static int ili922x_remove(struct spi_device *spi)
+{
+ struct ili922x *ili = spi_get_drvdata(spi);
+
+ ili922x_poweroff(spi);
+ lcd_device_unregister(ili->ld);
+ return 0;
+}
+
+static struct spi_driver ili922x_driver = {
+ .driver = {
+ .name = "ili922x",
+ .owner = THIS_MODULE,
+ },
+ .probe = ili922x_probe,
+ .remove = ili922x_remove,
+};
+
+module_spi_driver(ili922x_driver);
+
+MODULE_AUTHOR("Stefano Babic <sbabic@denx.de>");
+MODULE_DESCRIPTION("ILI9221/9222 LCD driver");
+MODULE_LICENSE("GPL");
+MODULE_PARM_DESC(ili922x_id, "set controller identifier (default=1)");
+MODULE_PARM_DESC(tx_invert, "invert bytes before sending");
diff --git a/drivers/video/backlight/jornada720_bl.c b/drivers/video/backlight/jornada720_bl.c
index fef6ce4fad71..3ccb89340f22 100644
--- a/drivers/video/backlight/jornada720_bl.c
+++ b/drivers/video/backlight/jornada720_bl.c
@@ -9,8 +9,6 @@
*
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/backlight.h>
#include <linux/device.h>
#include <linux/fb.h>
@@ -40,11 +38,13 @@ static int jornada_bl_get_brightness(struct backlight_device *bd)
ret = jornada_ssp_byte(GETBRIGHTNESS);
if (jornada_ssp_byte(GETBRIGHTNESS) != TXDUMMY) {
- pr_err("get brightness timeout\n");
+ dev_err(&bd->dev, "get brightness timeout\n");
jornada_ssp_end();
return -ETIMEDOUT;
- } else /* exchange txdummy for value */
+ } else {
+ /* exchange txdummy for value */
ret = jornada_ssp_byte(TXDUMMY);
+ }
jornada_ssp_end();
@@ -61,7 +61,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
if ((bd->props.power != FB_BLANK_UNBLANK) || (bd->props.fb_blank != FB_BLANK_UNBLANK)) {
ret = jornada_ssp_byte(BRIGHTNESSOFF);
if (ret != TXDUMMY) {
- pr_info("brightness off timeout\n");
+ dev_info(&bd->dev, "brightness off timeout\n");
/* turn off backlight */
PPSR &= ~PPC_LDD1;
PPDR |= PPC_LDD1;
@@ -72,7 +72,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
/* send command to our mcu */
if (jornada_ssp_byte(SETBRIGHTNESS) != TXDUMMY) {
- pr_info("failed to set brightness\n");
+ dev_info(&bd->dev, "failed to set brightness\n");
ret = -ETIMEDOUT;
goto out;
}
@@ -86,7 +86,7 @@ static int jornada_bl_update_status(struct backlight_device *bd)
*/
if (jornada_ssp_byte(BL_MAX_BRIGHT - bd->props.brightness)
!= TXDUMMY) {
- pr_err("set brightness failed\n");
+ dev_err(&bd->dev, "set brightness failed\n");
ret = -ETIMEDOUT;
}
@@ -120,7 +120,7 @@ static int jornada_bl_probe(struct platform_device *pdev)
if (IS_ERR(bd)) {
ret = PTR_ERR(bd);
- pr_err("failed to register device, err=%x\n", ret);
+ dev_err(&pdev->dev, "failed to register device, err=%x\n", ret);
return ret;
}
@@ -134,7 +134,7 @@ static int jornada_bl_probe(struct platform_device *pdev)
jornada_bl_update_status(bd);
platform_set_drvdata(pdev, bd);
- pr_info("HP Jornada 700 series backlight driver\n");
+ dev_info(&pdev->dev, "HP Jornada 700 series backlight driver\n");
return 0;
}
diff --git a/drivers/video/backlight/jornada720_lcd.c b/drivers/video/backlight/jornada720_lcd.c
index 635b30523fd5..b061413f1a65 100644
--- a/drivers/video/backlight/jornada720_lcd.c
+++ b/drivers/video/backlight/jornada720_lcd.c
@@ -9,8 +9,6 @@
*
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/device.h>
#include <linux/fb.h>
#include <linux/kernel.h>
@@ -27,7 +25,7 @@
#define LCD_MAX_CONTRAST 0xff
#define LCD_DEF_CONTRAST 0x80
-static int jornada_lcd_get_power(struct lcd_device *dev)
+static int jornada_lcd_get_power(struct lcd_device *ld)
{
/* LDD2 in PPC = LCD POWER */
if (PPSR & PPC_LDD2)
@@ -36,17 +34,17 @@ static int jornada_lcd_get_power(struct lcd_device *dev)
return FB_BLANK_POWERDOWN; /* PW OFF */
}
-static int jornada_lcd_get_contrast(struct lcd_device *dev)
+static int jornada_lcd_get_contrast(struct lcd_device *ld)
{
int ret;
- if (jornada_lcd_get_power(dev) != FB_BLANK_UNBLANK)
+ if (jornada_lcd_get_power(ld) != FB_BLANK_UNBLANK)
return 0;
jornada_ssp_start();
if (jornada_ssp_byte(GETCONTRAST) != TXDUMMY) {
- pr_err("get contrast failed\n");
+ dev_err(&ld->dev, "get contrast failed\n");
jornada_ssp_end();
return -ETIMEDOUT;
} else {
@@ -56,7 +54,7 @@ static int jornada_lcd_get_contrast(struct lcd_device *dev)
}
}
-static int jornada_lcd_set_contrast(struct lcd_device *dev, int value)
+static int jornada_lcd_set_contrast(struct lcd_device *ld, int value)
{
int ret;
@@ -67,7 +65,7 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value)
/* push the new value */
if (jornada_ssp_byte(value) != TXDUMMY) {
- pr_err("set contrast failed\n");
+ dev_err(&ld->dev, "set contrast failed\n");
jornada_ssp_end();
return -ETIMEDOUT;
}
@@ -78,13 +76,14 @@ static int jornada_lcd_set_contrast(struct lcd_device *dev, int value)
return 0;
}
-static int jornada_lcd_set_power(struct lcd_device *dev, int power)
+static int jornada_lcd_set_power(struct lcd_device *ld, int power)
{
if (power != FB_BLANK_UNBLANK) {
PPSR &= ~PPC_LDD2;
PPDR |= PPC_LDD2;
- } else
+ } else {
PPSR |= PPC_LDD2;
+ }
return 0;
}
@@ -105,7 +104,7 @@ static int jornada_lcd_probe(struct platform_device *pdev)
if (IS_ERR(lcd_device)) {
ret = PTR_ERR(lcd_device);
- pr_err("failed to register device\n");
+ dev_err(&pdev->dev, "failed to register device\n");
return ret;
}
diff --git a/drivers/video/backlight/kb3886_bl.c b/drivers/video/backlight/kb3886_bl.c
index 6c5ed6b242cc..bca6ccc74dfb 100644
--- a/drivers/video/backlight/kb3886_bl.c
+++ b/drivers/video/backlight/kb3886_bl.c
@@ -106,29 +106,28 @@ static int kb3886bl_send_intensity(struct backlight_device *bd)
return 0;
}
-#ifdef CONFIG_PM
-static int kb3886bl_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int kb3886bl_suspend(struct device *dev)
{
- struct backlight_device *bd = platform_get_drvdata(pdev);
+ struct backlight_device *bd = dev_get_drvdata(dev);
kb3886bl_flags |= KB3886BL_SUSPENDED;
backlight_update_status(bd);
return 0;
}
-static int kb3886bl_resume(struct platform_device *pdev)
+static int kb3886bl_resume(struct device *dev)
{
- struct backlight_device *bd = platform_get_drvdata(pdev);
+ struct backlight_device *bd = dev_get_drvdata(dev);
kb3886bl_flags &= ~KB3886BL_SUSPENDED;
backlight_update_status(bd);
return 0;
}
-#else
-#define kb3886bl_suspend NULL
-#define kb3886bl_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(kb3886bl_pm_ops, kb3886bl_suspend, kb3886bl_resume);
+
static int kb3886bl_get_intensity(struct backlight_device *bd)
{
return kb3886bl_intensity;
@@ -179,10 +178,9 @@ static int kb3886bl_remove(struct platform_device *pdev)
static struct platform_driver kb3886bl_driver = {
.probe = kb3886bl_probe,
.remove = kb3886bl_remove,
- .suspend = kb3886bl_suspend,
- .resume = kb3886bl_resume,
.driver = {
.name = "kb3886-bl",
+ .pm = &kb3886bl_pm_ops,
},
};
diff --git a/drivers/video/backlight/l4f00242t03.c b/drivers/video/backlight/l4f00242t03.c
index fb6155771326..a35a38c709cf 100644
--- a/drivers/video/backlight/l4f00242t03.c
+++ b/drivers/video/backlight/l4f00242t03.c
@@ -51,14 +51,33 @@ static void l4f00242t03_lcd_init(struct spi_device *spi)
struct l4f00242t03_pdata *pdata = spi->dev.platform_data;
struct l4f00242t03_priv *priv = spi_get_drvdata(spi);
const u16 cmd[] = { 0x36, param(0), 0x3A, param(0x60) };
+ int ret;
dev_dbg(&spi->dev, "initializing LCD\n");
- regulator_set_voltage(priv->io_reg, 1800000, 1800000);
- regulator_enable(priv->io_reg);
+ ret = regulator_set_voltage(priv->io_reg, 1800000, 1800000);
+ if (ret) {
+ dev_err(&spi->dev, "failed to set the IO regulator voltage.\n");
+ return;
+ }
+ ret = regulator_enable(priv->io_reg);
+ if (ret) {
+ dev_err(&spi->dev, "failed to enable the IO regulator.\n");
+ return;
+ }
- regulator_set_voltage(priv->core_reg, 2800000, 2800000);
- regulator_enable(priv->core_reg);
+ ret = regulator_set_voltage(priv->core_reg, 2800000, 2800000);
+ if (ret) {
+ dev_err(&spi->dev, "failed to set the core regulator voltage.\n");
+ regulator_disable(priv->io_reg);
+ return;
+ }
+ ret = regulator_enable(priv->core_reg);
+ if (ret) {
+ dev_err(&spi->dev, "failed to enable the core regulator.\n");
+ regulator_disable(priv->io_reg);
+ return;
+ }
l4f00242t03_reset(pdata->reset_gpio);
diff --git a/drivers/video/backlight/ld9040.c b/drivers/video/backlight/ld9040.c
index 1b642f5f381a..1e0a3093ce50 100644
--- a/drivers/video/backlight/ld9040.c
+++ b/drivers/video/backlight/ld9040.c
@@ -775,12 +775,12 @@ static int ld9040_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_PM)
-static int ld9040_suspend(struct spi_device *spi, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int ld9040_suspend(struct device *dev)
{
- struct ld9040 *lcd = spi_get_drvdata(spi);
+ struct ld9040 *lcd = dev_get_drvdata(dev);
- dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power);
+ dev_dbg(dev, "lcd->power = %d\n", lcd->power);
/*
* when lcd panel is suspend, lcd panel becomes off
@@ -789,19 +789,18 @@ static int ld9040_suspend(struct spi_device *spi, pm_message_t mesg)
return ld9040_power(lcd, FB_BLANK_POWERDOWN);
}
-static int ld9040_resume(struct spi_device *spi)
+static int ld9040_resume(struct device *dev)
{
- struct ld9040 *lcd = spi_get_drvdata(spi);
+ struct ld9040 *lcd = dev_get_drvdata(dev);
lcd->power = FB_BLANK_POWERDOWN;
return ld9040_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define ld9040_suspend NULL
-#define ld9040_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(ld9040_pm_ops, ld9040_suspend, ld9040_resume);
+
/* Power down all displays on reboot, poweroff or halt. */
static void ld9040_shutdown(struct spi_device *spi)
{
@@ -814,12 +813,11 @@ static struct spi_driver ld9040_driver = {
.driver = {
.name = "ld9040",
.owner = THIS_MODULE,
+ .pm = &ld9040_pm_ops,
},
.probe = ld9040_probe,
.remove = ld9040_remove,
.shutdown = ld9040_shutdown,
- .suspend = ld9040_suspend,
- .resume = ld9040_resume,
};
module_spi_driver(ld9040_driver);
diff --git a/drivers/video/backlight/lm3533_bl.c b/drivers/video/backlight/lm3533_bl.c
index 5d18d4d7f470..1d1dbfb789e3 100644
--- a/drivers/video/backlight/lm3533_bl.c
+++ b/drivers/video/backlight/lm3533_bl.c
@@ -368,29 +368,28 @@ static int lm3533_bl_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
-static int lm3533_bl_suspend(struct platform_device *pdev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int lm3533_bl_suspend(struct device *dev)
{
- struct lm3533_bl *bl = platform_get_drvdata(pdev);
+ struct lm3533_bl *bl = dev_get_drvdata(dev);
- dev_dbg(&pdev->dev, "%s\n", __func__);
+ dev_dbg(dev, "%s\n", __func__);
return lm3533_ctrlbank_disable(&bl->cb);
}
-static int lm3533_bl_resume(struct platform_device *pdev)
+static int lm3533_bl_resume(struct device *dev)
{
- struct lm3533_bl *bl = platform_get_drvdata(pdev);
+ struct lm3533_bl *bl = dev_get_drvdata(dev);
- dev_dbg(&pdev->dev, "%s\n", __func__);
+ dev_dbg(dev, "%s\n", __func__);
return lm3533_ctrlbank_enable(&bl->cb);
}
-#else
-#define lm3533_bl_suspend NULL
-#define lm3533_bl_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(lm3533_bl_pm_ops, lm3533_bl_suspend, lm3533_bl_resume);
+
static void lm3533_bl_shutdown(struct platform_device *pdev)
{
struct lm3533_bl *bl = platform_get_drvdata(pdev);
@@ -404,12 +403,11 @@ static struct platform_driver lm3533_bl_driver = {
.driver = {
.name = "lm3533-backlight",
.owner = THIS_MODULE,
+ .pm = &lm3533_bl_pm_ops,
},
.probe = lm3533_bl_probe,
.remove = lm3533_bl_remove,
.shutdown = lm3533_bl_shutdown,
- .suspend = lm3533_bl_suspend,
- .resume = lm3533_bl_resume,
};
module_platform_driver(lm3533_bl_driver);
diff --git a/drivers/video/backlight/lms501kf03.c b/drivers/video/backlight/lms501kf03.c
index b43882abefaf..cf01b9ac8131 100644
--- a/drivers/video/backlight/lms501kf03.c
+++ b/drivers/video/backlight/lms501kf03.c
@@ -387,13 +387,12 @@ static int lms501kf03_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_PM)
-
-static int lms501kf03_suspend(struct spi_device *spi, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int lms501kf03_suspend(struct device *dev)
{
- struct lms501kf03 *lcd = spi_get_drvdata(spi);
+ struct lms501kf03 *lcd = dev_get_drvdata(dev);
- dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power);
+ dev_dbg(dev, "lcd->power = %d\n", lcd->power);
/*
* when lcd panel is suspend, lcd panel becomes off
@@ -402,19 +401,19 @@ static int lms501kf03_suspend(struct spi_device *spi, pm_message_t mesg)
return lms501kf03_power(lcd, FB_BLANK_POWERDOWN);
}
-static int lms501kf03_resume(struct spi_device *spi)
+static int lms501kf03_resume(struct device *dev)
{
- struct lms501kf03 *lcd = spi_get_drvdata(spi);
+ struct lms501kf03 *lcd = dev_get_drvdata(dev);
lcd->power = FB_BLANK_POWERDOWN;
return lms501kf03_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define lms501kf03_suspend NULL
-#define lms501kf03_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(lms501kf03_pm_ops, lms501kf03_suspend,
+ lms501kf03_resume);
+
static void lms501kf03_shutdown(struct spi_device *spi)
{
struct lms501kf03 *lcd = spi_get_drvdata(spi);
@@ -426,12 +425,11 @@ static struct spi_driver lms501kf03_driver = {
.driver = {
.name = "lms501kf03",
.owner = THIS_MODULE,
+ .pm = &lms501kf03_pm_ops,
},
.probe = lms501kf03_probe,
.remove = lms501kf03_remove,
.shutdown = lms501kf03_shutdown,
- .suspend = lms501kf03_suspend,
- .resume = lms501kf03_resume,
};
module_spi_driver(lms501kf03_driver);
diff --git a/drivers/video/backlight/locomolcd.c b/drivers/video/backlight/locomolcd.c
index 146fea8aa431..6c3ec4259a60 100644
--- a/drivers/video/backlight/locomolcd.c
+++ b/drivers/video/backlight/locomolcd.c
@@ -157,25 +157,24 @@ static const struct backlight_ops locomobl_data = {
.update_status = locomolcd_set_intensity,
};
-#ifdef CONFIG_PM
-static int locomolcd_suspend(struct locomo_dev *dev, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int locomolcd_suspend(struct device *dev)
{
locomolcd_flags |= LOCOMOLCD_SUSPENDED;
locomolcd_set_intensity(locomolcd_bl_device);
return 0;
}
-static int locomolcd_resume(struct locomo_dev *dev)
+static int locomolcd_resume(struct device *dev)
{
locomolcd_flags &= ~LOCOMOLCD_SUSPENDED;
locomolcd_set_intensity(locomolcd_bl_device);
return 0;
}
-#else
-#define locomolcd_suspend NULL
-#define locomolcd_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(locomolcd_pm_ops, locomolcd_suspend, locomolcd_resume);
+
static int locomolcd_probe(struct locomo_dev *ldev)
{
struct backlight_properties props;
@@ -230,13 +229,12 @@ static int locomolcd_remove(struct locomo_dev *dev)
static struct locomo_driver poodle_lcd_driver = {
.drv = {
- .name = "locomo-backlight",
+ .name = "locomo-backlight",
+ .pm = &locomolcd_pm_ops,
},
.devid = LOCOMO_DEVID_BACKLIGHT,
.probe = locomolcd_probe,
.remove = locomolcd_remove,
- .suspend = locomolcd_suspend,
- .resume = locomolcd_resume,
};
static int __init locomolcd_init(void)
diff --git a/drivers/video/backlight/lp855x_bl.c b/drivers/video/backlight/lp855x_bl.c
index 7ae9ae6f4655..c98bdbfdc697 100644
--- a/drivers/video/backlight/lp855x_bl.c
+++ b/drivers/video/backlight/lp855x_bl.c
@@ -35,7 +35,6 @@
#define LP8557_EPROM_START 0x10
#define LP8557_EPROM_END 0x1E
-#define BUF_SIZE 20
#define DEFAULT_BL_NAME "lcd-backlight"
#define MAX_BRIGHTNESS 255
@@ -304,7 +303,7 @@ static ssize_t lp855x_get_chip_id(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct lp855x *lp = dev_get_drvdata(dev);
- return scnprintf(buf, BUF_SIZE, "%s\n", lp->chipname);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", lp->chipname);
}
static ssize_t lp855x_get_bl_ctl_mode(struct device *dev,
@@ -319,7 +318,7 @@ static ssize_t lp855x_get_bl_ctl_mode(struct device *dev,
else if (mode == REGISTER_BASED)
strmode = "register based";
- return scnprintf(buf, BUF_SIZE, "%s\n", strmode);
+ return scnprintf(buf, PAGE_SIZE, "%s\n", strmode);
}
static DEVICE_ATTR(chip_id, S_IRUGO, lp855x_get_chip_id, NULL);
@@ -339,7 +338,6 @@ static int lp855x_probe(struct i2c_client *cl, const struct i2c_device_id *id)
{
struct lp855x *lp;
struct lp855x_platform_data *pdata = cl->dev.platform_data;
- enum lp855x_brightness_ctrl_mode mode;
int ret;
if (!pdata) {
@@ -354,7 +352,6 @@ static int lp855x_probe(struct i2c_client *cl, const struct i2c_device_id *id)
if (!lp)
return -ENOMEM;
- mode = pdata->mode;
lp->client = cl;
lp->dev = &cl->dev;
lp->pdata = pdata;
diff --git a/drivers/video/backlight/ltv350qv.c b/drivers/video/backlight/ltv350qv.c
index c0b4b8f2de98..ed1b39268131 100644
--- a/drivers/video/backlight/ltv350qv.c
+++ b/drivers/video/backlight/ltv350qv.c
@@ -271,25 +271,24 @@ static int ltv350qv_remove(struct spi_device *spi)
return 0;
}
-#ifdef CONFIG_PM
-static int ltv350qv_suspend(struct spi_device *spi, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int ltv350qv_suspend(struct device *dev)
{
- struct ltv350qv *lcd = spi_get_drvdata(spi);
+ struct ltv350qv *lcd = dev_get_drvdata(dev);
return ltv350qv_power(lcd, FB_BLANK_POWERDOWN);
}
-static int ltv350qv_resume(struct spi_device *spi)
+static int ltv350qv_resume(struct device *dev)
{
- struct ltv350qv *lcd = spi_get_drvdata(spi);
+ struct ltv350qv *lcd = dev_get_drvdata(dev);
return ltv350qv_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define ltv350qv_suspend NULL
-#define ltv350qv_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(ltv350qv_pm_ops, ltv350qv_suspend, ltv350qv_resume);
+
/* Power down all displays on reboot, poweroff or halt */
static void ltv350qv_shutdown(struct spi_device *spi)
{
@@ -302,13 +301,12 @@ static struct spi_driver ltv350qv_driver = {
.driver = {
.name = "ltv350qv",
.owner = THIS_MODULE,
+ .pm = &ltv350qv_pm_ops,
},
.probe = ltv350qv_probe,
.remove = ltv350qv_remove,
.shutdown = ltv350qv_shutdown,
- .suspend = ltv350qv_suspend,
- .resume = ltv350qv_resume,
};
module_spi_driver(ltv350qv_driver);
diff --git a/drivers/video/backlight/omap1_bl.c b/drivers/video/backlight/omap1_bl.c
index 627110163067..0aed176cd6a0 100644
--- a/drivers/video/backlight/omap1_bl.c
+++ b/drivers/video/backlight/omap1_bl.c
@@ -18,8 +18,6 @@
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -170,7 +168,7 @@ static int omapbl_probe(struct platform_device *pdev)
dev->props.brightness = pdata->default_intensity;
omapbl_update_status(dev);
- pr_info("OMAP LCD backlight initialised\n");
+ dev_info(&pdev->dev, "OMAP LCD backlight initialised\n");
return 0;
}
diff --git a/drivers/video/backlight/platform_lcd.c b/drivers/video/backlight/platform_lcd.c
index 17a6b83f97af..54d94de652b0 100644
--- a/drivers/video/backlight/platform_lcd.c
+++ b/drivers/video/backlight/platform_lcd.c
@@ -121,7 +121,7 @@ static int platform_lcd_remove(struct platform_device *pdev)
return 0;
}
-#ifdef CONFIG_PM
+#ifdef CONFIG_PM_SLEEP
static int platform_lcd_suspend(struct device *dev)
{
struct platform_lcd *plcd = dev_get_drvdata(dev);
@@ -141,10 +141,10 @@ static int platform_lcd_resume(struct device *dev)
return 0;
}
+#endif
static SIMPLE_DEV_PM_OPS(platform_lcd_pm_ops, platform_lcd_suspend,
platform_lcd_resume);
-#endif
#ifdef CONFIG_OF
static const struct of_device_id platform_lcd_of_match[] = {
@@ -158,9 +158,7 @@ static struct platform_driver platform_lcd_driver = {
.driver = {
.name = "platform-lcd",
.owner = THIS_MODULE,
-#ifdef CONFIG_PM
.pm = &platform_lcd_pm_ops,
-#endif
.of_match_table = of_match_ptr(platform_lcd_of_match),
},
.probe = platform_lcd_probe,
diff --git a/drivers/video/backlight/s6e63m0.c b/drivers/video/backlight/s6e63m0.c
index 9c2677f0ef7d..b37bb1854bf4 100644
--- a/drivers/video/backlight/s6e63m0.c
+++ b/drivers/video/backlight/s6e63m0.c
@@ -817,12 +817,12 @@ static int s6e63m0_remove(struct spi_device *spi)
return 0;
}
-#if defined(CONFIG_PM)
-static int s6e63m0_suspend(struct spi_device *spi, pm_message_t mesg)
+#ifdef CONFIG_PM_SLEEP
+static int s6e63m0_suspend(struct device *dev)
{
- struct s6e63m0 *lcd = spi_get_drvdata(spi);
+ struct s6e63m0 *lcd = dev_get_drvdata(dev);
- dev_dbg(&spi->dev, "lcd->power = %d\n", lcd->power);
+ dev_dbg(dev, "lcd->power = %d\n", lcd->power);
/*
* when lcd panel is suspend, lcd panel becomes off
@@ -831,19 +831,18 @@ static int s6e63m0_suspend(struct spi_device *spi, pm_message_t mesg)
return s6e63m0_power(lcd, FB_BLANK_POWERDOWN);
}
-static int s6e63m0_resume(struct spi_device *spi)
+static int s6e63m0_resume(struct device *dev)
{
- struct s6e63m0 *lcd = spi_get_drvdata(spi);
+ struct s6e63m0 *lcd = dev_get_drvdata(dev);
lcd->power = FB_BLANK_POWERDOWN;
return s6e63m0_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define s6e63m0_suspend NULL
-#define s6e63m0_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(s6e63m0_pm_ops, s6e63m0_suspend, s6e63m0_resume);
+
/* Power down all displays on reboot, poweroff or halt. */
static void s6e63m0_shutdown(struct spi_device *spi)
{
@@ -856,12 +855,11 @@ static struct spi_driver s6e63m0_driver = {
.driver = {
.name = "s6e63m0",
.owner = THIS_MODULE,
+ .pm = &s6e63m0_pm_ops,
},
.probe = s6e63m0_probe,
.remove = s6e63m0_remove,
.shutdown = s6e63m0_shutdown,
- .suspend = s6e63m0_suspend,
- .resume = s6e63m0_resume,
};
module_spi_driver(s6e63m0_driver);
diff --git a/drivers/video/backlight/tdo24m.c b/drivers/video/backlight/tdo24m.c
index 00162085eec0..18cdf466d50a 100644
--- a/drivers/video/backlight/tdo24m.c
+++ b/drivers/video/backlight/tdo24m.c
@@ -412,25 +412,24 @@ static int tdo24m_remove(struct spi_device *spi)
return 0;
}
-#ifdef CONFIG_PM
-static int tdo24m_suspend(struct spi_device *spi, pm_message_t state)
+#ifdef CONFIG_PM_SLEEP
+static int tdo24m_suspend(struct device *dev)
{
- struct tdo24m *lcd = spi_get_drvdata(spi);
+ struct tdo24m *lcd = dev_get_drvdata(dev);
return tdo24m_power(lcd, FB_BLANK_POWERDOWN);
}
-static int tdo24m_resume(struct spi_device *spi)
+static int tdo24m_resume(struct device *dev)
{
- struct tdo24m *lcd = spi_get_drvdata(spi);
+ struct tdo24m *lcd = dev_get_drvdata(dev);
return tdo24m_power(lcd, FB_BLANK_UNBLANK);
}
-#else
-#define tdo24m_suspend NULL
-#define tdo24m_resume NULL
#endif
+static SIMPLE_DEV_PM_OPS(tdo24m_pm_ops, tdo24m_suspend, tdo24m_resume);
+
/* Power down all displays on reboot, poweroff or halt */
static void tdo24m_shutdown(struct spi_device *spi)
{
@@ -443,12 +442,11 @@ static struct spi_driver tdo24m_driver = {
.driver = {
.name = "tdo24m",
.owner = THIS_MODULE,
+ .pm = &tdo24m_pm_ops,
},
.probe = tdo24m_probe,
.remove = tdo24m_remove,
.shutdown = tdo24m_shutdown,
- .suspend = tdo24m_suspend,
- .resume = tdo24m_resume,
};
module_spi_driver(tdo24m_driver);
diff --git a/drivers/video/console/fbcon_cw.c b/drivers/video/console/fbcon_cw.c
index 6a737827beb1..a93670ef7f89 100644
--- a/drivers/video/console/fbcon_cw.c
+++ b/drivers/video/console/fbcon_cw.c
@@ -27,7 +27,7 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute,
{
int i, j, offset = (vc->vc_font.height < 10) ? 1 : 2;
int width = (vc->vc_font.height + 7) >> 3;
- u8 c, t = 0, msk = ~(0xff >> offset);
+ u8 c, msk = ~(0xff >> offset);
for (i = 0; i < vc->vc_font.width; i++) {
for (j = 0; j < width; j++) {
@@ -40,7 +40,6 @@ static void cw_update_attr(u8 *dst, u8 *src, int attribute,
c = ~c;
src++;
*dst++ = c;
- t = c;
}
}
}
diff --git a/drivers/video/cyber2000fb.c b/drivers/video/cyber2000fb.c
index 57886787ead0..e78d9f2233b8 100644
--- a/drivers/video/cyber2000fb.c
+++ b/drivers/video/cyber2000fb.c
@@ -518,6 +518,9 @@ static void cyber2000fb_set_timing(struct cfb_info *cfb, struct par_info *hw)
cyber2000_grphw(0xb9, 0x00, cfb);
spin_unlock(&cfb->reg_b0_lock);
+ /* wait (for the PLL?) to avoid palette corruption at higher clocks */
+ msleep(1000);
+
cfb->ramdac_ctrl = hw->ramdac;
cyber2000fb_write_ramdac_ctrl(cfb);
diff --git a/drivers/video/exynos/exynos_mipi_dsi.c b/drivers/video/exynos/exynos_mipi_dsi.c
index fac7df6d1aba..87cd13b5dee6 100644
--- a/drivers/video/exynos/exynos_mipi_dsi.c
+++ b/drivers/video/exynos/exynos_mipi_dsi.c
@@ -32,6 +32,7 @@
#include <linux/notifier.h>
#include <linux/regulator/consumer.h>
#include <linux/pm_runtime.h>
+#include <linux/err.h>
#include <video/exynos_mipi_dsim.h>
@@ -384,10 +385,9 @@ static int exynos_mipi_dsi_probe(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- dsim->reg_base = devm_request_and_ioremap(&pdev->dev, res);
- if (!dsim->reg_base) {
- dev_err(&pdev->dev, "failed to remap io region\n");
- ret = -ENOMEM;
+ dsim->reg_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(dsim->reg_base)) {
+ ret = PTR_ERR(dsim->reg_base);
goto error;
}
diff --git a/drivers/video/hyperv_fb.c b/drivers/video/hyperv_fb.c
new file mode 100644
index 000000000000..ceb33b0bbb98
--- /dev/null
+++ b/drivers/video/hyperv_fb.c
@@ -0,0 +1,829 @@
+/*
+ * Copyright (c) 2012, Microsoft Corporation.
+ *
+ * Author:
+ * Haiyang Zhang <haiyangz@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+/*
+ * Hyper-V Synthetic Video Frame Buffer Driver
+ *
+ * This is the driver for the Hyper-V Synthetic Video, which supports
+ * screen resolution up to Full HD 1920x1080 with 32 bit color on Windows
+ * Server 2012, and 1600x1200 with 16 bit color on Windows Server 2008 R2
+ * or earlier.
+ *
+ * It also solves the double mouse cursor issue of the emulated video mode.
+ *
+ * The default screen resolution is 1152x864, which may be changed by a
+ * kernel parameter:
+ * video=hyperv_fb:<width>x<height>
+ * For example: video=hyperv_fb:1280x1024
+ *
+ * Portrait orientation is also supported:
+ * For example: video=hyperv_fb:864x1152
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/completion.h>
+#include <linux/fb.h>
+#include <linux/pci.h>
+
+#include <linux/hyperv.h>
+
+
+/* Hyper-V Synthetic Video Protocol definitions and structures */
+#define MAX_VMBUS_PKT_SIZE 0x4000
+
+#define SYNTHVID_VERSION(major, minor) ((minor) << 16 | (major))
+#define SYNTHVID_VERSION_WIN7 SYNTHVID_VERSION(3, 0)
+#define SYNTHVID_VERSION_WIN8 SYNTHVID_VERSION(3, 2)
+
+#define SYNTHVID_DEPTH_WIN7 16
+#define SYNTHVID_DEPTH_WIN8 32
+
+#define SYNTHVID_FB_SIZE_WIN7 (4 * 1024 * 1024)
+#define SYNTHVID_WIDTH_MAX_WIN7 1600
+#define SYNTHVID_HEIGHT_MAX_WIN7 1200
+
+#define SYNTHVID_FB_SIZE_WIN8 (8 * 1024 * 1024)
+
+#define PCI_VENDOR_ID_MICROSOFT 0x1414
+#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353
+
+
+enum pipe_msg_type {
+ PIPE_MSG_INVALID,
+ PIPE_MSG_DATA,
+ PIPE_MSG_MAX
+};
+
+struct pipe_msg_hdr {
+ u32 type;
+ u32 size; /* size of message after this field */
+} __packed;
+
+
+enum synthvid_msg_type {
+ SYNTHVID_ERROR = 0,
+ SYNTHVID_VERSION_REQUEST = 1,
+ SYNTHVID_VERSION_RESPONSE = 2,
+ SYNTHVID_VRAM_LOCATION = 3,
+ SYNTHVID_VRAM_LOCATION_ACK = 4,
+ SYNTHVID_SITUATION_UPDATE = 5,
+ SYNTHVID_SITUATION_UPDATE_ACK = 6,
+ SYNTHVID_POINTER_POSITION = 7,
+ SYNTHVID_POINTER_SHAPE = 8,
+ SYNTHVID_FEATURE_CHANGE = 9,
+ SYNTHVID_DIRT = 10,
+
+ SYNTHVID_MAX = 11
+};
+
+struct synthvid_msg_hdr {
+ u32 type;
+ u32 size; /* size of this header + payload after this field*/
+} __packed;
+
+
+struct synthvid_version_req {
+ u32 version;
+} __packed;
+
+struct synthvid_version_resp {
+ u32 version;
+ u8 is_accepted;
+ u8 max_video_outputs;
+} __packed;
+
+struct synthvid_vram_location {
+ u64 user_ctx;
+ u8 is_vram_gpa_specified;
+ u64 vram_gpa;
+} __packed;
+
+struct synthvid_vram_location_ack {
+ u64 user_ctx;
+} __packed;
+
+struct video_output_situation {
+ u8 active;
+ u32 vram_offset;
+ u8 depth_bits;
+ u32 width_pixels;
+ u32 height_pixels;
+ u32 pitch_bytes;
+} __packed;
+
+struct synthvid_situation_update {
+ u64 user_ctx;
+ u8 video_output_count;
+ struct video_output_situation video_output[1];
+} __packed;
+
+struct synthvid_situation_update_ack {
+ u64 user_ctx;
+} __packed;
+
+struct synthvid_pointer_position {
+ u8 is_visible;
+ u8 video_output;
+ s32 image_x;
+ s32 image_y;
+} __packed;
+
+
+#define CURSOR_MAX_X 96
+#define CURSOR_MAX_Y 96
+#define CURSOR_ARGB_PIXEL_SIZE 4
+#define CURSOR_MAX_SIZE (CURSOR_MAX_X * CURSOR_MAX_Y * CURSOR_ARGB_PIXEL_SIZE)
+#define CURSOR_COMPLETE (-1)
+
+struct synthvid_pointer_shape {
+ u8 part_idx;
+ u8 is_argb;
+ u32 width; /* CURSOR_MAX_X at most */
+ u32 height; /* CURSOR_MAX_Y at most */
+ u32 hot_x; /* hotspot relative to upper-left of pointer image */
+ u32 hot_y;
+ u8 data[4];
+} __packed;
+
+struct synthvid_feature_change {
+ u8 is_dirt_needed;
+ u8 is_ptr_pos_needed;
+ u8 is_ptr_shape_needed;
+ u8 is_situ_needed;
+} __packed;
+
+struct rect {
+ s32 x1, y1; /* top left corner */
+ s32 x2, y2; /* bottom right corner, exclusive */
+} __packed;
+
+struct synthvid_dirt {
+ u8 video_output;
+ u8 dirt_count;
+ struct rect rect[1];
+} __packed;
+
+struct synthvid_msg {
+ struct pipe_msg_hdr pipe_hdr;
+ struct synthvid_msg_hdr vid_hdr;
+ union {
+ struct synthvid_version_req ver_req;
+ struct synthvid_version_resp ver_resp;
+ struct synthvid_vram_location vram;
+ struct synthvid_vram_location_ack vram_ack;
+ struct synthvid_situation_update situ;
+ struct synthvid_situation_update_ack situ_ack;
+ struct synthvid_pointer_position ptr_pos;
+ struct synthvid_pointer_shape ptr_shape;
+ struct synthvid_feature_change feature_chg;
+ struct synthvid_dirt dirt;
+ };
+} __packed;
+
+
+
+/* FB driver definitions and structures */
+#define HVFB_WIDTH 1152 /* default screen width */
+#define HVFB_HEIGHT 864 /* default screen height */
+#define HVFB_WIDTH_MIN 640
+#define HVFB_HEIGHT_MIN 480
+
+#define RING_BUFSIZE (256 * 1024)
+#define VSP_TIMEOUT (10 * HZ)
+#define HVFB_UPDATE_DELAY (HZ / 20)
+
+struct hvfb_par {
+ struct fb_info *info;
+ bool fb_ready; /* fb device is ready */
+ struct completion wait;
+ u32 synthvid_version;
+
+ struct delayed_work dwork;
+ bool update;
+
+ u32 pseudo_palette[16];
+ u8 init_buf[MAX_VMBUS_PKT_SIZE];
+ u8 recv_buf[MAX_VMBUS_PKT_SIZE];
+};
+
+static uint screen_width = HVFB_WIDTH;
+static uint screen_height = HVFB_HEIGHT;
+static uint screen_depth;
+static uint screen_fb_size;
+
+/* Send message to Hyper-V host */
+static inline int synthvid_send(struct hv_device *hdev,
+ struct synthvid_msg *msg)
+{
+ static atomic64_t request_id = ATOMIC64_INIT(0);
+ int ret;
+
+ msg->pipe_hdr.type = PIPE_MSG_DATA;
+ msg->pipe_hdr.size = msg->vid_hdr.size;
+
+ ret = vmbus_sendpacket(hdev->channel, msg,
+ msg->vid_hdr.size + sizeof(struct pipe_msg_hdr),
+ atomic64_inc_return(&request_id),
+ VM_PKT_DATA_INBAND, 0);
+
+ if (ret)
+ pr_err("Unable to send packet via vmbus\n");
+
+ return ret;
+}
+
+
+/* Send screen resolution info to host */
+static int synthvid_send_situ(struct hv_device *hdev)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct synthvid_msg msg;
+
+ if (!info)
+ return -ENODEV;
+
+ memset(&msg, 0, sizeof(struct synthvid_msg));
+
+ msg.vid_hdr.type = SYNTHVID_SITUATION_UPDATE;
+ msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_situation_update);
+ msg.situ.user_ctx = 0;
+ msg.situ.video_output_count = 1;
+ msg.situ.video_output[0].active = 1;
+ msg.situ.video_output[0].vram_offset = 0;
+ msg.situ.video_output[0].depth_bits = info->var.bits_per_pixel;
+ msg.situ.video_output[0].width_pixels = info->var.xres;
+ msg.situ.video_output[0].height_pixels = info->var.yres;
+ msg.situ.video_output[0].pitch_bytes = info->fix.line_length;
+
+ synthvid_send(hdev, &msg);
+
+ return 0;
+}
+
+/* Send mouse pointer info to host */
+static int synthvid_send_ptr(struct hv_device *hdev)
+{
+ struct synthvid_msg msg;
+
+ memset(&msg, 0, sizeof(struct synthvid_msg));
+ msg.vid_hdr.type = SYNTHVID_POINTER_POSITION;
+ msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_pointer_position);
+ msg.ptr_pos.is_visible = 1;
+ msg.ptr_pos.video_output = 0;
+ msg.ptr_pos.image_x = 0;
+ msg.ptr_pos.image_y = 0;
+ synthvid_send(hdev, &msg);
+
+ memset(&msg, 0, sizeof(struct synthvid_msg));
+ msg.vid_hdr.type = SYNTHVID_POINTER_SHAPE;
+ msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_pointer_shape);
+ msg.ptr_shape.part_idx = CURSOR_COMPLETE;
+ msg.ptr_shape.is_argb = 1;
+ msg.ptr_shape.width = 1;
+ msg.ptr_shape.height = 1;
+ msg.ptr_shape.hot_x = 0;
+ msg.ptr_shape.hot_y = 0;
+ msg.ptr_shape.data[0] = 0;
+ msg.ptr_shape.data[1] = 1;
+ msg.ptr_shape.data[2] = 1;
+ msg.ptr_shape.data[3] = 1;
+ synthvid_send(hdev, &msg);
+
+ return 0;
+}
+
+/* Send updated screen area (dirty rectangle) location to host */
+static int synthvid_update(struct fb_info *info)
+{
+ struct hv_device *hdev = device_to_hv_device(info->device);
+ struct synthvid_msg msg;
+
+ memset(&msg, 0, sizeof(struct synthvid_msg));
+
+ msg.vid_hdr.type = SYNTHVID_DIRT;
+ msg.vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_dirt);
+ msg.dirt.video_output = 0;
+ msg.dirt.dirt_count = 1;
+ msg.dirt.rect[0].x1 = 0;
+ msg.dirt.rect[0].y1 = 0;
+ msg.dirt.rect[0].x2 = info->var.xres;
+ msg.dirt.rect[0].y2 = info->var.yres;
+
+ synthvid_send(hdev, &msg);
+
+ return 0;
+}
+
+
+/*
+ * Actions on received messages from host:
+ * Complete the wait event.
+ * Or, reply with screen and cursor info.
+ */
+static void synthvid_recv_sub(struct hv_device *hdev)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par;
+ struct synthvid_msg *msg;
+
+ if (!info)
+ return;
+
+ par = info->par;
+ msg = (struct synthvid_msg *)par->recv_buf;
+
+ /* Complete the wait event */
+ if (msg->vid_hdr.type == SYNTHVID_VERSION_RESPONSE ||
+ msg->vid_hdr.type == SYNTHVID_VRAM_LOCATION_ACK) {
+ memcpy(par->init_buf, msg, MAX_VMBUS_PKT_SIZE);
+ complete(&par->wait);
+ return;
+ }
+
+ /* Reply with screen and cursor info */
+ if (msg->vid_hdr.type == SYNTHVID_FEATURE_CHANGE) {
+ if (par->fb_ready) {
+ synthvid_send_ptr(hdev);
+ synthvid_send_situ(hdev);
+ }
+
+ par->update = msg->feature_chg.is_dirt_needed;
+ if (par->update)
+ schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY);
+ }
+}
+
+/* Receive callback for messages from the host */
+static void synthvid_receive(void *ctx)
+{
+ struct hv_device *hdev = ctx;
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par;
+ struct synthvid_msg *recv_buf;
+ u32 bytes_recvd;
+ u64 req_id;
+ int ret;
+
+ if (!info)
+ return;
+
+ par = info->par;
+ recv_buf = (struct synthvid_msg *)par->recv_buf;
+
+ do {
+ ret = vmbus_recvpacket(hdev->channel, recv_buf,
+ MAX_VMBUS_PKT_SIZE,
+ &bytes_recvd, &req_id);
+ if (bytes_recvd > 0 &&
+ recv_buf->pipe_hdr.type == PIPE_MSG_DATA)
+ synthvid_recv_sub(hdev);
+ } while (bytes_recvd > 0 && ret == 0);
+}
+
+/* Check synthetic video protocol version with the host */
+static int synthvid_negotiate_ver(struct hv_device *hdev, u32 ver)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par = info->par;
+ struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf;
+ int t, ret = 0;
+
+ memset(msg, 0, sizeof(struct synthvid_msg));
+ msg->vid_hdr.type = SYNTHVID_VERSION_REQUEST;
+ msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_version_req);
+ msg->ver_req.version = ver;
+ synthvid_send(hdev, msg);
+
+ t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT);
+ if (!t) {
+ pr_err("Time out on waiting version response\n");
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+ if (!msg->ver_resp.is_accepted) {
+ ret = -ENODEV;
+ goto out;
+ }
+
+ par->synthvid_version = ver;
+
+out:
+ return ret;
+}
+
+/* Connect to VSP (Virtual Service Provider) on host */
+static int synthvid_connect_vsp(struct hv_device *hdev)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par = info->par;
+ int ret;
+
+ ret = vmbus_open(hdev->channel, RING_BUFSIZE, RING_BUFSIZE,
+ NULL, 0, synthvid_receive, hdev);
+ if (ret) {
+ pr_err("Unable to open vmbus channel\n");
+ return ret;
+ }
+
+ /* Negotiate the protocol version with host */
+ if (vmbus_proto_version == VERSION_WS2008 ||
+ vmbus_proto_version == VERSION_WIN7)
+ ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN7);
+ else
+ ret = synthvid_negotiate_ver(hdev, SYNTHVID_VERSION_WIN8);
+
+ if (ret) {
+ pr_err("Synthetic video device version not accepted\n");
+ goto error;
+ }
+
+ if (par->synthvid_version == SYNTHVID_VERSION_WIN7) {
+ screen_depth = SYNTHVID_DEPTH_WIN7;
+ screen_fb_size = SYNTHVID_FB_SIZE_WIN7;
+ } else {
+ screen_depth = SYNTHVID_DEPTH_WIN8;
+ screen_fb_size = SYNTHVID_FB_SIZE_WIN8;
+ }
+
+ return 0;
+
+error:
+ vmbus_close(hdev->channel);
+ return ret;
+}
+
+/* Send VRAM and Situation messages to the host */
+static int synthvid_send_config(struct hv_device *hdev)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par = info->par;
+ struct synthvid_msg *msg = (struct synthvid_msg *)par->init_buf;
+ int t, ret = 0;
+
+ /* Send VRAM location */
+ memset(msg, 0, sizeof(struct synthvid_msg));
+ msg->vid_hdr.type = SYNTHVID_VRAM_LOCATION;
+ msg->vid_hdr.size = sizeof(struct synthvid_msg_hdr) +
+ sizeof(struct synthvid_vram_location);
+ msg->vram.user_ctx = msg->vram.vram_gpa = info->fix.smem_start;
+ msg->vram.is_vram_gpa_specified = 1;
+ synthvid_send(hdev, msg);
+
+ t = wait_for_completion_timeout(&par->wait, VSP_TIMEOUT);
+ if (!t) {
+ pr_err("Time out on waiting vram location ack\n");
+ ret = -ETIMEDOUT;
+ goto out;
+ }
+ if (msg->vram_ack.user_ctx != info->fix.smem_start) {
+ pr_err("Unable to set VRAM location\n");
+ ret = -ENODEV;
+ goto out;
+ }
+
+ /* Send pointer and situation update */
+ synthvid_send_ptr(hdev);
+ synthvid_send_situ(hdev);
+
+out:
+ return ret;
+}
+
+
+/*
+ * Delayed work callback:
+ * It is called at HVFB_UPDATE_DELAY or longer time interval to process
+ * screen updates. It is re-scheduled if further update is necessary.
+ */
+static void hvfb_update_work(struct work_struct *w)
+{
+ struct hvfb_par *par = container_of(w, struct hvfb_par, dwork.work);
+ struct fb_info *info = par->info;
+
+ if (par->fb_ready)
+ synthvid_update(info);
+
+ if (par->update)
+ schedule_delayed_work(&par->dwork, HVFB_UPDATE_DELAY);
+}
+
+
+/* Framebuffer operation handlers */
+
+static int hvfb_check_var(struct fb_var_screeninfo *var, struct fb_info *info)
+{
+ if (var->xres < HVFB_WIDTH_MIN || var->yres < HVFB_HEIGHT_MIN ||
+ var->xres > screen_width || var->yres > screen_height ||
+ var->bits_per_pixel != screen_depth)
+ return -EINVAL;
+
+ var->xres_virtual = var->xres;
+ var->yres_virtual = var->yres;
+
+ return 0;
+}
+
+static int hvfb_set_par(struct fb_info *info)
+{
+ struct hv_device *hdev = device_to_hv_device(info->device);
+
+ return synthvid_send_situ(hdev);
+}
+
+
+static inline u32 chan_to_field(u32 chan, struct fb_bitfield *bf)
+{
+ return ((chan & 0xffff) >> (16 - bf->length)) << bf->offset;
+}
+
+static int hvfb_setcolreg(unsigned regno, unsigned red, unsigned green,
+ unsigned blue, unsigned transp, struct fb_info *info)
+{
+ u32 *pal = info->pseudo_palette;
+
+ if (regno > 15)
+ return -EINVAL;
+
+ pal[regno] = chan_to_field(red, &info->var.red)
+ | chan_to_field(green, &info->var.green)
+ | chan_to_field(blue, &info->var.blue)
+ | chan_to_field(transp, &info->var.transp);
+
+ return 0;
+}
+
+
+static struct fb_ops hvfb_ops = {
+ .owner = THIS_MODULE,
+ .fb_check_var = hvfb_check_var,
+ .fb_set_par = hvfb_set_par,
+ .fb_setcolreg = hvfb_setcolreg,
+ .fb_fillrect = cfb_fillrect,
+ .fb_copyarea = cfb_copyarea,
+ .fb_imageblit = cfb_imageblit,
+};
+
+
+/* Get options from kernel paramenter "video=" */
+static void hvfb_get_option(struct fb_info *info)
+{
+ struct hvfb_par *par = info->par;
+ char *opt = NULL, *p;
+ uint x = 0, y = 0;
+
+ if (fb_get_options(KBUILD_MODNAME, &opt) || !opt || !*opt)
+ return;
+
+ p = strsep(&opt, "x");
+ if (!*p || kstrtouint(p, 0, &x) ||
+ !opt || !*opt || kstrtouint(opt, 0, &y)) {
+ pr_err("Screen option is invalid: skipped\n");
+ return;
+ }
+
+ if (x < HVFB_WIDTH_MIN || y < HVFB_HEIGHT_MIN ||
+ (par->synthvid_version == SYNTHVID_VERSION_WIN8 &&
+ x * y * screen_depth / 8 > SYNTHVID_FB_SIZE_WIN8) ||
+ (par->synthvid_version == SYNTHVID_VERSION_WIN7 &&
+ (x > SYNTHVID_WIDTH_MAX_WIN7 || y > SYNTHVID_HEIGHT_MAX_WIN7))) {
+ pr_err("Screen resolution option is out of range: skipped\n");
+ return;
+ }
+
+ screen_width = x;
+ screen_height = y;
+ return;
+}
+
+
+/* Get framebuffer memory from Hyper-V video pci space */
+static int hvfb_getmem(struct fb_info *info)
+{
+ struct pci_dev *pdev;
+ ulong fb_phys;
+ void *fb_virt;
+
+ pdev = pci_get_device(PCI_VENDOR_ID_MICROSOFT,
+ PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
+ if (!pdev) {
+ pr_err("Unable to find PCI Hyper-V video\n");
+ return -ENODEV;
+ }
+
+ if (!(pci_resource_flags(pdev, 0) & IORESOURCE_MEM) ||
+ pci_resource_len(pdev, 0) < screen_fb_size)
+ goto err1;
+
+ fb_phys = pci_resource_end(pdev, 0) - screen_fb_size + 1;
+ if (!request_mem_region(fb_phys, screen_fb_size, KBUILD_MODNAME))
+ goto err1;
+
+ fb_virt = ioremap(fb_phys, screen_fb_size);
+ if (!fb_virt)
+ goto err2;
+
+ info->apertures = alloc_apertures(1);
+ if (!info->apertures)
+ goto err3;
+
+ info->apertures->ranges[0].base = pci_resource_start(pdev, 0);
+ info->apertures->ranges[0].size = pci_resource_len(pdev, 0);
+ info->fix.smem_start = fb_phys;
+ info->fix.smem_len = screen_fb_size;
+ info->screen_base = fb_virt;
+ info->screen_size = screen_fb_size;
+
+ pci_dev_put(pdev);
+ return 0;
+
+err3:
+ iounmap(fb_virt);
+err2:
+ release_mem_region(fb_phys, screen_fb_size);
+err1:
+ pci_dev_put(pdev);
+ return -ENOMEM;
+}
+
+/* Release the framebuffer */
+static void hvfb_putmem(struct fb_info *info)
+{
+ iounmap(info->screen_base);
+ release_mem_region(info->fix.smem_start, screen_fb_size);
+}
+
+
+static int hvfb_probe(struct hv_device *hdev,
+ const struct hv_vmbus_device_id *dev_id)
+{
+ struct fb_info *info;
+ struct hvfb_par *par;
+ int ret;
+
+ info = framebuffer_alloc(sizeof(struct hvfb_par), &hdev->device);
+ if (!info) {
+ pr_err("No memory for framebuffer info\n");
+ return -ENOMEM;
+ }
+
+ par = info->par;
+ par->info = info;
+ par->fb_ready = false;
+ init_completion(&par->wait);
+ INIT_DELAYED_WORK(&par->dwork, hvfb_update_work);
+
+ /* Connect to VSP */
+ hv_set_drvdata(hdev, info);
+ ret = synthvid_connect_vsp(hdev);
+ if (ret) {
+ pr_err("Unable to connect to VSP\n");
+ goto error1;
+ }
+
+ ret = hvfb_getmem(info);
+ if (ret) {
+ pr_err("No memory for framebuffer\n");
+ goto error2;
+ }
+
+ hvfb_get_option(info);
+ pr_info("Screen resolution: %dx%d, Color depth: %d\n",
+ screen_width, screen_height, screen_depth);
+
+
+ /* Set up fb_info */
+ info->flags = FBINFO_DEFAULT;
+
+ info->var.xres_virtual = info->var.xres = screen_width;
+ info->var.yres_virtual = info->var.yres = screen_height;
+ info->var.bits_per_pixel = screen_depth;
+
+ if (info->var.bits_per_pixel == 16) {
+ info->var.red = (struct fb_bitfield){11, 5, 0};
+ info->var.green = (struct fb_bitfield){5, 6, 0};
+ info->var.blue = (struct fb_bitfield){0, 5, 0};
+ info->var.transp = (struct fb_bitfield){0, 0, 0};
+ } else {
+ info->var.red = (struct fb_bitfield){16, 8, 0};
+ info->var.green = (struct fb_bitfield){8, 8, 0};
+ info->var.blue = (struct fb_bitfield){0, 8, 0};
+ info->var.transp = (struct fb_bitfield){24, 8, 0};
+ }
+
+ info->var.activate = FB_ACTIVATE_NOW;
+ info->var.height = -1;
+ info->var.width = -1;
+ info->var.vmode = FB_VMODE_NONINTERLACED;
+
+ strcpy(info->fix.id, KBUILD_MODNAME);
+ info->fix.type = FB_TYPE_PACKED_PIXELS;
+ info->fix.visual = FB_VISUAL_TRUECOLOR;
+ info->fix.line_length = screen_width * screen_depth / 8;
+ info->fix.accel = FB_ACCEL_NONE;
+
+ info->fbops = &hvfb_ops;
+ info->pseudo_palette = par->pseudo_palette;
+
+ /* Send config to host */
+ ret = synthvid_send_config(hdev);
+ if (ret)
+ goto error;
+
+ ret = register_framebuffer(info);
+ if (ret) {
+ pr_err("Unable to register framebuffer\n");
+ goto error;
+ }
+
+ par->fb_ready = true;
+
+ return 0;
+
+error:
+ hvfb_putmem(info);
+error2:
+ vmbus_close(hdev->channel);
+error1:
+ cancel_delayed_work_sync(&par->dwork);
+ hv_set_drvdata(hdev, NULL);
+ framebuffer_release(info);
+ return ret;
+}
+
+
+static int hvfb_remove(struct hv_device *hdev)
+{
+ struct fb_info *info = hv_get_drvdata(hdev);
+ struct hvfb_par *par = info->par;
+
+ par->update = false;
+ par->fb_ready = false;
+
+ unregister_framebuffer(info);
+ cancel_delayed_work_sync(&par->dwork);
+
+ vmbus_close(hdev->channel);
+ hv_set_drvdata(hdev, NULL);
+
+ hvfb_putmem(info);
+ framebuffer_release(info);
+
+ return 0;
+}
+
+
+static const struct hv_vmbus_device_id id_table[] = {
+ /* Synthetic Video Device GUID */
+ {HV_SYNTHVID_GUID},
+ {}
+};
+
+MODULE_DEVICE_TABLE(vmbus, id_table);
+
+static struct hv_driver hvfb_drv = {
+ .name = KBUILD_MODNAME,
+ .id_table = id_table,
+ .probe = hvfb_probe,
+ .remove = hvfb_remove,
+};
+
+
+static int __init hvfb_drv_init(void)
+{
+ return vmbus_driver_register(&hvfb_drv);
+}
+
+static void __exit hvfb_drv_exit(void)
+{
+ vmbus_driver_unregister(&hvfb_drv);
+}
+
+module_init(hvfb_drv_init);
+module_exit(hvfb_drv_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_VERSION(HV_DRV_VERSION);
+MODULE_DESCRIPTION("Microsoft Hyper-V Synthetic Video Frame Buffer Driver");
diff --git a/drivers/video/matrox/matroxfb_maven.c b/drivers/video/matrox/matroxfb_maven.c
index 217678e0b983..fd2897455696 100644
--- a/drivers/video/matrox/matroxfb_maven.c
+++ b/drivers/video/matrox/matroxfb_maven.c
@@ -137,8 +137,20 @@ static int* get_ctrl_ptr(struct maven_data* md, int idx) {
static int maven_get_reg(struct i2c_client* c, char reg) {
char dst;
- struct i2c_msg msgs[] = {{ c->addr, I2C_M_REV_DIR_ADDR, sizeof(reg), &reg },
- { c->addr, I2C_M_RD | I2C_M_NOSTART, sizeof(dst), &dst }};
+ struct i2c_msg msgs[] = {
+ {
+ .addr = c->addr,
+ .flags = I2C_M_REV_DIR_ADDR,
+ .len = sizeof(reg),
+ .buf = &reg
+ },
+ {
+ .addr = c->addr,
+ .flags = I2C_M_RD | I2C_M_NOSTART,
+ .len = sizeof(dst),
+ .buf = &dst
+ }
+ };
s32 err;
err = i2c_transfer(c->adapter, msgs, 2);
diff --git a/drivers/video/uvesafb.c b/drivers/video/uvesafb.c
index b75db0186488..0d0a43c5de4f 100644
--- a/drivers/video/uvesafb.c
+++ b/drivers/video/uvesafb.c
@@ -166,7 +166,7 @@ static int uvesafb_exec(struct uvesafb_ktask *task)
memcpy(&m->id, &uvesafb_cn_id, sizeof(m->id));
m->seq = seq;
m->len = len;
- m->ack = random32();
+ m->ack = prandom_u32();
/* uvesafb_task structure */
memcpy(m + 1, &task->t, sizeof(task->t));
diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig
index 5a32232cf7c1..7b16994aa6e1 100644
--- a/drivers/xen/Kconfig
+++ b/drivers/xen/Kconfig
@@ -145,9 +145,9 @@ config SWIOTLB_XEN
select SWIOTLB
config XEN_TMEM
- bool
+ tristate
depends on !ARM
- default y if (CLEANCACHE || FRONTSWAP)
+ default m if (CLEANCACHE || FRONTSWAP)
help
Shim to interface in-kernel Transcendent Memory hooks
(e.g. cleancache and frontswap) to Xen tmem hypercalls.
diff --git a/drivers/xen/tmem.c b/drivers/xen/tmem.c
index 3ee836d42581..e3600be4e7fa 100644
--- a/drivers/xen/tmem.c
+++ b/drivers/xen/tmem.c
@@ -5,6 +5,7 @@
* Author: Dan Magenheimer
*/
+#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
@@ -128,6 +129,7 @@ static int xen_tmem_flush_object(u32 pool_id, struct tmem_oid oid)
return xen_tmem_op(TMEM_FLUSH_OBJECT, pool_id, oid, 0, 0, 0, 0, 0);
}
+#ifndef CONFIG_XEN_TMEM_MODULE
bool __read_mostly tmem_enabled = false;
static int __init enable_tmem(char *s)
@@ -136,6 +138,7 @@ static int __init enable_tmem(char *s)
return 1;
}
__setup("tmem", enable_tmem);
+#endif
#ifdef CONFIG_CLEANCACHE
static int xen_tmem_destroy_pool(u32 pool_id)
@@ -227,16 +230,21 @@ static int tmem_cleancache_init_shared_fs(char *uuid, size_t pagesize)
return xen_tmem_new_pool(shared_uuid, TMEM_POOL_SHARED, pagesize);
}
-static bool __initdata use_cleancache = true;
-
+static bool disable_cleancache __read_mostly;
+static bool disable_selfballooning __read_mostly;
+#ifdef CONFIG_XEN_TMEM_MODULE
+module_param(disable_cleancache, bool, S_IRUGO);
+module_param(disable_selfballooning, bool, S_IRUGO);
+#else
static int __init no_cleancache(char *s)
{
- use_cleancache = false;
+ disable_cleancache = true;
return 1;
}
__setup("nocleancache", no_cleancache);
+#endif
-static struct cleancache_ops __initdata tmem_cleancache_ops = {
+static struct cleancache_ops tmem_cleancache_ops = {
.put_page = tmem_cleancache_put_page,
.get_page = tmem_cleancache_get_page,
.invalidate_page = tmem_cleancache_flush_page,
@@ -353,54 +361,71 @@ static void tmem_frontswap_init(unsigned ignored)
xen_tmem_new_pool(private, TMEM_POOL_PERSIST, PAGE_SIZE);
}
-static bool __initdata use_frontswap = true;
-
+static bool disable_frontswap __read_mostly;
+static bool disable_frontswap_selfshrinking __read_mostly;
+#ifdef CONFIG_XEN_TMEM_MODULE
+module_param(disable_frontswap, bool, S_IRUGO);
+module_param(disable_frontswap_selfshrinking, bool, S_IRUGO);
+#else
static int __init no_frontswap(char *s)
{
- use_frontswap = false;
+ disable_frontswap = true;
return 1;
}
__setup("nofrontswap", no_frontswap);
+#endif
-static struct frontswap_ops __initdata tmem_frontswap_ops = {
+static struct frontswap_ops tmem_frontswap_ops = {
.store = tmem_frontswap_store,
.load = tmem_frontswap_load,
.invalidate_page = tmem_frontswap_flush_page,
.invalidate_area = tmem_frontswap_flush_area,
.init = tmem_frontswap_init
};
+#else /* CONFIG_FRONTSWAP */
+#define disable_frontswap_selfshrinking 1
#endif
-static int __init xen_tmem_init(void)
+static int xen_tmem_init(void)
{
if (!xen_domain())
return 0;
#ifdef CONFIG_FRONTSWAP
- if (tmem_enabled && use_frontswap) {
+ if (tmem_enabled && !disable_frontswap) {
char *s = "";
- struct frontswap_ops old_ops =
+ struct frontswap_ops *old_ops =
frontswap_register_ops(&tmem_frontswap_ops);
tmem_frontswap_poolid = -1;
- if (old_ops.init != NULL)
+ if (IS_ERR(old_ops) || old_ops) {
+ if (IS_ERR(old_ops))
+ return PTR_ERR(old_ops);
s = " (WARNING: frontswap_ops overridden)";
+ }
printk(KERN_INFO "frontswap enabled, RAM provided by "
"Xen Transcendent Memory%s\n", s);
}
#endif
#ifdef CONFIG_CLEANCACHE
BUG_ON(sizeof(struct cleancache_filekey) != sizeof(struct tmem_oid));
- if (tmem_enabled && use_cleancache) {
+ if (tmem_enabled && !disable_cleancache) {
char *s = "";
- struct cleancache_ops old_ops =
+ struct cleancache_ops *old_ops =
cleancache_register_ops(&tmem_cleancache_ops);
- if (old_ops.init_fs != NULL)
+ if (old_ops)
s = " (WARNING: cleancache_ops overridden)";
printk(KERN_INFO "cleancache enabled, RAM provided by "
"Xen Transcendent Memory%s\n", s);
}
#endif
+#ifdef CONFIG_XEN_SELFBALLOONING
+ xen_selfballoon_init(!disable_selfballooning,
+ !disable_frontswap_selfshrinking);
+#endif
return 0;
}
module_init(xen_tmem_init)
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Dan Magenheimer <dan.magenheimer@oracle.com>");
+MODULE_DESCRIPTION("Shim to Xen transcendent memory");
diff --git a/drivers/xen/xen-selfballoon.c b/drivers/xen/xen-selfballoon.c
index 2552d3e0a70f..f2ef569c7cc1 100644
--- a/drivers/xen/xen-selfballoon.c
+++ b/drivers/xen/xen-selfballoon.c
@@ -121,7 +121,7 @@ static DECLARE_DELAYED_WORK(selfballoon_worker, selfballoon_process);
static bool frontswap_selfshrinking __read_mostly;
/* Enable/disable with kernel boot option. */
-static bool use_frontswap_selfshrink __initdata = true;
+static bool use_frontswap_selfshrink = true;
/*
* The default values for the following parameters were deemed reasonable
@@ -185,7 +185,7 @@ static int __init xen_nofrontswap_selfshrink_setup(char *s)
__setup("noselfshrink", xen_nofrontswap_selfshrink_setup);
/* Disable with kernel boot option. */
-static bool use_selfballooning __initdata = true;
+static bool use_selfballooning = true;
static int __init xen_noselfballooning_setup(char *s)
{
@@ -196,7 +196,7 @@ static int __init xen_noselfballooning_setup(char *s)
__setup("noselfballooning", xen_noselfballooning_setup);
#else /* !CONFIG_FRONTSWAP */
/* Enable with kernel boot option. */
-static bool use_selfballooning __initdata = false;
+static bool use_selfballooning;
static int __init xen_selfballooning_setup(char *s)
{
@@ -537,7 +537,7 @@ int register_xen_selfballooning(struct device *dev)
}
EXPORT_SYMBOL(register_xen_selfballooning);
-static int __init xen_selfballoon_init(void)
+int xen_selfballoon_init(bool use_selfballooning, bool use_frontswap_selfshrink)
{
bool enable = false;
@@ -571,7 +571,4 @@ static int __init xen_selfballoon_init(void)
return 0;
}
-
-subsys_initcall(xen_selfballoon_init);
-
-MODULE_LICENSE("GPL");
+EXPORT_SYMBOL(xen_selfballoon_init);
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 0ad61c6a65a5..055562c580b4 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -33,6 +33,7 @@
#include <linux/pagemap.h>
#include <linux/idr.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include <net/9p/9p.h>
#include <net/9p/client.h>
diff --git a/fs/Makefile b/fs/Makefile
index 9d53192236fc..3b2c76759ec9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -10,7 +10,7 @@ obj-y := open.o read_write.o file_table.o super.o \
ioctl.o readdir.o select.o fifo.o dcache.o inode.o \
attr.o bad_inode.o file.o filesystems.o namespace.o \
seq_file.o xattr.o libfs.o fs-writeback.o \
- pnode.o drop_caches.o splice.o sync.o utimes.o \
+ pnode.o splice.o sync.o utimes.o \
stack.o fs_struct.o statfs.o
ifeq ($(CONFIG_BLOCK),y)
@@ -49,6 +49,7 @@ obj-$(CONFIG_FS_POSIX_ACL) += posix_acl.o xattr_acl.o
obj-$(CONFIG_NFS_COMMON) += nfs_common/
obj-$(CONFIG_GENERIC_ACL) += generic_acl.o
obj-$(CONFIG_COREDUMP) += coredump.o
+obj-$(CONFIG_SYSCTL) += drop_caches.o
obj-$(CONFIG_FHANDLE) += fhandle.o
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 7e03eadb40c0..a890db4b9898 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
+#include <linux/aio.h>
#include "internal.h"
static int afs_write_back_from_locked_page(struct afs_writeback *wb,
diff --git a/fs/aio.c b/fs/aio.c
index c3ebb98a527b..1cbbe59af520 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -8,6 +8,8 @@
*
* See ../COPYING for licensing terms.
*/
+#define pr_fmt(fmt) "%s: " fmt, __func__
+
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/errno.h>
@@ -18,14 +20,14 @@
#include <linux/backing-dev.h>
#include <linux/uio.h>
-#define DEBUG 0
-
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/mman.h>
+#include <linux/bio.h>
#include <linux/mmu_context.h>
+#include <linux/percpu.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/aio.h>
@@ -35,15 +37,110 @@
#include <linux/eventfd.h>
#include <linux/blkdev.h>
#include <linux/compat.h>
+#include <linux/percpu-refcount.h>
#include <asm/kmap_types.h>
#include <asm/uaccess.h>
-#if DEBUG > 1
-#define dprintk printk
-#else
-#define dprintk(x...) do { ; } while (0)
-#endif
+#define AIO_RING_MAGIC 0xa10a10a1
+#define AIO_RING_COMPAT_FEATURES 1
+#define AIO_RING_INCOMPAT_FEATURES 0
+struct aio_ring {
+ unsigned id; /* kernel internal index number */
+ unsigned nr; /* number of io_events */
+ unsigned head;
+ unsigned tail;
+
+ unsigned magic;
+ unsigned compat_features;
+ unsigned incompat_features;
+ unsigned header_length; /* size of aio_ring */
+
+
+ struct io_event io_events[0];
+}; /* 128 bytes + ring size */
+
+#define AIO_RING_PAGES 8
+
+struct kioctx_cpu {
+ unsigned reqs_available;
+};
+
+struct kioctx {
+ struct percpu_ref users;
+
+ /* This needs improving */
+ unsigned long user_id;
+ struct hlist_node list;
+
+ struct __percpu kioctx_cpu *cpu;
+
+ /* Size of ringbuffer, in units of struct io_event */
+ unsigned nr_events;
+
+ /*
+ * Maximum number of outstanding requests:
+ * sys_io_setup currently limits this to an unsigned int
+ */
+ unsigned max_reqs;
+
+ /*
+ * For percpu reqs_available, number of slots we move to/from global
+ * counter at a time:
+ */
+ unsigned req_batch;
+
+ unsigned long mmap_base;
+ unsigned long mmap_size;
+
+ struct page **ring_pages;
+ long nr_pages;
+
+ struct rcu_head rcu_head;
+ struct work_struct rcu_work;
+
+ struct {
+ /*
+ * This counts the number of available slots in the ringbuffer,
+ * so we avoid overflowing it: it's decremented (if positive)
+ * when allocating a kiocb and incremented when the resulting
+ * io_event is pulled off the ringbuffer.
+ *
+ * We batch accesses to it with a percpu version.
+ */
+ atomic_t reqs_available;
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ spinlock_t ctx_lock;
+ struct list_head active_reqs; /* used for cancellation */
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ struct mutex ring_lock;
+ wait_queue_head_t wait;
+
+ /*
+ * Copy of the real tail, that aio_complete uses - to reduce
+ * cacheline bouncing. The real tail will tend to be much more
+ * contended - since typically events are delivered one at a
+ * time, and then aio_read_events() slurps them up a bunch at a
+ * time - so it's helpful if aio_read_events() isn't also
+ * contending for the tail. So, aio_complete() updates
+ * shadow_tail whenever it updates tail.
+ *
+ * Also needed because tail is used as a hacky lock and isn't
+ * always the real tail.
+ */
+ unsigned shadow_tail;
+ } ____cacheline_aligned_in_smp;
+
+ struct {
+ unsigned tail;
+ } ____cacheline_aligned_in_smp;
+
+ struct page *internal_pages[AIO_RING_PAGES];
+};
/*------ sysctl variables----*/
static DEFINE_SPINLOCK(aio_nr_lock);
@@ -54,11 +151,6 @@ unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio request
static struct kmem_cache *kiocb_cachep;
static struct kmem_cache *kioctx_cachep;
-static struct workqueue_struct *aio_wq;
-
-static void aio_kick_handler(struct work_struct *);
-static void aio_queue_work(struct kioctx *);
-
/* aio_setup
* Creates the slab caches used by the aio routines, panic on
* failure as this is done early during the boot sequence.
@@ -68,10 +160,7 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
- aio_wq = alloc_workqueue("aio", 0, 1); /* used to limit concurrency */
- BUG_ON(!aio_wq);
-
- pr_debug("aio_setup: sizeof(struct page) = %d\n", (int)sizeof(struct page));
+ pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
return 0;
}
@@ -79,28 +168,23 @@ __initcall(aio_setup);
static void aio_free_ring(struct kioctx *ctx)
{
- struct aio_ring_info *info = &ctx->ring_info;
long i;
- for (i=0; i<info->nr_pages; i++)
- put_page(info->ring_pages[i]);
+ for (i = 0; i < ctx->nr_pages; i++)
+ put_page(ctx->ring_pages[i]);
- if (info->mmap_size) {
- BUG_ON(ctx->mm != current->mm);
- vm_munmap(info->mmap_base, info->mmap_size);
- }
+ if (ctx->mmap_size)
+ vm_munmap(ctx->mmap_base, ctx->mmap_size);
- if (info->ring_pages && info->ring_pages != info->internal_pages)
- kfree(info->ring_pages);
- info->ring_pages = NULL;
- info->nr = 0;
+ if (ctx->ring_pages && ctx->ring_pages != ctx->internal_pages)
+ kfree(ctx->ring_pages);
}
static int aio_setup_ring(struct kioctx *ctx)
{
struct aio_ring *ring;
- struct aio_ring_info *info = &ctx->ring_info;
unsigned nr_events = ctx->max_reqs;
+ struct mm_struct *mm = current->mm;
unsigned long size, populate;
int nr_pages;
@@ -116,46 +200,44 @@ static int aio_setup_ring(struct kioctx *ctx)
nr_events = (PAGE_SIZE * nr_pages - sizeof(struct aio_ring)) / sizeof(struct io_event);
- info->nr = 0;
- info->ring_pages = info->internal_pages;
+ ctx->nr_events = 0;
+ ctx->ring_pages = ctx->internal_pages;
if (nr_pages > AIO_RING_PAGES) {
- info->ring_pages = kcalloc(nr_pages, sizeof(struct page *), GFP_KERNEL);
- if (!info->ring_pages)
+ ctx->ring_pages = kcalloc(nr_pages, sizeof(struct page *),
+ GFP_KERNEL);
+ if (!ctx->ring_pages)
return -ENOMEM;
}
- info->mmap_size = nr_pages * PAGE_SIZE;
- dprintk("attempting mmap of %lu bytes\n", info->mmap_size);
- down_write(&ctx->mm->mmap_sem);
- info->mmap_base = do_mmap_pgoff(NULL, 0, info->mmap_size,
- PROT_READ|PROT_WRITE,
- MAP_ANONYMOUS|MAP_PRIVATE, 0,
- &populate);
- if (IS_ERR((void *)info->mmap_base)) {
- up_write(&ctx->mm->mmap_sem);
- info->mmap_size = 0;
+ ctx->mmap_size = nr_pages * PAGE_SIZE;
+ pr_debug("attempting mmap of %lu bytes\n", ctx->mmap_size);
+ down_write(&mm->mmap_sem);
+ ctx->mmap_base = do_mmap_pgoff(NULL, 0, ctx->mmap_size,
+ PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, 0, &populate);
+ if (IS_ERR((void *)ctx->mmap_base)) {
+ up_write(&mm->mmap_sem);
+ ctx->mmap_size = 0;
aio_free_ring(ctx);
return -EAGAIN;
}
- dprintk("mmap address: 0x%08lx\n", info->mmap_base);
- info->nr_pages = get_user_pages(current, ctx->mm,
- info->mmap_base, nr_pages,
- 1, 0, info->ring_pages, NULL);
- up_write(&ctx->mm->mmap_sem);
+ pr_debug("mmap address: 0x%08lx\n", ctx->mmap_base);
+ ctx->nr_pages = get_user_pages(current, mm, ctx->mmap_base, nr_pages,
+ 1, 0, ctx->ring_pages, NULL);
+ up_write(&mm->mmap_sem);
- if (unlikely(info->nr_pages != nr_pages)) {
+ if (unlikely(ctx->nr_pages != nr_pages)) {
aio_free_ring(ctx);
return -EAGAIN;
}
if (populate)
- mm_populate(info->mmap_base, populate);
+ mm_populate(ctx->mmap_base, populate);
- ctx->user_id = info->mmap_base;
+ ctx->user_id = ctx->mmap_base;
+ ctx->nr_events = nr_events; /* trusted copy */
- info->nr = nr_events; /* trusted copy */
-
- ring = kmap_atomic(info->ring_pages[0]);
+ ring = kmap_atomic(ctx->ring_pages[0]);
ring->nr = nr_events; /* user copy */
ring->id = ctx->user_id;
ring->head = ring->tail = 0;
@@ -164,72 +246,145 @@ static int aio_setup_ring(struct kioctx *ctx)
ring->incompat_features = AIO_RING_INCOMPAT_FEATURES;
ring->header_length = sizeof(struct aio_ring);
kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
return 0;
}
-
-/* aio_ring_event: returns a pointer to the event at the given index from
- * kmap_atomic(). Release the pointer with put_aio_ring_event();
- */
#define AIO_EVENTS_PER_PAGE (PAGE_SIZE / sizeof(struct io_event))
#define AIO_EVENTS_FIRST_PAGE ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
#define AIO_EVENTS_OFFSET (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
-#define aio_ring_event(info, nr) ({ \
- unsigned pos = (nr) + AIO_EVENTS_OFFSET; \
- struct io_event *__event; \
- __event = kmap_atomic( \
- (info)->ring_pages[pos / AIO_EVENTS_PER_PAGE]); \
- __event += pos % AIO_EVENTS_PER_PAGE; \
- __event; \
-})
-
-#define put_aio_ring_event(event) do { \
- struct io_event *__event = (event); \
- (void)__event; \
- kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
-} while(0)
-
-static void ctx_rcu_free(struct rcu_head *head)
+void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+{
+ struct kioctx *ctx = req->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+
+ if (!req->ki_list.next)
+ list_add(&req->ki_list, &ctx->active_reqs);
+
+ req->ki_cancel = cancel;
+
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+EXPORT_SYMBOL(kiocb_set_cancel_fn);
+
+static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
+ struct io_event *res)
+{
+ kiocb_cancel_fn *old, *cancel;
+ int ret = -EINVAL;
+
+ /*
+ * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
+ * actually has a cancel function, hence the cmpxchg()
+ */
+
+ cancel = ACCESS_ONCE(kiocb->ki_cancel);
+ do {
+ if (!cancel || cancel == KIOCB_CANCELLED)
+ return ret;
+
+ old = cancel;
+ cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
+ } while (cancel != old);
+
+ atomic_inc(&kiocb->ki_users);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ memset(res, 0, sizeof(*res));
+ res->obj = (u64)(unsigned long)kiocb->ki_obj.user;
+ res->data = kiocb->ki_user_data;
+ ret = cancel(kiocb, res);
+
+ spin_lock_irq(&ctx->ctx_lock);
+
+ return ret;
+}
+
+static void free_ioctx_rcu(struct rcu_head *head)
{
struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+
+ free_percpu(ctx->cpu);
kmem_cache_free(kioctx_cachep, ctx);
}
-/* __put_ioctx
- * Called when the last user of an aio context has gone away,
- * and the struct needs to be freed.
+/*
+ * When this function runs, the kioctx has been removed from the "hash table"
+ * and ctx->users has dropped to 0, so we know no more kiocbs can be submitted -
+ * now it's safe to cancel any that need to be.
*/
-static void __put_ioctx(struct kioctx *ctx)
+static void free_ioctx(struct kioctx *ctx)
{
- unsigned nr_events = ctx->max_reqs;
- BUG_ON(ctx->reqs_active);
+ struct aio_ring *ring;
+ struct io_event res;
+ struct kiocb *req;
+ unsigned cpu, head, avail;
- cancel_delayed_work_sync(&ctx->wq);
- aio_free_ring(ctx);
- mmdrop(ctx->mm);
- ctx->mm = NULL;
- if (nr_events) {
- spin_lock(&aio_nr_lock);
- BUG_ON(aio_nr - nr_events > aio_nr);
- aio_nr -= nr_events;
- spin_unlock(&aio_nr_lock);
+ spin_lock_irq(&ctx->ctx_lock);
+
+ while (!list_empty(&ctx->active_reqs)) {
+ req = list_first_entry(&ctx->active_reqs,
+ struct kiocb, ki_list);
+
+ list_del_init(&req->ki_list);
+ kiocb_cancel(ctx, req, &res);
}
- pr_debug("__put_ioctx: freeing %p\n", ctx);
- call_rcu(&ctx->rcu_head, ctx_rcu_free);
-}
-static inline int try_get_ioctx(struct kioctx *kioctx)
-{
- return atomic_inc_not_zero(&kioctx->users);
+ spin_unlock_irq(&ctx->ctx_lock);
+
+ for_each_possible_cpu(cpu) {
+ struct kioctx_cpu *kcpu = per_cpu_ptr(ctx->cpu, cpu);
+
+ atomic_add(kcpu->reqs_available, &ctx->reqs_available);
+ kcpu->reqs_available = 0;
+ }
+
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
+ kunmap_atomic(ring);
+
+ while (atomic_read(&ctx->reqs_available) < ctx->max_reqs) {
+ wait_event(ctx->wait,
+ (head != ctx->shadow_tail) ||
+ (atomic_read(&ctx->reqs_available) >= ctx->max_reqs));
+
+ avail = (head <= ctx->shadow_tail ?
+ ctx->shadow_tail : ctx->nr_events) - head;
+
+ atomic_add(avail, &ctx->reqs_available);
+ head += avail;
+ head %= ctx->nr_events;
+ }
+
+ WARN_ON(atomic_read(&ctx->reqs_available) > ctx->max_reqs);
+
+ aio_free_ring(ctx);
+
+ spin_lock(&aio_nr_lock);
+ BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
+ aio_nr -= ctx->max_reqs;
+ spin_unlock(&aio_nr_lock);
+
+ pr_debug("freeing %p\n", ctx);
+
+ /*
+ * Here the call_rcu() is between the wait_event() for reqs_active to
+ * hit 0, and freeing the ioctx.
+ *
+ * aio_complete() decrements reqs_active, but it has to touch the ioctx
+ * after to issue a wakeup so we use rcu.
+ */
+ call_rcu(&ctx->rcu_head, free_ioctx_rcu);
}
-static inline void put_ioctx(struct kioctx *kioctx)
+static void put_ioctx(struct kioctx *ctx)
{
- BUG_ON(atomic_read(&kioctx->users) <= 0);
- if (unlikely(atomic_dec_and_test(&kioctx->users)))
- __put_ioctx(kioctx);
+ if (percpu_ref_put(&ctx->users))
+ free_ioctx(ctx);
}
/* ioctx_alloc
@@ -237,10 +392,22 @@ static inline void put_ioctx(struct kioctx *kioctx)
*/
static struct kioctx *ioctx_alloc(unsigned nr_events)
{
- struct mm_struct *mm;
+ struct mm_struct *mm = current->mm;
struct kioctx *ctx;
int err = -ENOMEM;
+ /*
+ * We keep track of the number of available ringbuffer slots, to prevent
+ * overflow (reqs_available), and we also use percpu counters for this.
+ *
+ * So since up to half the slots might be on other cpu's percpu counters
+ * and unavailable, double nr_events so userspace sees what they
+ * expected: additionally, we move req_batch slots to/from percpu
+ * counters at a time, so make sure that isn't 0:
+ */
+ nr_events = max(nr_events, num_possible_cpus() * 4);
+ nr_events *= 2;
+
/* Prevent overflows */
if ((nr_events > (0x10000000U / sizeof(struct io_event))) ||
(nr_events > (0x10000000U / sizeof(struct kiocb)))) {
@@ -256,21 +423,27 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
return ERR_PTR(-ENOMEM);
ctx->max_reqs = nr_events;
- mm = ctx->mm = current->mm;
- atomic_inc(&mm->mm_count);
+ atomic_set(&ctx->reqs_available, nr_events);
+ ctx->req_batch = nr_events / (num_possible_cpus() * 4);
+
+ percpu_ref_init(&ctx->users);
+ rcu_read_lock();
+ percpu_ref_get(&ctx->users);
+ rcu_read_unlock();
- atomic_set(&ctx->users, 2);
spin_lock_init(&ctx->ctx_lock);
- spin_lock_init(&ctx->ring_info.ring_lock);
+ mutex_init(&ctx->ring_lock);
init_waitqueue_head(&ctx->wait);
INIT_LIST_HEAD(&ctx->active_reqs);
- INIT_LIST_HEAD(&ctx->run_list);
- INIT_DELAYED_WORK(&ctx->wq, aio_kick_handler);
- if (aio_setup_ring(ctx) < 0)
+ ctx->cpu = alloc_percpu(struct kioctx_cpu);
+ if (!ctx->cpu)
goto out_freectx;
+ if (aio_setup_ring(ctx) < 0)
+ goto out_freepcpu;
+
/* limit the number of system wide aios */
spin_lock(&aio_nr_lock);
if (aio_nr + nr_events > aio_max_nr ||
@@ -286,64 +459,58 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
hlist_add_head_rcu(&ctx->list, &mm->ioctx_list);
spin_unlock(&mm->ioctx_lock);
- dprintk("aio: allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
- ctx, ctx->user_id, current->mm, ctx->ring_info.nr);
+ pr_debug("allocated ioctx %p[%ld]: mm=%p mask=0x%x\n",
+ ctx, ctx->user_id, mm, ctx->nr_events);
return ctx;
out_cleanup:
err = -EAGAIN;
aio_free_ring(ctx);
+out_freepcpu:
+ free_percpu(ctx->cpu);
out_freectx:
- mmdrop(mm);
kmem_cache_free(kioctx_cachep, ctx);
- dprintk("aio: error allocating ioctx %d\n", err);
+ pr_debug("error allocating ioctx %d\n", err);
return ERR_PTR(err);
}
-/* kill_ctx
- * Cancels all outstanding aio requests on an aio context. Used
- * when the processes owning a context have all exited to encourage
- * the rapid destruction of the kioctx.
- */
-static void kill_ctx(struct kioctx *ctx)
+static void kill_ioctx_work(struct work_struct *work)
{
- int (*cancel)(struct kiocb *, struct io_event *);
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- struct io_event res;
+ struct kioctx *ctx = container_of(work, struct kioctx, rcu_work);
- spin_lock_irq(&ctx->ctx_lock);
- ctx->dead = 1;
- while (!list_empty(&ctx->active_reqs)) {
- struct list_head *pos = ctx->active_reqs.next;
- struct kiocb *iocb = list_kiocb(pos);
- list_del_init(&iocb->ki_list);
- cancel = iocb->ki_cancel;
- kiocbSetCancelled(iocb);
- if (cancel) {
- iocb->ki_users++;
- spin_unlock_irq(&ctx->ctx_lock);
- cancel(iocb, &res);
- spin_lock_irq(&ctx->ctx_lock);
- }
- }
+ wake_up_all(&ctx->wait);
+ put_ioctx(ctx);
+}
- if (!ctx->reqs_active)
- goto out;
+static void kill_ioctx_rcu(struct rcu_head *head)
+{
+ struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
- add_wait_queue(&ctx->wait, &wait);
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- while (ctx->reqs_active) {
- spin_unlock_irq(&ctx->ctx_lock);
- io_schedule();
- set_task_state(tsk, TASK_UNINTERRUPTIBLE);
- spin_lock_irq(&ctx->ctx_lock);
- }
- __set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(&ctx->wait, &wait);
+ INIT_WORK(&ctx->rcu_work, kill_ioctx_work);
+ schedule_work(&ctx->rcu_work);
+}
-out:
- spin_unlock_irq(&ctx->ctx_lock);
+/* kill_ioctx
+ * Cancels all outstanding aio requests on an aio context. Used
+ * when the processes owning a context have all exited to encourage
+ * the rapid destruction of the kioctx.
+ */
+static void kill_ioctx(struct kioctx *ctx)
+{
+ if (percpu_ref_kill(&ctx->users)) {
+ hlist_del_rcu(&ctx->list);
+ /* Between hlist_del_rcu() and dropping the initial ref */
+ synchronize_rcu();
+
+ /*
+ * We can't punt to workqueue here because put_ioctx() ->
+ * free_ioctx() will unmap the ringbuffer, and that has to be
+ * done in the original process's context. kill_ioctx_rcu/work()
+ * exist for exit_aio(), as in that path free_ioctx() won't do
+ * the unmap.
+ */
+ kill_ioctx_work(&ctx->rcu_work);
+ }
}
/* wait_on_sync_kiocb:
@@ -351,9 +518,9 @@ out:
*/
ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
{
- while (iocb->ki_users) {
+ while (atomic_read(&iocb->ki_users)) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!iocb->ki_users)
+ if (!atomic_read(&iocb->ki_users))
break;
io_schedule();
}
@@ -362,28 +529,20 @@ ssize_t wait_on_sync_kiocb(struct kiocb *iocb)
}
EXPORT_SYMBOL(wait_on_sync_kiocb);
-/* exit_aio: called when the last user of mm goes away. At this point,
- * there is no way for any new requests to be submited or any of the
- * io_* syscalls to be called on the context. However, there may be
- * outstanding requests which hold references to the context; as they
- * go away, they will call put_ioctx and release any pinned memory
- * associated with the request (held via struct page * references).
+/*
+ * exit_aio: called when the last user of mm goes away. At this point, there is
+ * no way for any new requests to be submited or any of the io_* syscalls to be
+ * called on the context.
+ *
+ * There may be outstanding kiocbs, but free_ioctx() will explicitly wait on
+ * them.
*/
void exit_aio(struct mm_struct *mm)
{
struct kioctx *ctx;
+ struct hlist_node *n;
- while (!hlist_empty(&mm->ioctx_list)) {
- ctx = hlist_entry(mm->ioctx_list.first, struct kioctx, list);
- hlist_del_rcu(&ctx->list);
-
- kill_ctx(ctx);
-
- if (1 != atomic_read(&ctx->users))
- printk(KERN_DEBUG
- "exit_aio:ioctx still alive: %d %d %d\n",
- atomic_read(&ctx->users), ctx->dead,
- ctx->reqs_active);
+ hlist_for_each_entry_safe(ctx, n, &mm->ioctx_list, list) {
/*
* We don't need to bother with munmap() here -
* exit_mmap(mm) is coming and it'll unmap everything.
@@ -391,150 +550,95 @@ void exit_aio(struct mm_struct *mm)
* as indicator that it needs to unmap the area,
* just set it to 0; aio_free_ring() is the only
* place that uses ->mmap_size, so it's safe.
- * That way we get all munmap done to current->mm -
- * all other callers have ctx->mm == current->mm.
*/
- ctx->ring_info.mmap_size = 0;
- put_ioctx(ctx);
+ ctx->mmap_size = 0;
+
+ if (percpu_ref_kill(&ctx->users)) {
+ hlist_del_rcu(&ctx->list);
+ call_rcu(&ctx->rcu_head, kill_ioctx_rcu);
+ }
}
}
-/* aio_get_req
- * Allocate a slot for an aio request. Increments the users count
- * of the kioctx so that the kioctx stays around until all requests are
- * complete. Returns NULL if no requests are free.
- *
- * Returns with kiocb->users set to 2. The io submit code path holds
- * an extra reference while submitting the i/o.
- * This prevents races between the aio code path referencing the
- * req (after submitting it) and aio_complete() freeing the req.
- */
-static struct kiocb *__aio_get_req(struct kioctx *ctx)
+static void put_reqs_available(struct kioctx *ctx, unsigned nr)
{
- struct kiocb *req = NULL;
+ struct kioctx_cpu *kcpu;
- req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
- if (unlikely(!req))
- return NULL;
-
- req->ki_flags = 0;
- req->ki_users = 2;
- req->ki_key = 0;
- req->ki_ctx = ctx;
- req->ki_cancel = NULL;
- req->ki_retry = NULL;
- req->ki_dtor = NULL;
- req->private = NULL;
- req->ki_iovec = NULL;
- INIT_LIST_HEAD(&req->ki_run_list);
- req->ki_eventfd = NULL;
+ preempt_disable();
+ kcpu = this_cpu_ptr(ctx->cpu);
- return req;
-}
-
-/*
- * struct kiocb's are allocated in batches to reduce the number of
- * times the ctx lock is acquired and released.
- */
-#define KIOCB_BATCH_SIZE 32L
-struct kiocb_batch {
- struct list_head head;
- long count; /* number of requests left to allocate */
-};
+ kcpu->reqs_available += nr;
+ while (kcpu->reqs_available >= ctx->req_batch * 2) {
+ kcpu->reqs_available -= ctx->req_batch;
+ atomic_add(ctx->req_batch, &ctx->reqs_available);
+ }
-static void kiocb_batch_init(struct kiocb_batch *batch, long total)
-{
- INIT_LIST_HEAD(&batch->head);
- batch->count = total;
+ preempt_enable();
}
-static void kiocb_batch_free(struct kioctx *ctx, struct kiocb_batch *batch)
+static bool get_reqs_available(struct kioctx *ctx)
{
- struct kiocb *req, *n;
-
- if (list_empty(&batch->head))
- return;
-
- spin_lock_irq(&ctx->ctx_lock);
- list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
- list_del(&req->ki_batch);
- list_del(&req->ki_list);
- kmem_cache_free(kiocb_cachep, req);
- ctx->reqs_active--;
- }
- if (unlikely(!ctx->reqs_active && ctx->dead))
- wake_up_all(&ctx->wait);
- spin_unlock_irq(&ctx->ctx_lock);
-}
+ struct kioctx_cpu *kcpu;
+ bool ret = false;
-/*
- * Allocate a batch of kiocbs. This avoids taking and dropping the
- * context lock a lot during setup.
- */
-static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
-{
- unsigned short allocated, to_alloc;
- long avail;
- struct kiocb *req, *n;
- struct aio_ring *ring;
+ preempt_disable();
+ kcpu = this_cpu_ptr(ctx->cpu);
- to_alloc = min(batch->count, KIOCB_BATCH_SIZE);
- for (allocated = 0; allocated < to_alloc; allocated++) {
- req = __aio_get_req(ctx);
- if (!req)
- /* allocation failed, go with what we've got */
- break;
- list_add(&req->ki_batch, &batch->head);
- }
+ if (!kcpu->reqs_available) {
+ int old, avail = atomic_read(&ctx->reqs_available);
- if (allocated == 0)
- goto out;
+ do {
+ if (avail < ctx->req_batch)
+ goto out;
- spin_lock_irq(&ctx->ctx_lock);
- ring = kmap_atomic(ctx->ring_info.ring_pages[0]);
-
- avail = aio_ring_avail(&ctx->ring_info, ring) - ctx->reqs_active;
- BUG_ON(avail < 0);
- if (avail < allocated) {
- /* Trim back the number of requests. */
- list_for_each_entry_safe(req, n, &batch->head, ki_batch) {
- list_del(&req->ki_batch);
- kmem_cache_free(kiocb_cachep, req);
- if (--allocated <= avail)
- break;
- }
- }
+ old = avail;
+ avail = atomic_cmpxchg(&ctx->reqs_available,
+ avail, avail - ctx->req_batch);
+ } while (avail != old);
- batch->count -= allocated;
- list_for_each_entry(req, &batch->head, ki_batch) {
- list_add(&req->ki_list, &ctx->active_reqs);
- ctx->reqs_active++;
+ kcpu->reqs_available += ctx->req_batch;
}
- kunmap_atomic(ring);
- spin_unlock_irq(&ctx->ctx_lock);
-
+ ret = true;
+ kcpu->reqs_available--;
out:
- return allocated;
+ preempt_enable();
+ return ret;
}
-static inline struct kiocb *aio_get_req(struct kioctx *ctx,
- struct kiocb_batch *batch)
+/* aio_get_req
+ * Allocate a slot for an aio request. Increments the ki_users count
+ * of the kioctx so that the kioctx stays around until all requests are
+ * complete. Returns NULL if no requests are free.
+ *
+ * Returns with kiocb->ki_users set to 2. The io submit code path holds
+ * an extra reference while submitting the i/o.
+ * This prevents races between the aio code path referencing the
+ * req (after submitting it) and aio_complete() freeing the req.
+ */
+static inline struct kiocb *aio_get_req(struct kioctx *ctx)
{
struct kiocb *req;
- if (list_empty(&batch->head))
- if (kiocb_batch_refill(ctx, batch) == 0)
- return NULL;
- req = list_first_entry(&batch->head, struct kiocb, ki_batch);
- list_del(&req->ki_batch);
+ if (!get_reqs_available(ctx))
+ return NULL;
+
+ req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
+ if (unlikely(!req))
+ goto out_put;
+
+ atomic_set(&req->ki_users, 2);
+ req->ki_ctx = ctx;
return req;
+out_put:
+ put_reqs_available(ctx, 1);
+ return NULL;
}
-static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
+static void kiocb_free(struct kiocb *req)
{
- assert_spin_locked(&ctx->ctx_lock);
-
+ if (req->ki_filp)
+ fput(req->ki_filp);
if (req->ki_eventfd != NULL)
eventfd_ctx_put(req->ki_eventfd);
if (req->ki_dtor)
@@ -542,48 +646,12 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
if (req->ki_iovec != &req->ki_inline_vec)
kfree(req->ki_iovec);
kmem_cache_free(kiocb_cachep, req);
- ctx->reqs_active--;
-
- if (unlikely(!ctx->reqs_active && ctx->dead))
- wake_up_all(&ctx->wait);
}
-/* __aio_put_req
- * Returns true if this put was the last user of the request.
- */
-static int __aio_put_req(struct kioctx *ctx, struct kiocb *req)
+void aio_put_req(struct kiocb *req)
{
- dprintk(KERN_DEBUG "aio_put(%p): f_count=%ld\n",
- req, atomic_long_read(&req->ki_filp->f_count));
-
- assert_spin_locked(&ctx->ctx_lock);
-
- req->ki_users--;
- BUG_ON(req->ki_users < 0);
- if (likely(req->ki_users))
- return 0;
- list_del(&req->ki_list); /* remove from active_reqs */
- req->ki_cancel = NULL;
- req->ki_retry = NULL;
-
- fput(req->ki_filp);
- req->ki_filp = NULL;
- really_put_req(ctx, req);
- return 1;
-}
-
-/* aio_put_req
- * Returns true if this put was the last user of the kiocb,
- * false if the request is still in use.
- */
-int aio_put_req(struct kiocb *req)
-{
- struct kioctx *ctx = req->ki_ctx;
- int ret;
- spin_lock_irq(&ctx->ctx_lock);
- ret = __aio_put_req(ctx, req);
- spin_unlock_irq(&ctx->ctx_lock);
- return ret;
+ if (atomic_dec_and_test(&req->ki_users))
+ kiocb_free(req);
}
EXPORT_SYMBOL(aio_put_req);
@@ -595,13 +663,8 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
rcu_read_lock();
hlist_for_each_entry_rcu(ctx, &mm->ioctx_list, list) {
- /*
- * RCU protects us against accessing freed memory but
- * we have to be careful not to get a reference when the
- * reference count already dropped to 0 (ctx->dead test
- * is unreliable because of races).
- */
- if (ctx->user_id == ctx_id && !ctx->dead && try_get_ioctx(ctx)){
+ if (ctx->user_id == ctx_id){
+ percpu_ref_get(&ctx->users);
ret = ctx;
break;
}
@@ -611,610 +674,330 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
}
-/*
- * Queue up a kiocb to be retried. Assumes that the kiocb
- * has already been marked as kicked, and places it on
- * the retry run list for the corresponding ioctx, if it
- * isn't already queued. Returns 1 if it actually queued
- * the kiocb (to tell the caller to activate the work
- * queue to process it), or 0, if it found that it was
- * already queued.
- */
-static inline int __queue_kicked_iocb(struct kiocb *iocb)
+static inline unsigned kioctx_ring_put(struct kioctx *ctx, struct kiocb *req,
+ unsigned tail)
{
- struct kioctx *ctx = iocb->ki_ctx;
+ struct io_event *ev_page, *event;
+ unsigned pos = tail + AIO_EVENTS_OFFSET;
- assert_spin_locked(&ctx->ctx_lock);
-
- if (list_empty(&iocb->ki_run_list)) {
- list_add_tail(&iocb->ki_run_list,
- &ctx->run_list);
- return 1;
- }
- return 0;
-}
+ if (++tail >= ctx->nr_events)
+ tail = 0;
-/* aio_run_iocb
- * This is the core aio execution routine. It is
- * invoked both for initial i/o submission and
- * subsequent retries via the aio_kick_handler.
- * Expects to be invoked with iocb->ki_ctx->lock
- * already held. The lock is released and reacquired
- * as needed during processing.
- *
- * Calls the iocb retry method (already setup for the
- * iocb on initial submission) for operation specific
- * handling, but takes care of most of common retry
- * execution details for a given iocb. The retry method
- * needs to be non-blocking as far as possible, to avoid
- * holding up other iocbs waiting to be serviced by the
- * retry kernel thread.
- *
- * The trickier parts in this code have to do with
- * ensuring that only one retry instance is in progress
- * for a given iocb at any time. Providing that guarantee
- * simplifies the coding of individual aio operations as
- * it avoids various potential races.
- */
-static ssize_t aio_run_iocb(struct kiocb *iocb)
-{
- struct kioctx *ctx = iocb->ki_ctx;
- ssize_t (*retry)(struct kiocb *);
- ssize_t ret;
+ ev_page = kmap_atomic(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
+ event = ev_page + pos % AIO_EVENTS_PER_PAGE;
- if (!(retry = iocb->ki_retry)) {
- printk("aio_run_iocb: iocb->ki_retry = NULL\n");
- return 0;
- }
+ event->obj = (u64)(unsigned long)req->ki_obj.user;
+ event->data = req->ki_user_data;
+ event->res = req->ki_res;
+ event->res2 = req->ki_res2;
- /*
- * We don't want the next retry iteration for this
- * operation to start until this one has returned and
- * updated the iocb state. However, wait_queue functions
- * can trigger a kick_iocb from interrupt context in the
- * meantime, indicating that data is available for the next
- * iteration. We want to remember that and enable the
- * next retry iteration _after_ we are through with
- * this one.
- *
- * So, in order to be able to register a "kick", but
- * prevent it from being queued now, we clear the kick
- * flag, but make the kick code *think* that the iocb is
- * still on the run list until we are actually done.
- * When we are done with this iteration, we check if
- * the iocb was kicked in the meantime and if so, queue
- * it up afresh.
- */
+ kunmap_atomic(ev_page);
+ flush_dcache_page(ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE]);
- kiocbClearKicked(iocb);
+ pr_debug("%p[%u]: %p: %p %Lx %lx %lx\n",
+ ctx, tail, req, req->ki_obj.user, req->ki_user_data,
+ req->ki_res, req->ki_res2);
- /*
- * This is so that aio_complete knows it doesn't need to
- * pull the iocb off the run list (We can't just call
- * INIT_LIST_HEAD because we don't want a kick_iocb to
- * queue this on the run list yet)
- */
- iocb->ki_run_list.next = iocb->ki_run_list.prev = NULL;
- spin_unlock_irq(&ctx->ctx_lock);
+ return tail;
+}
- /* Quit retrying if the i/o has been cancelled */
- if (kiocbIsCancelled(iocb)) {
- ret = -EINTR;
- aio_complete(iocb, ret, 0);
- /* must not access the iocb after this */
- goto out;
- }
+static inline unsigned kioctx_ring_lock(struct kioctx *ctx)
+{
+ unsigned tail;
/*
- * Now we are all set to call the retry method in async
- * context.
+ * ctx->tail is both our lock and the canonical version of the tail
+ * pointer.
*/
- ret = retry(iocb);
-
- if (ret != -EIOCBRETRY && ret != -EIOCBQUEUED) {
- /*
- * There's no easy way to restart the syscall since other AIO's
- * may be already running. Just fail this IO with EINTR.
- */
- if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
- ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
- ret = -EINTR;
- aio_complete(iocb, ret, 0);
- }
-out:
- spin_lock_irq(&ctx->ctx_lock);
+ while ((tail = xchg(&ctx->tail, UINT_MAX)) == UINT_MAX)
+ cpu_relax();
- if (-EIOCBRETRY == ret) {
- /*
- * OK, now that we are done with this iteration
- * and know that there is more left to go,
- * this is where we let go so that a subsequent
- * "kick" can start the next iteration
- */
-
- /* will make __queue_kicked_iocb succeed from here on */
- INIT_LIST_HEAD(&iocb->ki_run_list);
- /* we must queue the next iteration ourselves, if it
- * has already been kicked */
- if (kiocbIsKicked(iocb)) {
- __queue_kicked_iocb(iocb);
-
- /*
- * __queue_kicked_iocb will always return 1 here, because
- * iocb->ki_run_list is empty at this point so it should
- * be safe to unconditionally queue the context into the
- * work queue.
- */
- aio_queue_work(ctx);
- }
- }
- return ret;
+ return tail;
}
-/*
- * __aio_run_iocbs:
- * Process all pending retries queued on the ioctx
- * run list.
- * Assumes it is operating within the aio issuer's mm
- * context.
- */
-static int __aio_run_iocbs(struct kioctx *ctx)
+static inline void kioctx_ring_unlock(struct kioctx *ctx, unsigned tail)
{
- struct kiocb *iocb;
- struct list_head run_list;
+ struct aio_ring *ring;
- assert_spin_locked(&ctx->ctx_lock);
+ if (!ctx)
+ return;
- list_replace_init(&ctx->run_list, &run_list);
- while (!list_empty(&run_list)) {
- iocb = list_entry(run_list.next, struct kiocb,
- ki_run_list);
- list_del(&iocb->ki_run_list);
- /*
- * Hold an extra reference while retrying i/o.
- */
- iocb->ki_users++; /* grab extra reference */
- aio_run_iocb(iocb);
- __aio_put_req(ctx, iocb);
- }
- if (!list_empty(&ctx->run_list))
- return 1;
- return 0;
-}
+ smp_wmb();
+ /* make event visible before updating tail */
-static void aio_queue_work(struct kioctx * ctx)
-{
- unsigned long timeout;
- /*
- * if someone is waiting, get the work started right
- * away, otherwise, use a longer delay
- */
- smp_mb();
- if (waitqueue_active(&ctx->wait))
- timeout = 1;
- else
- timeout = HZ/10;
- queue_delayed_work(aio_wq, &ctx->wq, timeout);
-}
+ ctx->shadow_tail = tail;
-/*
- * aio_run_all_iocbs:
- * Process all pending retries queued on the ioctx
- * run list, and keep running them until the list
- * stays empty.
- * Assumes it is operating within the aio issuer's mm context.
- */
-static inline void aio_run_all_iocbs(struct kioctx *ctx)
-{
- spin_lock_irq(&ctx->ctx_lock);
- while (__aio_run_iocbs(ctx))
- ;
- spin_unlock_irq(&ctx->ctx_lock);
-}
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ ring->tail = tail;
+ kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
-/*
- * aio_kick_handler:
- * Work queue handler triggered to process pending
- * retries on an ioctx. Takes on the aio issuer's
- * mm context before running the iocbs, so that
- * copy_xxx_user operates on the issuer's address
- * space.
- * Run on aiod's context.
- */
-static void aio_kick_handler(struct work_struct *work)
-{
- struct kioctx *ctx = container_of(work, struct kioctx, wq.work);
- mm_segment_t oldfs = get_fs();
- struct mm_struct *mm;
- int requeue;
+ /* unlock, make new tail visible before checking waitlist */
+ smp_mb();
- set_fs(USER_DS);
- use_mm(ctx->mm);
- spin_lock_irq(&ctx->ctx_lock);
- requeue =__aio_run_iocbs(ctx);
- mm = ctx->mm;
- spin_unlock_irq(&ctx->ctx_lock);
- unuse_mm(mm);
- set_fs(oldfs);
- /*
- * we're in a worker thread already; no point using non-zero delay
- */
- if (requeue)
- queue_delayed_work(aio_wq, &ctx->wq, 0);
-}
+ ctx->tail = tail;
+ if (waitqueue_active(&ctx->wait))
+ wake_up(&ctx->wait);
+}
-/*
- * Called by kick_iocb to queue the kiocb for retry
- * and if required activate the aio work queue to process
- * it
- */
-static void try_queue_kicked_iocb(struct kiocb *iocb)
+void batch_complete_aio(struct batch_complete *batch)
{
- struct kioctx *ctx = iocb->ki_ctx;
+ struct kioctx *ctx = NULL;
+ struct eventfd_ctx *eventfd = NULL;
+ struct rb_node *n;
unsigned long flags;
- int run = 0;
+ unsigned tail = 0;
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- /* set this inside the lock so that we can't race with aio_run_iocb()
- * testing it and putting the iocb on the run list under the lock */
- if (!kiocbTryKick(iocb))
- run = __queue_kicked_iocb(iocb);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- if (run)
- aio_queue_work(ctx);
-}
-
-/*
- * kick_iocb:
- * Called typically from a wait queue callback context
- * to trigger a retry of the iocb.
- * The retry is usually executed by aio workqueue
- * threads (See aio_kick_handler).
- */
-void kick_iocb(struct kiocb *iocb)
-{
- /* sync iocbs are easy: they can only ever be executing from a
- * single context. */
- if (is_sync_kiocb(iocb)) {
- kiocbSetKicked(iocb);
- wake_up_process(iocb->ki_obj.tsk);
+ if (RB_EMPTY_ROOT(&batch->kiocb))
return;
- }
-
- try_queue_kicked_iocb(iocb);
-}
-EXPORT_SYMBOL(kick_iocb);
-
-/* aio_complete
- * Called when the io request on the given iocb is complete.
- * Returns true if this is the last user of the request. The
- * only other user of the request can be the cancellation code.
- */
-int aio_complete(struct kiocb *iocb, long res, long res2)
-{
- struct kioctx *ctx = iocb->ki_ctx;
- struct aio_ring_info *info;
- struct aio_ring *ring;
- struct io_event *event;
- unsigned long flags;
- unsigned long tail;
- int ret;
/*
- * Special case handling for sync iocbs:
- * - events go directly into the iocb for fast handling
- * - the sync task with the iocb in its stack holds the single iocb
- * ref, no other paths have a way to get another ref
- * - the sync task helpfully left a reference to itself in the iocb
- */
- if (is_sync_kiocb(iocb)) {
- BUG_ON(iocb->ki_users != 1);
- iocb->ki_user_data = res;
- iocb->ki_users = 0;
- wake_up_process(iocb->ki_obj.tsk);
- return 1;
- }
-
- info = &ctx->ring_info;
-
- /* add a completion event to the ring buffer.
- * must be done holding ctx->ctx_lock to prevent
- * other code from messing with the tail
- * pointer since we might be called from irq
- * context.
+ * Take rcu_read_lock() in case the kioctx is being destroyed, as we
+ * need to issue a wakeup after incrementing reqs_available.
*/
- spin_lock_irqsave(&ctx->ctx_lock, flags);
+ rcu_read_lock();
+ local_irq_save(flags);
- if (iocb->ki_run_list.prev && !list_empty(&iocb->ki_run_list))
- list_del_init(&iocb->ki_run_list);
+ n = rb_first(&batch->kiocb);
+ while (n) {
+ struct kiocb *req = container_of(n, struct kiocb, ki_node);
- /*
- * cancelled requests don't get events, userland was given one
- * when the event got cancelled.
- */
- if (kiocbIsCancelled(iocb))
- goto put_rq;
+ if (n->rb_right) {
+ n->rb_right->__rb_parent_color = n->__rb_parent_color;
+ n = n->rb_right;
- ring = kmap_atomic(info->ring_pages[0]);
+ while (n->rb_left)
+ n = n->rb_left;
+ } else {
+ n = rb_parent(n);
+ }
- tail = info->tail;
- event = aio_ring_event(info, tail);
- if (++tail >= info->nr)
- tail = 0;
+ if (unlikely(req->ki_eventfd != eventfd)) {
+ if (eventfd) {
+ /* Make event visible */
+ kioctx_ring_unlock(ctx, tail);
+ ctx = NULL;
- event->obj = (u64)(unsigned long)iocb->ki_obj.user;
- event->data = iocb->ki_user_data;
- event->res = res;
- event->res2 = res2;
+ eventfd_signal(eventfd, 1);
+ eventfd_ctx_put(eventfd);
+ }
- dprintk("aio_complete: %p[%lu]: %p: %p %Lx %lx %lx\n",
- ctx, tail, iocb, iocb->ki_obj.user, iocb->ki_user_data,
- res, res2);
+ eventfd = req->ki_eventfd;
+ req->ki_eventfd = NULL;
+ }
- /* after flagging the request as done, we
- * must never even look at it again
- */
- smp_wmb(); /* make event visible before updating tail */
+ if (unlikely(req->ki_ctx != ctx)) {
+ kioctx_ring_unlock(ctx, tail);
- info->tail = tail;
- ring->tail = tail;
+ ctx = req->ki_ctx;
+ tail = kioctx_ring_lock(ctx);
+ }
- put_aio_ring_event(event);
- kunmap_atomic(ring);
+ tail = kioctx_ring_put(ctx, req, tail);
+ aio_put_req(req);
+ }
- pr_debug("added to ring %p at [%lu]\n", iocb, tail);
+ kioctx_ring_unlock(ctx, tail);
+ local_irq_restore(flags);
+ rcu_read_unlock();
/*
* Check if the user asked us to deliver the result through an
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd != NULL)
- eventfd_signal(iocb->ki_eventfd, 1);
+ if (eventfd) {
+ eventfd_signal(eventfd, 1);
+ eventfd_ctx_put(eventfd);
+ }
+}
+EXPORT_SYMBOL(batch_complete_aio);
-put_rq:
- /* everything turned out well, dispose of the aiocb. */
- ret = __aio_put_req(ctx, iocb);
+/* aio_complete_batch
+ * Called when the io request on the given iocb is complete; @batch may be
+ * NULL.
+ */
+void aio_complete_batch(struct kiocb *req, long res, long res2,
+ struct batch_complete *batch)
+{
+ req->ki_res = res;
+ req->ki_res2 = res2;
+
+ if (req->ki_list.next) {
+ struct kioctx *ctx = req->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_del(&req->ki_list);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+ }
/*
- * We have to order our ring_info tail store above and test
- * of the wait list below outside the wait lock. This is
- * like in wake_up_bit() where clearing a bit has to be
- * ordered with the unlocked test.
+ * Special case handling for sync iocbs:
+ * - events go directly into the iocb for fast handling
+ * - the sync task with the iocb in its stack holds the single iocb
+ * ref, no other paths have a way to get another ref
+ * - the sync task helpfully left a reference to itself in the iocb
*/
- smp_mb();
+ if (is_sync_kiocb(req)) {
+ BUG_ON(atomic_read(&req->ki_users) != 1);
+ req->ki_user_data = req->ki_res;
+ atomic_set(&req->ki_users, 0);
+ wake_up_process(req->ki_obj.tsk);
+ } else if (batch) {
+ int res;
+ struct kiocb *t;
+ struct rb_node **n = &batch->kiocb.rb_node, *parent = NULL;
+
+ while (*n) {
+ parent = *n;
+ t = container_of(*n, struct kiocb, ki_node);
+
+ res = req->ki_ctx != t->ki_ctx
+ ? req->ki_ctx < t->ki_ctx
+ : req->ki_eventfd != t->ki_eventfd
+ ? req->ki_eventfd < t->ki_eventfd
+ : req < t;
+
+ n = res ? &(*n)->rb_left : &(*n)->rb_right;
+ }
- if (waitqueue_active(&ctx->wait))
- wake_up(&ctx->wait);
+ rb_link_node(&req->ki_node, parent, n);
+ rb_insert_color(&req->ki_node, &batch->kiocb);
+ } else {
+ struct batch_complete batch_stack;
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- return ret;
+ memset(&req->ki_node, 0, sizeof(req->ki_node));
+ batch_stack.kiocb.rb_node = &req->ki_node;
+
+ batch_complete_aio(&batch_stack);
+ }
}
-EXPORT_SYMBOL(aio_complete);
+EXPORT_SYMBOL(aio_complete_batch);
-/* aio_read_evt
- * Pull an event off of the ioctx's event ring. Returns the number of
- * events fetched (0 or 1 ;-)
- * FIXME: make this use cmpxchg.
- * TODO: make the ringbuffer user mmap()able (requires FIXME).
+/* aio_read_events
+ * Pull an event off of the ioctx's event ring. Returns the number of
+ * events fetched
*/
-static int aio_read_evt(struct kioctx *ioctx, struct io_event *ent)
+static long aio_read_events_ring(struct kioctx *ctx,
+ struct io_event __user *event, long nr)
{
- struct aio_ring_info *info = &ioctx->ring_info;
struct aio_ring *ring;
- unsigned long head;
- int ret = 0;
-
- ring = kmap_atomic(info->ring_pages[0]);
- dprintk("in aio_read_evt h%lu t%lu m%lu\n",
- (unsigned long)ring->head, (unsigned long)ring->tail,
- (unsigned long)ring->nr);
-
- if (ring->head == ring->tail)
- goto out;
+ unsigned head, pos;
+ long ret = 0;
+ int copy_ret;
- spin_lock(&info->ring_lock);
-
- head = ring->head % info->nr;
- if (head != ring->tail) {
- struct io_event *evp = aio_ring_event(info, head);
- *ent = *evp;
- head = (head + 1) % info->nr;
- smp_mb(); /* finish reading the event before updatng the head */
- ring->head = head;
- ret = 1;
- put_aio_ring_event(evp);
- }
- spin_unlock(&info->ring_lock);
+ mutex_lock(&ctx->ring_lock);
-out:
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ head = ring->head;
kunmap_atomic(ring);
- dprintk("leaving aio_read_evt: %d h%lu t%lu\n", ret,
- (unsigned long)ring->head, (unsigned long)ring->tail);
- return ret;
-}
-
-struct aio_timeout {
- struct timer_list timer;
- int timed_out;
- struct task_struct *p;
-};
-static void timeout_func(unsigned long data)
-{
- struct aio_timeout *to = (struct aio_timeout *)data;
+ pr_debug("h%u t%u m%u\n", head, ctx->shadow_tail, ctx->nr_events);
- to->timed_out = 1;
- wake_up_process(to->p);
-}
+ if (head == ctx->shadow_tail)
+ goto out;
-static inline void init_timeout(struct aio_timeout *to)
-{
- setup_timer_on_stack(&to->timer, timeout_func, (unsigned long) to);
- to->timed_out = 0;
- to->p = current;
-}
+ while (ret < nr) {
+ long avail = (head <= ctx->shadow_tail
+ ? ctx->shadow_tail : ctx->nr_events) - head;
+ struct io_event *ev;
+ struct page *page;
-static inline void set_timeout(long start_jiffies, struct aio_timeout *to,
- const struct timespec *ts)
-{
- to->timer.expires = start_jiffies + timespec_to_jiffies(ts);
- if (time_after(to->timer.expires, jiffies))
- add_timer(&to->timer);
- else
- to->timed_out = 1;
-}
+ if (head == ctx->shadow_tail)
+ break;
-static inline void clear_timeout(struct aio_timeout *to)
-{
- del_singleshot_timer_sync(&to->timer);
-}
+ avail = min(avail, nr - ret);
+ avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
+ ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-static int read_events(struct kioctx *ctx,
- long min_nr, long nr,
- struct io_event __user *event,
- struct timespec __user *timeout)
-{
- long start_jiffies = jiffies;
- struct task_struct *tsk = current;
- DECLARE_WAITQUEUE(wait, tsk);
- int ret;
- int i = 0;
- struct io_event ent;
- struct aio_timeout to;
- int retry = 0;
-
- /* needed to zero any padding within an entry (there shouldn't be
- * any, but C is fun!
- */
- memset(&ent, 0, sizeof(ent));
-retry:
- ret = 0;
- while (likely(i < nr)) {
- ret = aio_read_evt(ctx, &ent);
- if (unlikely(ret <= 0))
- break;
+ pos = head + AIO_EVENTS_OFFSET;
+ page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
+ pos %= AIO_EVENTS_PER_PAGE;
- dprintk("read event: %Lx %Lx %Lx %Lx\n",
- ent.data, ent.obj, ent.res, ent.res2);
+ ev = kmap(page);
+ copy_ret = copy_to_user(event + ret, ev + pos, sizeof(*ev) * avail);
+ kunmap(page);
- /* Could we split the check in two? */
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- dprintk("aio: lost an event due to EFAULT.\n");
- break;
+ if (unlikely(copy_ret)) {
+ ret = -EFAULT;
+ goto out;
}
- ret = 0;
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
+ ret += avail;
+ head += avail;
+ head %= ctx->nr_events;
}
- if (min_nr <= i)
- return i;
- if (ret)
- return ret;
+ ring = kmap_atomic(ctx->ring_pages[0]);
+ ring->head = head;
+ kunmap_atomic(ring);
+ flush_dcache_page(ctx->ring_pages[0]);
- /* End fast path */
+ pr_debug("%li h%u t%u\n", ret, head, ctx->shadow_tail);
- /* racey check, but it gets redone */
- if (!retry && unlikely(!list_empty(&ctx->run_list))) {
- retry = 1;
- aio_run_all_iocbs(ctx);
- goto retry;
- }
-
- init_timeout(&to);
- if (timeout) {
- struct timespec ts;
- ret = -EFAULT;
- if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
- goto out;
-
- set_timeout(start_jiffies, &to, &ts);
- }
+ put_reqs_available(ctx, ret);
+out:
+ mutex_unlock(&ctx->ring_lock);
- while (likely(i < nr)) {
- add_wait_queue_exclusive(&ctx->wait, &wait);
- do {
- set_task_state(tsk, TASK_INTERRUPTIBLE);
- ret = aio_read_evt(ctx, &ent);
- if (ret)
- break;
- if (min_nr <= i)
- break;
- if (unlikely(ctx->dead)) {
- ret = -EINVAL;
- break;
- }
- if (to.timed_out) /* Only check after read evt */
- break;
- /* Try to only show up in io wait if there are ops
- * in flight */
- if (ctx->reqs_active)
- io_schedule();
- else
- schedule();
- if (signal_pending(tsk)) {
- ret = -EINTR;
- break;
- }
- /*ret = aio_read_evt(ctx, &ent);*/
- } while (1) ;
+ return ret;
+}
- set_task_state(tsk, TASK_RUNNING);
- remove_wait_queue(&ctx->wait, &wait);
+static bool aio_read_events(struct kioctx *ctx, long min_nr, long nr,
+ struct io_event __user *event, long *i)
+{
+ long ret = aio_read_events_ring(ctx, event + *i, nr - *i);
- if (unlikely(ret <= 0))
- break;
+ if (ret > 0)
+ *i += ret;
- ret = -EFAULT;
- if (unlikely(copy_to_user(event, &ent, sizeof(ent)))) {
- dprintk("aio: lost an event due to EFAULT.\n");
- break;
- }
+ if (unlikely(percpu_ref_dead(&ctx->users)))
+ ret = -EINVAL;
- /* Good, event copied to userland, update counts. */
- event ++;
- i ++;
- }
+ if (!*i)
+ *i = ret;
- if (timeout)
- clear_timeout(&to);
-out:
- destroy_timer_on_stack(&to.timer);
- return i ? i : ret;
+ return ret < 0 || *i >= min_nr;
}
-/* Take an ioctx and remove it from the list of ioctx's. Protects
- * against races with itself via ->dead.
- */
-static void io_destroy(struct kioctx *ioctx)
+static long read_events(struct kioctx *ctx, long min_nr, long nr,
+ struct io_event __user *event,
+ struct timespec __user *timeout)
{
- struct mm_struct *mm = current->mm;
- int was_dead;
+ ktime_t until = { .tv64 = KTIME_MAX };
+ long ret = 0;
- /* delete the entry from the list is someone else hasn't already */
- spin_lock(&mm->ioctx_lock);
- was_dead = ioctx->dead;
- ioctx->dead = 1;
- hlist_del_rcu(&ioctx->list);
- spin_unlock(&mm->ioctx_lock);
+ if (timeout) {
+ struct timespec ts;
- dprintk("aio_release(%p)\n", ioctx);
- if (likely(!was_dead))
- put_ioctx(ioctx); /* twice for the list */
+ if (unlikely(copy_from_user(&ts, timeout, sizeof(ts))))
+ return -EFAULT;
- kill_ctx(ioctx);
+ until = timespec_to_ktime(ts);
+ }
/*
- * Wake up any waiters. The setting of ctx->dead must be seen
- * by other CPUs at this point. Right now, we rely on the
- * locking done by the above calls to ensure this consistency.
+ * Note that aio_read_events() is being called as the conditional - i.e.
+ * we're calling it after prepare_to_wait() has set task state to
+ * TASK_INTERRUPTIBLE.
+ *
+ * But aio_read_events() can block, and if it blocks it's going to flip
+ * the task state back to TASK_RUNNING.
+ *
+ * This should be ok, provided it doesn't flip the state back to
+ * TASK_RUNNING and return 0 too much - that causes us to spin. That
+ * will only happen if the mutex_lock() call blocks, and we then find
+ * the ringbuffer empty. So in practice we should be ok, but it's
+ * something to be aware of when touching this code.
*/
- wake_up_all(&ioctx->wait);
+ wait_event_interruptible_hrtimeout(ctx->wait,
+ aio_read_events(ctx, min_nr, nr, event, &ret), until);
+
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+
+ return ret;
}
/* sys_io_setup:
@@ -1252,7 +1035,7 @@ SYSCALL_DEFINE2(io_setup, unsigned, nr_events, aio_context_t __user *, ctxp)
if (!IS_ERR(ioctx)) {
ret = put_user(ioctx->user_id, ctxp);
if (ret)
- io_destroy(ioctx);
+ kill_ioctx(ioctx);
put_ioctx(ioctx);
}
@@ -1270,7 +1053,7 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
{
struct kioctx *ioctx = lookup_ioctx(ctx);
if (likely(NULL != ioctx)) {
- io_destroy(ioctx);
+ kill_ioctx(ioctx);
put_ioctx(ioctx);
return 0;
}
@@ -1301,24 +1084,15 @@ static void aio_advance_iovec(struct kiocb *iocb, ssize_t ret)
BUG_ON(ret > 0 && iocb->ki_left == 0);
}
-static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
+typedef ssize_t (aio_rw_op)(struct kiocb *, const struct iovec *,
+ unsigned long, loff_t);
+
+static ssize_t aio_rw_vect_retry(struct kiocb *iocb, int rw, aio_rw_op *rw_op)
{
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
- ssize_t (*rw_op)(struct kiocb *, const struct iovec *,
- unsigned long, loff_t);
ssize_t ret = 0;
- unsigned short opcode;
-
- if ((iocb->ki_opcode == IOCB_CMD_PREADV) ||
- (iocb->ki_opcode == IOCB_CMD_PREAD)) {
- rw_op = file->f_op->aio_read;
- opcode = IOCB_CMD_PREADV;
- } else {
- rw_op = file->f_op->aio_write;
- opcode = IOCB_CMD_PWRITEV;
- }
/* This matches the pread()/pwrite() logic */
if (iocb->ki_pos < 0)
@@ -1334,7 +1108,7 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
/* retry all partial writes. retry partial reads as long as its a
* regular file. */
} while (ret > 0 && iocb->ki_left > 0 &&
- (opcode == IOCB_CMD_PWRITEV ||
+ (rw == WRITE ||
(!S_ISFIFO(inode->i_mode) && !S_ISSOCK(inode->i_mode))));
/* This means we must have transferred all that we could */
@@ -1344,81 +1118,49 @@ static ssize_t aio_rw_vect_retry(struct kiocb *iocb)
/* If we managed to write some out we return that, rather than
* the eventual error. */
- if (opcode == IOCB_CMD_PWRITEV
- && ret < 0 && ret != -EIOCBQUEUED && ret != -EIOCBRETRY
+ if (rw == WRITE
+ && ret < 0 && ret != -EIOCBQUEUED
&& iocb->ki_nbytes - iocb->ki_left)
ret = iocb->ki_nbytes - iocb->ki_left;
return ret;
}
-static ssize_t aio_fdsync(struct kiocb *iocb)
-{
- struct file *file = iocb->ki_filp;
- ssize_t ret = -EINVAL;
-
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(iocb, 1);
- return ret;
-}
-
-static ssize_t aio_fsync(struct kiocb *iocb)
-{
- struct file *file = iocb->ki_filp;
- ssize_t ret = -EINVAL;
-
- if (file->f_op->aio_fsync)
- ret = file->f_op->aio_fsync(iocb, 0);
- return ret;
-}
-
-static ssize_t aio_setup_vectored_rw(int type, struct kiocb *kiocb, bool compat)
+static ssize_t aio_setup_vectored_rw(int rw, struct kiocb *kiocb, bool compat)
{
ssize_t ret;
+ kiocb->ki_nr_segs = kiocb->ki_nbytes;
+
#ifdef CONFIG_COMPAT
if (compat)
- ret = compat_rw_copy_check_uvector(type,
+ ret = compat_rw_copy_check_uvector(rw,
(struct compat_iovec __user *)kiocb->ki_buf,
- kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+ kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
&kiocb->ki_iovec);
else
#endif
- ret = rw_copy_check_uvector(type,
+ ret = rw_copy_check_uvector(rw,
(struct iovec __user *)kiocb->ki_buf,
- kiocb->ki_nbytes, 1, &kiocb->ki_inline_vec,
+ kiocb->ki_nr_segs, 1, &kiocb->ki_inline_vec,
&kiocb->ki_iovec);
if (ret < 0)
- goto out;
-
- ret = rw_verify_area(type, kiocb->ki_filp, &kiocb->ki_pos, ret);
- if (ret < 0)
- goto out;
+ return ret;
- kiocb->ki_nr_segs = kiocb->ki_nbytes;
- kiocb->ki_cur_seg = 0;
- /* ki_nbytes/left now reflect bytes instead of segs */
+ /* ki_nbytes now reflect bytes instead of segs */
kiocb->ki_nbytes = ret;
- kiocb->ki_left = ret;
-
- ret = 0;
-out:
- return ret;
+ return 0;
}
-static ssize_t aio_setup_single_vector(int type, struct file * file, struct kiocb *kiocb)
+static ssize_t aio_setup_single_vector(int rw, struct kiocb *kiocb)
{
- int bytes;
-
- bytes = rw_verify_area(type, file, &kiocb->ki_pos, kiocb->ki_left);
- if (bytes < 0)
- return bytes;
+ if (unlikely(!access_ok(!rw, kiocb->ki_buf, kiocb->ki_nbytes)))
+ return -EFAULT;
kiocb->ki_iovec = &kiocb->ki_inline_vec;
kiocb->ki_iovec->iov_base = kiocb->ki_buf;
- kiocb->ki_iovec->iov_len = bytes;
+ kiocb->ki_iovec->iov_len = kiocb->ki_nbytes;
kiocb->ki_nr_segs = 1;
- kiocb->ki_cur_seg = 0;
return 0;
}
@@ -1427,96 +1169,94 @@ static ssize_t aio_setup_single_vector(int type, struct file * file, struct kioc
* Performs the initial checks and aio retry method
* setup for the kiocb at the time of io submission.
*/
-static ssize_t aio_setup_iocb(struct kiocb *kiocb, bool compat)
+static ssize_t aio_run_iocb(struct kiocb *req, bool compat)
{
- struct file *file = kiocb->ki_filp;
- ssize_t ret = 0;
+ struct file *file = req->ki_filp;
+ ssize_t ret;
+ int rw;
+ fmode_t mode;
+ aio_rw_op *rw_op;
- switch (kiocb->ki_opcode) {
+ switch (req->ki_opcode) {
case IOCB_CMD_PREAD:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_READ)))
- break;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_WRITE, kiocb->ki_buf,
- kiocb->ki_left)))
- break;
- ret = aio_setup_single_vector(READ, file, kiocb);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_read)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
- case IOCB_CMD_PWRITE:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- break;
- ret = -EFAULT;
- if (unlikely(!access_ok(VERIFY_READ, kiocb->ki_buf,
- kiocb->ki_left)))
- break;
- ret = aio_setup_single_vector(WRITE, file, kiocb);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_write)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
case IOCB_CMD_PREADV:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_READ)))
- break;
- ret = aio_setup_vectored_rw(READ, kiocb, compat);
- if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_read)
- kiocb->ki_retry = aio_rw_vect_retry;
- break;
+ mode = FMODE_READ;
+ rw = READ;
+ rw_op = file->f_op->aio_read;
+ goto rw_common;
+
+ case IOCB_CMD_PWRITE:
case IOCB_CMD_PWRITEV:
- ret = -EBADF;
- if (unlikely(!(file->f_mode & FMODE_WRITE)))
- break;
- ret = aio_setup_vectored_rw(WRITE, kiocb, compat);
+ mode = FMODE_WRITE;
+ rw = WRITE;
+ rw_op = file->f_op->aio_write;
+ goto rw_common;
+rw_common:
+ if (unlikely(!(file->f_mode & mode)))
+ return -EBADF;
+
+ if (!rw_op)
+ return -EINVAL;
+
+ ret = (req->ki_opcode == IOCB_CMD_PREADV ||
+ req->ki_opcode == IOCB_CMD_PWRITEV)
+ ? aio_setup_vectored_rw(rw, req, compat)
+ : aio_setup_single_vector(rw, req);
if (ret)
- break;
- ret = -EINVAL;
- if (file->f_op->aio_write)
- kiocb->ki_retry = aio_rw_vect_retry;
+ return ret;
+
+ ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
+ if (ret < 0)
+ return ret;
+
+ req->ki_nbytes = ret;
+ req->ki_left = ret;
+
+ ret = aio_rw_vect_retry(req, rw, rw_op);
break;
+
case IOCB_CMD_FDSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- kiocb->ki_retry = aio_fdsync;
+ if (!file->f_op->aio_fsync)
+ return -EINVAL;
+
+ ret = file->f_op->aio_fsync(req, 1);
break;
+
case IOCB_CMD_FSYNC:
- ret = -EINVAL;
- if (file->f_op->aio_fsync)
- kiocb->ki_retry = aio_fsync;
+ if (!file->f_op->aio_fsync)
+ return -EINVAL;
+
+ ret = file->f_op->aio_fsync(req, 0);
break;
+
default:
- dprintk("EINVAL: io_submit: no operation provided\n");
- ret = -EINVAL;
+ pr_debug("EINVAL: no operation provided\n");
+ return -EINVAL;
}
- if (!kiocb->ki_retry)
- return ret;
+ if (ret != -EIOCBQUEUED) {
+ /*
+ * There's no easy way to restart the syscall since other AIO's
+ * may be already running. Just fail this IO with EINTR.
+ */
+ if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR ||
+ ret == -ERESTARTNOHAND || ret == -ERESTART_RESTARTBLOCK))
+ ret = -EINTR;
+ aio_complete(req, ret, 0);
+ }
return 0;
}
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, struct kiocb_batch *batch,
- bool compat)
+ struct iocb *iocb, bool compat)
{
struct kiocb *req;
- struct file *file;
ssize_t ret;
/* enforce forwards compatibility on users */
if (unlikely(iocb->aio_reserved1 || iocb->aio_reserved2)) {
- pr_debug("EINVAL: io_submit: reserve field set\n");
+ pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
@@ -1530,16 +1270,16 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
return -EINVAL;
}
- file = fget(iocb->aio_fildes);
- if (unlikely(!file))
- return -EBADF;
-
- req = aio_get_req(ctx, batch); /* returns with 2 references to req */
- if (unlikely(!req)) {
- fput(file);
+ req = aio_get_req(ctx);
+ if (unlikely(!req))
return -EAGAIN;
+
+ req->ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->ki_filp)) {
+ ret = -EBADF;
+ goto out_put_req;
}
- req->ki_filp = file;
+
if (iocb->aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
@@ -1555,9 +1295,9 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
}
- ret = put_user(req->ki_key, &user_iocb->aio_key);
+ ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
if (unlikely(ret)) {
- dprintk("EFAULT: aio_key\n");
+ pr_debug("EFAULT: aio_key\n");
goto out_put_req;
}
@@ -1569,41 +1309,14 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
req->ki_left = req->ki_nbytes = iocb->aio_nbytes;
req->ki_opcode = iocb->aio_lio_opcode;
- ret = aio_setup_iocb(req, compat);
-
+ ret = aio_run_iocb(req, compat);
if (ret)
goto out_put_req;
- spin_lock_irq(&ctx->ctx_lock);
- /*
- * We could have raced with io_destroy() and are currently holding a
- * reference to ctx which should be destroyed. We cannot submit IO
- * since ctx gets freed as soon as io_submit() puts its reference. The
- * check here is reliable: io_destroy() sets ctx->dead before waiting
- * for outstanding IO and the barrier between these two is realized by
- * unlock of mm->ioctx_lock and lock of ctx->ctx_lock. Analogously we
- * increment ctx->reqs_active before checking for ctx->dead and the
- * barrier is realized by unlock and lock of ctx->ctx_lock. Thus if we
- * don't see ctx->dead set here, io_destroy() waits for our IO to
- * finish.
- */
- if (ctx->dead) {
- spin_unlock_irq(&ctx->ctx_lock);
- ret = -EINVAL;
- goto out_put_req;
- }
- aio_run_iocb(req);
- if (!list_empty(&ctx->run_list)) {
- /* drain the run list */
- while (__aio_run_iocbs(ctx))
- ;
- }
- spin_unlock_irq(&ctx->ctx_lock);
-
aio_put_req(req); /* drop extra ref to req */
return 0;
-
out_put_req:
+ put_reqs_available(ctx, 1);
aio_put_req(req); /* drop extra ref to req */
aio_put_req(req); /* drop i/o ref to req */
return ret;
@@ -1616,7 +1329,6 @@ long do_io_submit(aio_context_t ctx_id, long nr,
long ret = 0;
int i = 0;
struct blk_plug plug;
- struct kiocb_batch batch;
if (unlikely(nr < 0))
return -EINVAL;
@@ -1629,12 +1341,10 @@ long do_io_submit(aio_context_t ctx_id, long nr,
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
- pr_debug("EINVAL: io_submit: invalid context id\n");
+ pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
- kiocb_batch_init(&batch, nr);
-
blk_start_plug(&plug);
/*
@@ -1655,13 +1365,12 @@ long do_io_submit(aio_context_t ctx_id, long nr,
break;
}
- ret = io_submit_one(ctx, user_iocb, &tmp, &batch, compat);
+ ret = io_submit_one(ctx, user_iocb, &tmp, compat);
if (ret)
break;
}
blk_finish_plug(&plug);
- kiocb_batch_free(ctx, &batch);
put_ioctx(ctx);
return i ? i : ret;
}
@@ -1694,10 +1403,13 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
assert_spin_locked(&ctx->ctx_lock);
+ if (key != KIOCB_KEY)
+ return NULL;
+
/* TODO: use a hash or array, this sucks. */
list_for_each(pos, &ctx->active_reqs) {
struct kiocb *kiocb = list_kiocb(pos);
- if (kiocb->ki_obj.user == iocb && kiocb->ki_key == key)
+ if (kiocb->ki_obj.user == iocb)
return kiocb;
}
return NULL;
@@ -1716,7 +1428,7 @@ static struct kiocb *lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb,
SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
struct io_event __user *, result)
{
- int (*cancel)(struct kiocb *iocb, struct io_event *res);
+ struct io_event res;
struct kioctx *ctx;
struct kiocb *kiocb;
u32 key;
@@ -1731,32 +1443,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
- ret = -EAGAIN;
+
kiocb = lookup_kiocb(ctx, iocb, key);
- if (kiocb && kiocb->ki_cancel) {
- cancel = kiocb->ki_cancel;
- kiocb->ki_users ++;
- kiocbSetCancelled(kiocb);
- } else
- cancel = NULL;
+ if (kiocb)
+ ret = kiocb_cancel(ctx, kiocb, &res);
+ else
+ ret = -EINVAL;
+
spin_unlock_irq(&ctx->ctx_lock);
- if (NULL != cancel) {
- struct io_event tmp;
- pr_debug("calling cancel\n");
- memset(&tmp, 0, sizeof(tmp));
- tmp.obj = (u64)(unsigned long)kiocb->ki_obj.user;
- tmp.data = kiocb->ki_user_data;
- ret = cancel(kiocb, &tmp);
- if (!ret) {
- /* Cancellation succeeded -- copy the result
- * into the user's buffer.
- */
- if (copy_to_user(result, &tmp, sizeof(tmp)))
- ret = -EFAULT;
- }
- } else
- ret = -EINVAL;
+ if (!ret) {
+ /* Cancellation succeeded -- copy the result
+ * into the user's buffer.
+ */
+ if (copy_to_user(result, &res, sizeof(res)))
+ ret = -EFAULT;
+ }
put_ioctx(ctx);
diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c
index bbc8f8827eac..14b7ea3c8f5e 100644
--- a/fs/binfmt_aout.c
+++ b/fs/binfmt_aout.c
@@ -62,7 +62,6 @@ static int aout_core_dump(struct coredump_params *cprm)
fs = get_fs();
set_fs(KERNEL_DS);
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
strncpy(dump.u_comm, current->comm, sizeof(dump.u_comm));
dump.u_ar0 = offsetof(struct user, regs);
dump.signal = cprm->siginfo->si_signo;
@@ -256,8 +255,6 @@ static int load_aout_binary(struct linux_binprm * bprm)
(current->mm->start_data = N_DATADDR(ex));
current->mm->brk = ex.a_bss +
(current->mm->start_brk = N_BSSADDR(ex));
- current->mm->free_area_cache = current->mm->mmap_base;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, STACK_TOP, EXSTACK_DEFAULT);
if (retval < 0) {
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 3939829f6c5c..ced3dcfdac8c 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -140,6 +140,25 @@ static int padzero(unsigned long elf_bss)
#define ELF_BASE_PLATFORM NULL
#endif
+/*
+ * Use get_random_int() to implement AT_RANDOM while avoiding depletion
+ * of the entropy pool.
+ */
+static void get_atrandom_bytes(unsigned char *buf, size_t nbytes)
+{
+ unsigned char *p = buf;
+
+ while (nbytes) {
+ unsigned int random_variable;
+ size_t chunk = min(nbytes, sizeof(random_variable));
+
+ random_variable = get_random_int();
+ memcpy(p, &random_variable, chunk);
+ p += chunk;
+ nbytes -= chunk;
+ }
+}
+
static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
unsigned long load_addr, unsigned long interp_load_addr)
@@ -201,7 +220,7 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
/*
* Generate 16 random bytes for userspace PRNG seeding.
*/
- get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
+ get_atrandom_bytes(k_rand_bytes, sizeof(k_rand_bytes));
u_rand_bytes = (elf_addr_t __user *)
STACK_ALLOC(p, sizeof(k_rand_bytes));
if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
@@ -735,8 +754,6 @@ static int load_elf_binary(struct linux_binprm *bprm)
/* Do this so that we can load the interpreter, if need be. We will
change some of these later */
- current->mm->free_area_cache = current->mm->mmap_base;
- current->mm->cached_hole_size = 0;
retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
executable_stack);
if (retval < 0) {
@@ -2090,8 +2107,7 @@ static int elf_core_dump(struct coredump_params *cprm)
goto cleanup;
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
-
+
fs = get_fs();
set_fs(KERNEL_DS);
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index 9c13e023e2b7..c1cc06aed601 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1687,8 +1687,6 @@ static int elf_fdpic_core_dump(struct coredump_params *cprm)
fill_elf_fdpic_header(elf, e_phnum);
has_dumped = 1;
- current->flags |= PF_DUMPCORE;
-
/*
* Set up the notes in similar form to SVR4 core dumps made
* with info from their /proc.
diff --git a/fs/bio.c b/fs/bio.c
index bb5768f59b32..0cc1b5562abc 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -27,6 +27,7 @@
#include <linux/mempool.h>
#include <linux/workqueue.h>
#include <linux/cgroup.h>
+#include <linux/aio.h>
#include <scsi/sg.h> /* for struct sg_iovec */
#include <trace/events/block.h>
@@ -1407,33 +1408,44 @@ void bio_flush_dcache_pages(struct bio *bi)
EXPORT_SYMBOL(bio_flush_dcache_pages);
#endif
-/**
- * bio_endio - end I/O on a bio
- * @bio: bio
- * @error: error, if any
- *
- * Description:
- * bio_endio() will end I/O on the whole bio. bio_endio() is the
- * preferred way to end I/O on a bio, it takes care of clearing
- * BIO_UPTODATE on error. @error is 0 on success, and and one of the
- * established -Exxxx (-EIO, for instance) error values in case
- * something went wrong. No one should call bi_end_io() directly on a
- * bio unless they own it and thus know that it has an end_io
- * function.
- **/
-void bio_endio(struct bio *bio, int error)
+static inline void __bio_endio(struct bio *bio, struct batch_complete *batch)
{
- if (error)
+ if (bio->bi_error)
clear_bit(BIO_UPTODATE, &bio->bi_flags);
else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
- error = -EIO;
+ bio->bi_error = -EIO;
+
+ if (bio_flagged(bio, BIO_BATCH_ENDIO))
+ bio->bi_batch_end_io(bio, bio->bi_error, batch);
+ else if (bio->bi_end_io)
+ bio->bi_end_io(bio, bio->bi_error);
+}
+
+void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch)
+{
+ if (error)
+ bio->bi_error = error;
trace_block_bio_complete(bio, error);
- if (bio->bi_end_io)
- bio->bi_end_io(bio, error);
+ if (batch)
+ bio_list_add(&batch->bio, bio);
+ else
+ __bio_endio(bio, batch);
+
+}
+EXPORT_SYMBOL(bio_endio_batch);
+
+void batch_complete(struct batch_complete *batch)
+{
+ struct bio *bio;
+
+ while ((bio = bio_list_pop(&batch->bio)))
+ __bio_endio(bio, batch);
+
+ batch_complete_aio(batch);
}
-EXPORT_SYMBOL(bio_endio);
+EXPORT_SYMBOL(batch_complete);
void bio_pair_release(struct bio_pair *bp)
{
diff --git a/fs/block_dev.c b/fs/block_dev.c
index aea605c98ba6..4d48cf5814f8 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -27,6 +27,7 @@
#include <linux/namei.h>
#include <linux/log2.h>
#include <linux/cleancache.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include "internal.h"
@@ -616,11 +617,9 @@ void bd_forget(struct inode *inode)
struct block_device *bdev = NULL;
spin_lock(&bdev_lock);
- if (inode->i_bdev) {
- if (!sb_is_blkdev_sb(inode->i_sb))
- bdev = inode->i_bdev;
- __bd_forget(inode);
- }
+ if (!sb_is_blkdev_sb(inode->i_sb))
+ bdev = inode->i_bdev;
+ __bd_forget(inode);
spin_unlock(&bdev_lock);
if (bdev)
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 5b4ea5f55b8f..6add1b16be80 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -24,6 +24,7 @@
#include <linux/string.h>
#include <linux/backing-dev.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/falloc.h>
#include <linux/swap.h>
#include <linux/writeback.h>
@@ -1514,7 +1515,8 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
size_t count, ocount;
bool sync = (file->f_flags & O_DSYNC) || IS_SYNC(file->f_mapping->host);
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
mutex_lock(&inode->i_mutex);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ca1b767d51f7..ca26188ff629 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -32,6 +32,7 @@
#include <linux/writeback.h>
#include <linux/statfs.h>
#include <linux/compat.h>
+#include <linux/aio.h>
#include <linux/bit_spinlock.h>
#include <linux/xattr.h>
#include <linux/posix_acl.h>
diff --git a/fs/buffer.c b/fs/buffer.c
index b4dcb34c9635..01d8310d8ef0 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2949,7 +2949,7 @@ static void guard_bh_eod(int rw, struct bio *bio, struct buffer_head *bh)
}
}
-int submit_bh(int rw, struct buffer_head * bh)
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags)
{
struct bio *bio;
int ret = 0;
@@ -2984,6 +2984,7 @@ int submit_bh(int rw, struct buffer_head * bh)
bio->bi_end_io = end_bio_bh_io_sync;
bio->bi_private = bh;
+ bio->bi_flags |= bio_flags;
/* Take care of bh's that straddle the end of the device */
guard_bh_eod(rw, bio, bh);
@@ -2997,6 +2998,12 @@ int submit_bh(int rw, struct buffer_head * bh)
bio_put(bio);
return ret;
}
+EXPORT_SYMBOL_GPL(_submit_bh);
+
+int submit_bh(int rw, struct buffer_head * bh)
+{
+ return _submit_bh(rw, bh, 0);
+}
EXPORT_SYMBOL(submit_bh);
/**
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index bf338d9b67e3..eb09f41ee52d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -7,6 +7,7 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include "super.h"
#include "mds_client.h"
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index 7a0dd99e4507..50b9868af4de 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -2520,7 +2520,8 @@ cifs_writev(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
/*
* We need to hold the sem to be sure nobody modifies lock list
diff --git a/fs/compat.c b/fs/compat.c
index 5f83ffa42115..4899d3b5cf4b 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -47,6 +47,7 @@
#include <linux/fs_struct.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/mmu_context.h>
diff --git a/fs/coredump.c b/fs/coredump.c
index c6479658d487..ae862fa030b7 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -263,7 +263,6 @@ static int zap_process(struct task_struct *start, int exit_code)
struct task_struct *t;
int nr = 0;
- start->signal->flags = SIGNAL_GROUP_EXIT;
start->signal->group_exit_code = exit_code;
start->signal->group_stop_count = 0;
@@ -280,8 +279,8 @@ static int zap_process(struct task_struct *start, int exit_code)
return nr;
}
-static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
- struct core_state *core_state, int exit_code)
+static int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
+ struct core_state *core_state, int exit_code)
{
struct task_struct *g, *p;
unsigned long flags;
@@ -291,11 +290,16 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (!signal_group_exit(tsk->signal)) {
mm->core_state = core_state;
nr = zap_process(tsk, exit_code);
+ tsk->signal->group_exit_task = tsk;
+ /* ignore all signals except SIGKILL, see prepare_signal() */
+ tsk->signal->flags = SIGNAL_GROUP_COREDUMP;
+ clear_tsk_thread_flag(tsk, TIF_SIGPENDING);
}
spin_unlock_irq(&tsk->sighand->siglock);
if (unlikely(nr < 0))
return nr;
+ tsk->flags = PF_DUMPCORE;
if (atomic_read(&mm->mm_users) == nr + 1)
goto done;
/*
@@ -340,6 +344,7 @@ static inline int zap_threads(struct task_struct *tsk, struct mm_struct *mm,
if (unlikely(p->mm == mm)) {
lock_task_sighand(p, &flags);
nr += zap_process(p, exit_code);
+ p->signal->flags = SIGNAL_GROUP_EXIT;
unlock_task_sighand(p, &flags);
}
break;
@@ -386,11 +391,18 @@ static int coredump_wait(int exit_code, struct core_state *core_state)
return core_waiters;
}
-static void coredump_finish(struct mm_struct *mm)
+static void coredump_finish(struct mm_struct *mm, bool core_dumped)
{
struct core_thread *curr, *next;
struct task_struct *task;
+ spin_lock_irq(&current->sighand->siglock);
+ if (core_dumped && !__fatal_signal_pending(current))
+ current->signal->group_exit_code |= 0x80;
+ current->signal->group_exit_task = NULL;
+ current->signal->flags = SIGNAL_GROUP_EXIT;
+ spin_unlock_irq(&current->sighand->siglock);
+
next = mm->core_state->dumper.next;
while ((curr = next) != NULL) {
next = curr->next;
@@ -407,6 +419,17 @@ static void coredump_finish(struct mm_struct *mm)
mm->core_state = NULL;
}
+static bool dump_interrupted(void)
+{
+ /*
+ * SIGKILL or freezing() interrupt the coredumping. Perhaps we
+ * can do try_to_freeze() and check __fatal_signal_pending(),
+ * but then we need to teach dump_write() to restart and clear
+ * TIF_SIGPENDING.
+ */
+ return signal_pending(current);
+}
+
static void wait_for_dump_helpers(struct file *file)
{
struct pipe_inode_info *pipe;
@@ -416,17 +439,20 @@ static void wait_for_dump_helpers(struct file *file)
pipe_lock(pipe);
pipe->readers++;
pipe->writers--;
+ wake_up_interruptible_sync(&pipe->wait);
+ kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
+ pipe_unlock(pipe);
- while ((pipe->readers > 1) && (!signal_pending(current))) {
- wake_up_interruptible_sync(&pipe->wait);
- kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
- pipe_wait(pipe);
- }
+ /*
+ * We actually want wait_event_freezable() but then we need
+ * to clear TIF_SIGPENDING and improve dump_interrupted().
+ */
+ wait_event_interruptible(pipe->wait, pipe->readers == 1);
+ pipe_lock(pipe);
pipe->readers--;
pipe->writers++;
pipe_unlock(pipe);
-
}
/*
@@ -471,6 +497,7 @@ void do_coredump(siginfo_t *siginfo)
int ispipe;
struct files_struct *displaced;
bool need_nonrelative = false;
+ bool core_dumped = false;
static atomic_t core_dump_count = ATOMIC_INIT(0);
struct coredump_params cprm = {
.siginfo = siginfo,
@@ -514,17 +541,12 @@ void do_coredump(siginfo_t *siginfo)
old_cred = override_creds(cred);
- /*
- * Clear any false indication of pending signals that might
- * be seen by the filesystem code called to write the core file.
- */
- clear_thread_flag(TIF_SIGPENDING);
-
ispipe = format_corename(&cn, &cprm);
- if (ispipe) {
+ if (ispipe) {
int dump_count;
char **helper_argv;
+ struct subprocess_info *sub_info;
if (ispipe < 0) {
printk(KERN_WARNING "format_corename failed\n");
@@ -571,15 +593,20 @@ void do_coredump(siginfo_t *siginfo)
goto fail_dropcount;
}
- retval = call_usermodehelper_fns(helper_argv[0], helper_argv,
- NULL, UMH_WAIT_EXEC, umh_pipe_setup,
- NULL, &cprm);
+ retval = -ENOMEM;
+ sub_info = call_usermodehelper_setup(helper_argv[0],
+ helper_argv, NULL, GFP_KERNEL,
+ umh_pipe_setup, NULL, &cprm);
+ if (sub_info)
+ retval = call_usermodehelper_exec(sub_info,
+ UMH_WAIT_EXEC);
+
argv_free(helper_argv);
if (retval) {
- printk(KERN_INFO "Core dump to %s pipe failed\n",
+ printk(KERN_INFO "Core dump to %s pipe failed\n",
cn.corename);
goto close_fail;
- }
+ }
} else {
struct inode *inode;
@@ -629,9 +656,8 @@ void do_coredump(siginfo_t *siginfo)
goto close_fail;
if (displaced)
put_files_struct(displaced);
- retval = binfmt->core_dump(&cprm);
- if (retval)
- current->signal->group_exit_code |= 0x80;
+
+ core_dumped = !dump_interrupted() && binfmt->core_dump(&cprm);
if (ispipe && core_pipe_limit)
wait_for_dump_helpers(cprm.file);
@@ -644,7 +670,7 @@ fail_dropcount:
fail_unlock:
kfree(cn.corename);
fail_corename:
- coredump_finish(mm);
+ coredump_finish(mm, core_dumped);
revert_creds(old_cred);
fail_creds:
put_cred(cred);
@@ -659,7 +685,9 @@ fail:
*/
int dump_write(struct file *file, const void *addr, int nr)
{
- return access_ok(VERIFY_READ, addr, nr) && file->f_op->write(file, addr, nr, &file->f_pos) == nr;
+ return !dump_interrupted() &&
+ access_ok(VERIFY_READ, addr, nr) &&
+ file->f_op->write(file, addr, nr, &file->f_pos) == nr;
}
EXPORT_SYMBOL(dump_write);
@@ -668,7 +696,8 @@ int dump_seek(struct file *file, loff_t off)
int ret = 1;
if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
- if (file->f_op->llseek(file, off, SEEK_CUR) < 0)
+ if (dump_interrupted() ||
+ file->f_op->llseek(file, off, SEEK_CUR) < 0)
return 0;
} else {
char *buf = (char *)get_zeroed_page(GFP_KERNEL);
diff --git a/fs/direct-io.c b/fs/direct-io.c
index f853263cf74f..d0255b75255f 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,6 +37,7 @@
#include <linux/uio.h>
#include <linux/atomic.h>
#include <linux/prefetch.h>
+#include <linux/aio.h>
/*
* How many user pages to map in one call to get_user_pages(). This determines
@@ -229,7 +230,8 @@ static inline struct page *dio_get_page(struct dio *dio,
* filesystems can use it to hold additional state between get_block calls and
* dio_complete.
*/
-static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async)
+static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is_async,
+ struct batch_complete *batch)
{
ssize_t transferred = 0;
@@ -263,7 +265,7 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret, bool is
} else {
inode_dio_done(dio->inode);
if (is_async)
- aio_complete(dio->iocb, ret, 0);
+ aio_complete_batch(dio->iocb, ret, 0, batch);
}
return ret;
@@ -273,7 +275,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
/*
* Asynchronous IO callback.
*/
-static void dio_bio_end_aio(struct bio *bio, int error)
+static void dio_bio_end_aio(struct bio *bio, int error, struct batch_complete *batch)
{
struct dio *dio = bio->bi_private;
unsigned long remaining;
@@ -289,7 +291,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
spin_unlock_irqrestore(&dio->bio_lock, flags);
if (remaining == 0) {
- dio_complete(dio, dio->iocb->ki_pos, 0, true);
+ dio_complete(dio, dio->iocb->ki_pos, 0, true, batch);
kmem_cache_free(dio_cache, dio);
}
}
@@ -328,7 +330,7 @@ void dio_end_io(struct bio *bio, int error)
struct dio *dio = bio->bi_private;
if (dio->is_async)
- dio_bio_end_aio(bio, error);
+ dio_bio_end_aio(bio, error, NULL);
else
dio_bio_end_io(bio, error);
}
@@ -349,9 +351,10 @@ dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
bio->bi_bdev = bdev;
bio->bi_sector = first_sector;
- if (dio->is_async)
- bio->bi_end_io = dio_bio_end_aio;
- else
+ if (dio->is_async) {
+ bio->bi_batch_end_io = dio_bio_end_aio;
+ bio->bi_flags |= 1 << BIO_BATCH_ENDIO;
+ } else
bio->bi_end_io = dio_bio_end_io;
sdio->bio = bio;
@@ -969,7 +972,8 @@ do_holes:
this_chunk_bytes = this_chunk_blocks << blkbits;
BUG_ON(this_chunk_bytes == 0);
- sdio->boundary = buffer_boundary(map_bh);
+ if (this_chunk_blocks == sdio->blocks_available)
+ sdio->boundary = buffer_boundary(map_bh);
ret = submit_page_section(dio, sdio, page,
offset_in_page,
this_chunk_bytes,
@@ -1272,7 +1276,7 @@ do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
dio_await_completion(dio);
if (drop_refcount(dio) == 0) {
- retval = dio_complete(dio, offset, retval, false);
+ retval = dio_complete(dio, offset, retval, false, NULL);
kmem_cache_free(dio_cache, dio);
} else
BUG_ON(retval != -EIOCBQUEUED);
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index c00e055b6282..f23d2a7ed438 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -58,6 +58,8 @@ int drop_caches_sysctl_handler(ctl_table *table, int write,
if (ret)
return ret;
if (write) {
+ printk(KERN_NOTICE "%s (%d): dropped kernel caches: %d\n",
+ current->comm, task_pid_nr(current), sysctl_drop_caches);
if (sysctl_drop_caches & 1)
iterate_supers(drop_pagecache_sb, NULL);
if (sysctl_drop_caches & 2)
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index 63b1f54b6a1f..201f0a0d6b0a 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -31,6 +31,7 @@
#include <linux/security.h>
#include <linux/compat.h>
#include <linux/fs_stack.h>
+#include <linux/aio.h>
#include "ecryptfs_kernel.h"
/**
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 495d15558f42..083f420b72f1 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -105,7 +105,7 @@
struct epoll_filefd {
struct file *file;
int fd;
-};
+} __packed;
/*
* Structure used to track possible nested calls, for too deep recursions
@@ -129,6 +129,8 @@ struct nested_calls {
/*
* Each file descriptor added to the eventpoll interface will
* have an entry of this type linked to the "rbr" RB tree.
+ * Avoid increasing the size of this struct, there can be many thousands
+ * of these on a server and we do not want this to take another cache line.
*/
struct epitem {
/* RB tree node used to link this structure to the eventpoll RB tree */
@@ -159,7 +161,7 @@ struct epitem {
struct list_head fllink;
/* wakeup_source used when EPOLLWAKEUP is set */
- struct wakeup_source *ws;
+ struct wakeup_source __rcu *ws;
/* The structure that describe the interested events and the source fd */
struct epoll_event event;
@@ -349,7 +351,7 @@ static inline struct epitem *ep_item_from_epqueue(poll_table *p)
/* Tells if the epoll_ctl(2) operation needs an event copy from userspace */
static inline int ep_op_has_event(int op)
{
- return op != EPOLL_CTL_DEL;
+ return op == EPOLL_CTL_ADD || op == EPOLL_CTL_MOD;
}
/* Initialize the poll safe wake up structure */
@@ -537,6 +539,38 @@ static void ep_unregister_pollwait(struct eventpoll *ep, struct epitem *epi)
}
}
+/* call only when ep->mtx is held */
+static inline struct wakeup_source *ep_wakeup_source(struct epitem *epi)
+{
+ return rcu_dereference_check(epi->ws, lockdep_is_held(&epi->ep->mtx));
+}
+
+/* call only when ep->mtx is held */
+static inline void ep_pm_stay_awake(struct epitem *epi)
+{
+ struct wakeup_source *ws = ep_wakeup_source(epi);
+
+ if (ws)
+ __pm_stay_awake(ws);
+}
+
+static inline bool ep_has_wakeup_source(struct epitem *epi)
+{
+ return rcu_access_pointer(epi->ws) ? true : false;
+}
+
+/* call when ep->mtx cannot be held (ep_poll_callback) */
+static inline void ep_pm_stay_awake_rcu(struct epitem *epi)
+{
+ struct wakeup_source *ws;
+
+ rcu_read_lock();
+ ws = rcu_dereference(epi->ws);
+ if (ws)
+ __pm_stay_awake(ws);
+ rcu_read_unlock();
+}
+
/**
* ep_scan_ready_list - Scans the ready list in a way that makes possible for
* the scan code, to call f_op->poll(). Also allows for
@@ -600,7 +634,7 @@ static int ep_scan_ready_list(struct eventpoll *ep,
*/
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
}
}
/*
@@ -669,7 +703,7 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
- wakeup_source_unregister(epi->ws);
+ wakeup_source_unregister(ep_wakeup_source(epi));
/* At this point it is safe to free the eventpoll item */
kmem_cache_free(epi_cache, epi);
@@ -679,6 +713,36 @@ static int ep_remove(struct eventpoll *ep, struct epitem *epi)
return 0;
}
+/*
+ * Disables a "struct epitem" in the eventpoll set. Returns -EBUSY if the item
+ * had no event flags set, indicating that another thread may be currently
+ * handling that item's events (in the case that EPOLLONESHOT was being
+ * used). Otherwise a zero result indicates that the item has been disabled
+ * from receiving events. A disabled item may be re-enabled via
+ * EPOLL_CTL_MOD. Must be called with "mtx" held.
+ */
+static int ep_disable(struct eventpoll *ep, struct epitem *epi)
+{
+ int result = 0;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ep->lock, flags);
+ if (epi->event.events & EPOLLONESHOT) {
+ if (epi->event.events & ~EP_PRIVATE_BITS) {
+ if (ep_is_linked(&epi->rdllink))
+ list_del_init(&epi->rdllink);
+ /* Ensure ep_poll_callback will not add epi back onto
+ ready list: */
+ epi->event.events &= EP_PRIVATE_BITS;
+ } else
+ result = -EBUSY;
+ } else
+ result = -EINVAL;
+ spin_unlock_irqrestore(&ep->lock, flags);
+
+ return result;
+}
+
static void ep_free(struct eventpoll *ep)
{
struct rb_node *rbp;
@@ -712,11 +776,15 @@ static void ep_free(struct eventpoll *ep)
* point we are sure no poll callbacks will be lingering around, and also by
* holding "epmutex" we can be sure that no file cleanup code will hit
* us during this operation. So we can avoid the lock on "ep->lock".
+ * We do not need to lock ep->mtx, either, we only do it to prevent
+ * a lockdep warning.
*/
+ mutex_lock(&ep->mtx);
while ((rbp = rb_first(&ep->rbr)) != NULL) {
epi = rb_entry(rbp, struct epitem, rbn);
ep_remove(ep, epi);
}
+ mutex_unlock(&ep->mtx);
mutex_unlock(&epmutex);
mutex_destroy(&ep->mtx);
@@ -735,6 +803,13 @@ static int ep_eventpoll_release(struct inode *inode, struct file *file)
return 0;
}
+static inline unsigned int ep_item_poll(struct epitem *epi, poll_table *pt)
+{
+ pt->_key = epi->event.events;
+
+ return epi->ffd.file->f_op->poll(epi->ffd.file, pt) & epi->event.events;
+}
+
static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
void *priv)
{
@@ -742,10 +817,9 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
poll_table pt;
init_poll_funcptr(&pt, NULL);
+
list_for_each_entry_safe(epi, tmp, head, rdllink) {
- pt._key = epi->event.events;
- if (epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
- epi->event.events)
+ if (ep_item_poll(epi, &pt))
return POLLIN | POLLRDNORM;
else {
/*
@@ -753,7 +827,7 @@ static int ep_read_events_proc(struct eventpoll *ep, struct list_head *head,
* callback, but it's not actually ready, as far as
* caller requested events goes. We can remove it here.
*/
- __pm_relax(epi->ws);
+ __pm_relax(ep_wakeup_source(epi));
list_del_init(&epi->rdllink);
}
}
@@ -985,7 +1059,7 @@ static int ep_poll_callback(wait_queue_t *wait, unsigned mode, int sync, void *k
/* If this file is already in the ready list we exit soon */
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
}
/*
@@ -1049,8 +1123,6 @@ static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
rb_insert_color(&epi->rbn, &ep->rbr);
}
-
-
#define PATH_ARR_SIZE 5
/*
* These are the number paths of length 1 to 5, that we are allowing to emanate
@@ -1147,6 +1219,7 @@ static int reverse_path_check(void)
static int ep_create_wakeup_source(struct epitem *epi)
{
const char *name;
+ struct wakeup_source *ws;
if (!epi->ep->ws) {
epi->ep->ws = wakeup_source_register("eventpoll");
@@ -1155,17 +1228,29 @@ static int ep_create_wakeup_source(struct epitem *epi)
}
name = epi->ffd.file->f_path.dentry->d_name.name;
- epi->ws = wakeup_source_register(name);
- if (!epi->ws)
+ ws = wakeup_source_register(name);
+
+ if (!ws)
return -ENOMEM;
+ rcu_assign_pointer(epi->ws, ws);
return 0;
}
-static void ep_destroy_wakeup_source(struct epitem *epi)
+/* rare code path, only used when EPOLL_CTL_MOD removes a wakeup source */
+static noinline void ep_destroy_wakeup_source(struct epitem *epi)
{
- wakeup_source_unregister(epi->ws);
- epi->ws = NULL;
+ struct wakeup_source *ws = ep_wakeup_source(epi);
+
+ rcu_assign_pointer(epi->ws, NULL);
+
+ /*
+ * wait for ep_pm_stay_awake_rcu to finish, synchronize_rcu is
+ * used internally by wakeup_source_remove, too (called by
+ * wakeup_source_unregister), so we cannot use call_rcu
+ */
+ synchronize_rcu();
+ wakeup_source_unregister(ws);
}
/*
@@ -1200,13 +1285,12 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
if (error)
goto error_create_wakeup_source;
} else {
- epi->ws = NULL;
+ RCU_INIT_POINTER(epi->ws, NULL);
}
/* Initialize the poll table using the queue callback */
epq.epi = epi;
init_poll_funcptr(&epq.pt, ep_ptable_queue_proc);
- epq.pt._key = event->events;
/*
* Attach the item to the poll hooks and get current event bits.
@@ -1215,7 +1299,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
* this operation completes, the poll callback can start hitting
* the new item.
*/
- revents = tfile->f_op->poll(tfile, &epq.pt);
+ revents = ep_item_poll(epi, &epq.pt);
/*
* We have to check if something went wrong during the poll wait queue
@@ -1248,7 +1332,7 @@ static int ep_insert(struct eventpoll *ep, struct epoll_event *event,
/* If the file is already "ready" we drop it inside the ready list */
if ((revents & event->events) && !ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1289,7 +1373,7 @@ error_unregister:
list_del_init(&epi->rdllink);
spin_unlock_irqrestore(&ep->lock, flags);
- wakeup_source_unregister(epi->ws);
+ wakeup_source_unregister(ep_wakeup_source(epi));
error_create_wakeup_source:
kmem_cache_free(epi_cache, epi);
@@ -1315,12 +1399,11 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* f_op->poll() call and the new event set registering.
*/
epi->event.events = event->events; /* need barrier below */
- pt._key = event->events;
epi->event.data = event->data; /* protected by mtx */
if (epi->event.events & EPOLLWAKEUP) {
- if (!epi->ws)
+ if (!ep_has_wakeup_source(epi))
ep_create_wakeup_source(epi);
- } else if (epi->ws) {
+ } else if (ep_has_wakeup_source(epi)) {
ep_destroy_wakeup_source(epi);
}
@@ -1348,7 +1431,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
* Get current event bits. We can safely use the file* here because
* its usage count has been increased by the caller of this function.
*/
- revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt);
+ revents = ep_item_poll(epi, &pt);
/*
* If the item is "hot" and it is not registered inside the ready
@@ -1358,7 +1441,7 @@ static int ep_modify(struct eventpoll *ep, struct epitem *epi, struct epoll_even
spin_lock_irq(&ep->lock);
if (!ep_is_linked(&epi->rdllink)) {
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
/* Notify waiting tasks that events are available */
if (waitqueue_active(&ep->wq))
@@ -1384,6 +1467,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
unsigned int revents;
struct epitem *epi;
struct epoll_event __user *uevent;
+ struct wakeup_source *ws;
poll_table pt;
init_poll_funcptr(&pt, NULL);
@@ -1406,14 +1490,16 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
* instead, but then epi->ws would temporarily be out of sync
* with ep_is_linked().
*/
- if (epi->ws && epi->ws->active)
- __pm_stay_awake(ep->ws);
- __pm_relax(epi->ws);
+ ws = ep_wakeup_source(epi);
+ if (ws) {
+ if (ws->active)
+ __pm_stay_awake(ep->ws);
+ __pm_relax(ws);
+ }
+
list_del_init(&epi->rdllink);
- pt._key = epi->event.events;
- revents = epi->ffd.file->f_op->poll(epi->ffd.file, &pt) &
- epi->event.events;
+ revents = ep_item_poll(epi, &pt);
/*
* If the event mask intersect the caller-requested one,
@@ -1425,7 +1511,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
if (__put_user(revents, &uevent->events) ||
__put_user(epi->event.data, &uevent->data)) {
list_add(&epi->rdllink, head);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
return eventcnt ? eventcnt : -EFAULT;
}
eventcnt++;
@@ -1445,7 +1531,7 @@ static int ep_send_events_proc(struct eventpoll *ep, struct list_head *head,
* poll callback will queue them in ep->ovflist.
*/
list_add_tail(&epi->rdllink, &ep->rdllist);
- __pm_stay_awake(epi->ws);
+ ep_pm_stay_awake(epi);
}
}
}
@@ -1836,6 +1922,12 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
} else
error = -ENOENT;
break;
+ case EPOLL_CTL_DISABLE:
+ if (epi)
+ error = ep_disable(ep, epi);
+ else
+ error = -ENOENT;
+ break;
}
mutex_unlock(&ep->mtx);
@@ -2011,6 +2103,12 @@ static int __init eventpoll_init(void)
/* Initialize the structure used to perform file's f_op->poll() calls */
ep_nested_calls_init(&poll_readywalk_ncalls);
+ /*
+ * We can have many thousands of epitems, so prevent this from
+ * using an extra cache line on 64-bit (and smaller) CPUs
+ */
+ BUILD_BUG_ON(sizeof(void *) <= 8 && sizeof(struct epitem) > 128);
+
/* Allocates slab cache used to allocate "struct epitem" items */
epi_cache = kmem_cache_create("eventpoll_epi", sizeof(struct epitem),
0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
diff --git a/fs/exec.c b/fs/exec.c
index a96a4885bbbf..260f89f66651 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -613,7 +613,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* when the old and new regions overlap clear from new_end.
*/
free_pgd_range(&tlb, new_end, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
} else {
/*
* otherwise, clean from old_start; this is done to not touch
@@ -622,7 +622,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
* for the others its just a little faster.
*/
free_pgd_range(&tlb, old_start, old_end, new_end,
- vma->vm_next ? vma->vm_next->vm_start : 0);
+ vma->vm_next ? vma->vm_next->vm_start : USER_PGTABLES_CEILING);
}
tlb_finish_mmu(&tlb, new_end, old_end);
@@ -1027,17 +1027,7 @@ EXPORT_SYMBOL_GPL(get_task_comm);
void set_task_comm(struct task_struct *tsk, char *buf)
{
task_lock(tsk);
-
trace_task_rename(tsk, buf);
-
- /*
- * Threads may access current->comm without holding
- * the task lock, so write the string carefully.
- * Readers without a lock may see incomplete new
- * names but are safe from non-terminating string reads.
- */
- memset(tsk->comm, 0, TASK_COMM_LEN);
- wmb();
strlcpy(tsk->comm, buf, sizeof(tsk->comm));
task_unlock(tsk);
perf_event_comm(tsk);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index fe60cc1117d8..0a87bb10998d 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -31,6 +31,7 @@
#include <linux/mpage.h>
#include <linux/fiemap.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include "ext2.h"
#include "acl.h"
#include "xip.h"
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index d512c4bc4ad7..eac4f041f5fc 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -27,6 +27,7 @@
#include <linux/writeback.h>
#include <linux/mpage.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include "ext3.h"
#include "xattr.h"
#include "acl.h"
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
index fb5120a5505c..3dc48cc8b6eb 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
@@ -2067,7 +2067,6 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent)
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
"writeback");
- sb->s_flags |= MS_SNAP_STABLE;
return 0;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 64848b595b24..4959e29573b6 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -23,6 +23,7 @@
#include <linux/jbd2.h>
#include <linux/mount.h>
#include <linux/path.h>
+#include <linux/aio.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include "ext4.h"
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index b505a145a593..21de12366b47 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -20,6 +20,7 @@
* (sct@redhat.com), 1993, 1998
*/
+#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "truncate.h"
#include "ext4_extents.h" /* Needed for EXT_MAX_BLOCKS */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index b3a5213bc73e..f63f83d0f6ee 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -37,6 +37,7 @@
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <linux/aio.h>
#include "ext4_jbd2.h"
#include "xattr.h"
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 047a6de04a0a..f6ae3164fce1 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -18,6 +18,7 @@
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/namei.h>
+#include <linux/aio.h>
#include <linux/uio.h>
#include <linux/bio.h>
#include <linux/workqueue.h>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index ea8be6fc38f1..42c8410ae30b 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -12,6 +12,7 @@
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
diff --git a/fs/fat/dir.c b/fs/fat/dir.c
index 165012ef363a..7a6f02caf286 100644
--- a/fs/fat/dir.c
+++ b/fs/fat/dir.c
@@ -964,6 +964,29 @@ int fat_scan(struct inode *dir, const unsigned char *name,
}
EXPORT_SYMBOL_GPL(fat_scan);
+/*
+ * Scans a directory for a given logstart.
+ * Returns an error code or zero.
+ */
+int fat_scan_logstart(struct inode *dir, int i_logstart,
+ struct fat_slot_info *sinfo)
+{
+ struct super_block *sb = dir->i_sb;
+
+ sinfo->slot_off = 0;
+ sinfo->bh = NULL;
+ while (fat_get_short_entry(dir, &sinfo->slot_off, &sinfo->bh,
+ &sinfo->de) >= 0) {
+ if (fat_get_start(MSDOS_SB(sb), sinfo->de) == i_logstart) {
+ sinfo->slot_off -= sizeof(*sinfo->de);
+ sinfo->nr_slots = 1;
+ sinfo->i_pos = fat_make_i_pos(sb, sinfo->bh, sinfo->de);
+ return 0;
+ }
+ }
+ return -ENOENT;
+}
+
static int __fat_remove_entries(struct inode *dir, loff_t pos, int nr_slots)
{
struct super_block *sb = dir->i_sb;
diff --git a/fs/fat/fat.h b/fs/fat/fat.h
index e9cc3f0d58e2..21664fcf3616 100644
--- a/fs/fat/fat.h
+++ b/fs/fat/fat.h
@@ -23,6 +23,9 @@
#define FAT_ERRORS_PANIC 2 /* panic on error */
#define FAT_ERRORS_RO 3 /* remount r/o on error */
+#define FAT_NFS_STALE_RW 1 /* NFS RW support, can cause ESTALE */
+#define FAT_NFS_NOSTALE_RO 2 /* NFS RO support, no ESTALE issue */
+
struct fat_mount_options {
kuid_t fs_uid;
kgid_t fs_gid;
@@ -34,6 +37,7 @@ struct fat_mount_options {
unsigned short shortname; /* flags for shortname display/create rule */
unsigned char name_check; /* r = relaxed, n = normal, s = strict */
unsigned char errors; /* On error: continue, panic, remount-ro */
+ unsigned char nfs; /* NFS support: nostale_ro, stale_rw */
unsigned short allow_utime;/* permission for setting the [am]time */
unsigned quiet:1, /* set = fake successful chmods and chowns */
showexec:1, /* set = only set x bit for com/exe/bat */
@@ -48,8 +52,7 @@ struct fat_mount_options {
usefree:1, /* Use free_clusters for FAT32 */
tz_set:1, /* Filesystem timestamps' offset set */
rodir:1, /* allow ATTR_RO for directory */
- discard:1, /* Issue discard requests on deletions */
- nfs:1; /* Do extra work needed for NFS export */
+ discard:1; /* Issue discard requests on deletions */
};
#define FAT_HASH_BITS 8
@@ -72,6 +75,7 @@ struct msdos_sb_info {
unsigned long root_cluster; /* first cluster of the root directory */
unsigned long fsinfo_sector; /* sector number of FAT32 fsinfo */
struct mutex fat_lock;
+ struct mutex nfs_build_inode_lock;
struct mutex s_lock;
unsigned int prev_free; /* previously allocated cluster number */
unsigned int free_clusters; /* -1 if undefined */
@@ -215,6 +219,27 @@ static inline sector_t fat_clus_to_blknr(struct msdos_sb_info *sbi, int clus)
+ sbi->data_start;
}
+static inline void fat_get_blknr_offset(struct msdos_sb_info *sbi,
+ loff_t i_pos, sector_t *blknr, int *offset)
+{
+ *blknr = i_pos >> sbi->dir_per_block_bits;
+ *offset = i_pos & (sbi->dir_per_block - 1);
+}
+
+static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
+ struct inode *inode)
+{
+ loff_t i_pos;
+#if BITS_PER_LONG == 32
+ spin_lock(&sbi->inode_hash_lock);
+#endif
+ i_pos = MSDOS_I(inode)->i_pos;
+#if BITS_PER_LONG == 32
+ spin_unlock(&sbi->inode_hash_lock);
+#endif
+ return i_pos;
+}
+
static inline void fat16_towchar(wchar_t *dst, const __u8 *src, size_t len)
{
#ifdef __BIG_ENDIAN
@@ -271,6 +296,8 @@ extern int fat_dir_empty(struct inode *dir);
extern int fat_subdirs(struct inode *dir);
extern int fat_scan(struct inode *dir, const unsigned char *name,
struct fat_slot_info *sinfo);
+extern int fat_scan_logstart(struct inode *dir, int i_logstart,
+ struct fat_slot_info *sinfo);
extern int fat_get_dotdot_entry(struct inode *dir, struct buffer_head **bh,
struct msdos_dir_entry **de);
extern int fat_alloc_new_dir(struct inode *dir, struct timespec *ts);
@@ -348,6 +375,7 @@ extern struct inode *fat_build_inode(struct super_block *sb,
extern int fat_sync_inode(struct inode *inode);
extern int fat_fill_super(struct super_block *sb, void *data, int silent,
int isvfat, void (*setup)(struct super_block *));
+extern int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de);
extern int fat_flush_inodes(struct super_block *sb, struct inode *i1,
struct inode *i2);
@@ -382,12 +410,8 @@ int fat_cache_init(void);
void fat_cache_destroy(void);
/* fat/nfs.c */
-struct fid;
-extern struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type);
-extern struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
- int fh_len, int fh_type);
-extern struct dentry *fat_get_parent(struct dentry *child_dir);
+extern const struct export_operations fat_export_ops;
+extern const struct export_operations fat_export_ops_nostale;
/* helper for printk */
typedef unsigned long long llu;
diff --git a/fs/fat/file.c b/fs/fat/file.c
index 3978f8ca1823..b0b632e50ddb 100644
--- a/fs/fat/file.c
+++ b/fs/fat/file.c
@@ -306,6 +306,11 @@ int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
struct inode *inode = dentry->d_inode;
generic_fillattr(inode, stat);
stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
+
+ if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
+ /* Use i_pos for ino. This is used as fileid of nfs. */
+ stat->ino = fat_i_pos_read(MSDOS_SB(inode->i_sb), inode);
+ }
return 0;
}
EXPORT_SYMBOL_GPL(fat_getattr);
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index acf6e479b443..dfce656ddb33 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -18,8 +18,8 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/buffer_head.h>
-#include <linux/exportfs.h>
#include <linux/mount.h>
+#include <linux/aio.h>
#include <linux/vfs.h>
#include <linux/parser.h>
#include <linux/uio.h>
@@ -385,7 +385,7 @@ static int fat_calc_dir_size(struct inode *inode)
}
/* doesn't deal with root inode */
-static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
+int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
{
struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
int error;
@@ -444,12 +444,25 @@ static int fat_fill_inode(struct inode *inode, struct msdos_dir_entry *de)
return 0;
}
+static inline void fat_lock_build_inode(struct msdos_sb_info *sbi)
+{
+ if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ mutex_lock(&sbi->nfs_build_inode_lock);
+}
+
+static inline void fat_unlock_build_inode(struct msdos_sb_info *sbi)
+{
+ if (sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ mutex_unlock(&sbi->nfs_build_inode_lock);
+}
+
struct inode *fat_build_inode(struct super_block *sb,
struct msdos_dir_entry *de, loff_t i_pos)
{
struct inode *inode;
int err;
+ fat_lock_build_inode(MSDOS_SB(sb));
inode = fat_iget(sb, i_pos);
if (inode)
goto out;
@@ -469,6 +482,7 @@ struct inode *fat_build_inode(struct super_block *sb,
fat_attach(inode, i_pos);
insert_inode_hash(inode);
out:
+ fat_unlock_build_inode(MSDOS_SB(sb));
return inode;
}
@@ -655,20 +669,6 @@ static int fat_statfs(struct dentry *dentry, struct kstatfs *buf)
return 0;
}
-static inline loff_t fat_i_pos_read(struct msdos_sb_info *sbi,
- struct inode *inode)
-{
- loff_t i_pos;
-#if BITS_PER_LONG == 32
- spin_lock(&sbi->inode_hash_lock);
-#endif
- i_pos = MSDOS_I(inode)->i_pos;
-#if BITS_PER_LONG == 32
- spin_unlock(&sbi->inode_hash_lock);
-#endif
- return i_pos;
-}
-
static int __fat_write_inode(struct inode *inode, int wait)
{
struct super_block *sb = inode->i_sb;
@@ -676,7 +676,8 @@ static int __fat_write_inode(struct inode *inode, int wait)
struct buffer_head *bh;
struct msdos_dir_entry *raw_entry;
loff_t i_pos;
- int err;
+ sector_t blocknr;
+ int err, offset;
if (inode->i_ino == MSDOS_ROOT_INO)
return 0;
@@ -686,7 +687,8 @@ retry:
if (!i_pos)
return 0;
- bh = sb_bread(sb, i_pos >> sbi->dir_per_block_bits);
+ fat_get_blknr_offset(sbi, i_pos, &blocknr, &offset);
+ bh = sb_bread(sb, blocknr);
if (!bh) {
fat_msg(sb, KERN_ERR, "unable to read inode block "
"for updating (i_pos %lld)", i_pos);
@@ -699,8 +701,7 @@ retry:
goto retry;
}
- raw_entry = &((struct msdos_dir_entry *) (bh->b_data))
- [i_pos & (sbi->dir_per_block - 1)];
+ raw_entry = &((struct msdos_dir_entry *) (bh->b_data))[offset];
if (S_ISDIR(inode->i_mode))
raw_entry->size = 0;
else
@@ -761,12 +762,6 @@ static const struct super_operations fat_sops = {
.show_options = fat_show_options,
};
-static const struct export_operations fat_export_ops = {
- .fh_to_dentry = fat_fh_to_dentry,
- .fh_to_parent = fat_fh_to_parent,
- .get_parent = fat_get_parent,
-};
-
static int fat_show_options(struct seq_file *m, struct dentry *root)
{
struct msdos_sb_info *sbi = MSDOS_SB(root->d_sb);
@@ -814,8 +809,6 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",usefree");
if (opts->quiet)
seq_puts(m, ",quiet");
- if (opts->nfs)
- seq_puts(m, ",nfs");
if (opts->showexec)
seq_puts(m, ",showexec");
if (opts->sys_immutable)
@@ -849,6 +842,10 @@ static int fat_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",errors=panic");
else
seq_puts(m, ",errors=remount-ro");
+ if (opts->nfs == FAT_NFS_NOSTALE_RO)
+ seq_puts(m, ",nfs=nostale_ro");
+ else if (opts->nfs)
+ seq_puts(m, ",nfs=stale_rw");
if (opts->discard)
seq_puts(m, ",discard");
@@ -865,7 +862,7 @@ enum {
Opt_uni_xl_no, Opt_uni_xl_yes, Opt_nonumtail_no, Opt_nonumtail_yes,
Opt_obsolete, Opt_flush, Opt_tz_utc, Opt_rodir, Opt_err_cont,
Opt_err_panic, Opt_err_ro, Opt_discard, Opt_nfs, Opt_time_offset,
- Opt_err,
+ Opt_nfs_stale_rw, Opt_nfs_nostale_ro, Opt_err,
};
static const match_table_t fat_tokens = {
@@ -895,7 +892,9 @@ static const match_table_t fat_tokens = {
{Opt_err_panic, "errors=panic"},
{Opt_err_ro, "errors=remount-ro"},
{Opt_discard, "discard"},
- {Opt_nfs, "nfs"},
+ {Opt_nfs_stale_rw, "nfs"},
+ {Opt_nfs_stale_rw, "nfs=stale_rw"},
+ {Opt_nfs_nostale_ro, "nfs=nostale_ro"},
{Opt_obsolete, "conv=binary"},
{Opt_obsolete, "conv=text"},
{Opt_obsolete, "conv=auto"},
@@ -1092,6 +1091,12 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_err_ro:
opts->errors = FAT_ERRORS_RO;
break;
+ case Opt_nfs_stale_rw:
+ opts->nfs = FAT_NFS_STALE_RW;
+ break;
+ case Opt_nfs_nostale_ro:
+ opts->nfs = FAT_NFS_NOSTALE_RO;
+ break;
/* msdos specific */
case Opt_dots:
@@ -1150,9 +1155,6 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
case Opt_discard:
opts->discard = 1;
break;
- case Opt_nfs:
- opts->nfs = 1;
- break;
/* obsolete mount options */
case Opt_obsolete:
@@ -1183,6 +1185,10 @@ out:
opts->allow_utime = ~opts->fs_dmask & (S_IWGRP | S_IWOTH);
if (opts->unicode_xlate)
opts->utf8 = 0;
+ if (opts->nfs == FAT_NFS_NOSTALE_RO) {
+ sb->s_flags |= MS_RDONLY;
+ sb->s_export_op = &fat_export_ops_nostale;
+ }
return 0;
}
@@ -1193,7 +1199,7 @@ static int fat_read_root(struct inode *inode)
struct msdos_sb_info *sbi = MSDOS_SB(sb);
int error;
- MSDOS_I(inode)->i_pos = 0;
+ MSDOS_I(inode)->i_pos = MSDOS_ROOT_INO;
inode->i_uid = sbi->options.fs_uid;
inode->i_gid = sbi->options.fs_gid;
inode->i_version++;
@@ -1256,6 +1262,7 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat,
sb->s_magic = MSDOS_SUPER_MAGIC;
sb->s_op = &fat_sops;
sb->s_export_op = &fat_export_ops;
+ mutex_init(&sbi->nfs_build_inode_lock);
ratelimit_state_init(&sbi->ratelimit, DEFAULT_RATELIMIT_INTERVAL,
DEFAULT_RATELIMIT_BURST);
diff --git a/fs/fat/nfs.c b/fs/fat/nfs.c
index 499c10438ca2..93e14933dcb6 100644
--- a/fs/fat/nfs.c
+++ b/fs/fat/nfs.c
@@ -14,6 +14,18 @@
#include <linux/exportfs.h>
#include "fat.h"
+struct fat_fid {
+ u32 i_gen;
+ u32 i_pos_low;
+ u16 i_pos_hi;
+ u16 parent_i_pos_hi;
+ u32 parent_i_pos_low;
+ u32 parent_i_gen;
+};
+
+#define FAT_FID_SIZE_WITHOUT_PARENT 3
+#define FAT_FID_SIZE_WITH_PARENT (sizeof(struct fat_fid)/sizeof(u32))
+
/**
* Look up a directory inode given its starting cluster.
*/
@@ -38,63 +50,252 @@ static struct inode *fat_dget(struct super_block *sb, int i_logstart)
return inode;
}
-static struct inode *fat_nfs_get_inode(struct super_block *sb,
- u64 ino, u32 generation)
+static struct inode *fat_ilookup(struct super_block *sb, u64 ino, loff_t i_pos)
{
- struct inode *inode;
+ if (MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO)
+ return fat_iget(sb, i_pos);
- if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO))
- return NULL;
+ else {
+ if ((ino < MSDOS_ROOT_INO) || (ino == MSDOS_FSINFO_INO))
+ return NULL;
+ return ilookup(sb, ino);
+ }
+}
+
+static struct inode *__fat_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation, loff_t i_pos)
+{
+ struct inode *inode = fat_ilookup(sb, ino, i_pos);
- inode = ilookup(sb, ino);
if (inode && generation && (inode->i_generation != generation)) {
iput(inode);
inode = NULL;
}
+ if (inode == NULL && MSDOS_SB(sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
+ struct buffer_head *bh = NULL;
+ struct msdos_dir_entry *de ;
+ sector_t blocknr;
+ int offset;
+ fat_get_blknr_offset(MSDOS_SB(sb), i_pos, &blocknr, &offset);
+ bh = sb_bread(sb, blocknr);
+ if (!bh) {
+ fat_msg(sb, KERN_ERR,
+ "unable to read block(%llu) for building NFS inode",
+ (llu)blocknr);
+ return inode;
+ }
+ de = (struct msdos_dir_entry *)bh->b_data;
+ /* If a file is deleted on server and client is not updated
+ * yet, we must not build the inode upon a lookup call.
+ */
+ if (IS_FREE(de[offset].name))
+ inode = NULL;
+ else
+ inode = fat_build_inode(sb, &de[offset], i_pos);
+ brelse(bh);
+ }
return inode;
}
+static struct inode *fat_nfs_get_inode(struct super_block *sb,
+ u64 ino, u32 generation)
+{
+
+ return __fat_nfs_get_inode(sb, ino, generation, 0);
+}
+
+static int
+fat_encode_fh_nostale(struct inode *inode, __u32 *fh, int *lenp,
+ struct inode *parent)
+{
+ int len = *lenp;
+ struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb);
+ struct fat_fid *fid = (struct fat_fid *) fh;
+ loff_t i_pos;
+ int type = FILEID_FAT_WITHOUT_PARENT;
+
+ if (parent) {
+ if (len < FAT_FID_SIZE_WITH_PARENT) {
+ *lenp = FAT_FID_SIZE_WITH_PARENT;
+ return FILEID_INVALID;
+ }
+ } else {
+ if (len < FAT_FID_SIZE_WITHOUT_PARENT) {
+ *lenp = FAT_FID_SIZE_WITHOUT_PARENT;
+ return FILEID_INVALID;
+ }
+ }
+
+ i_pos = fat_i_pos_read(sbi, inode);
+ *lenp = FAT_FID_SIZE_WITHOUT_PARENT;
+ fid->i_gen = inode->i_generation;
+ fid->i_pos_low = i_pos & 0xFFFFFFFF;
+ fid->i_pos_hi = (i_pos >> 32) & 0xFFFF;
+ if (parent) {
+ i_pos = fat_i_pos_read(sbi, parent);
+ fid->parent_i_pos_hi = (i_pos >> 32) & 0xFFFF;
+ fid->parent_i_pos_low = i_pos & 0xFFFFFFFF;
+ fid->parent_i_gen = parent->i_generation;
+ type = FILEID_FAT_WITH_PARENT;
+ *lenp = FAT_FID_SIZE_WITH_PARENT;
+ }
+
+ return type;
+}
+
/**
* Map a NFS file handle to a corresponding dentry.
* The dentry may or may not be connected to the filesystem root.
*/
-struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
+static struct dentry *fat_fh_to_dentry(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
fat_nfs_get_inode);
}
+static struct dentry *fat_fh_to_dentry_nostale(struct super_block *sb,
+ struct fid *fh, int fh_len,
+ int fh_type)
+{
+ struct inode *inode = NULL;
+ struct fat_fid *fid = (struct fat_fid *)fh;
+ loff_t i_pos;
+
+ switch (fh_type) {
+ case FILEID_FAT_WITHOUT_PARENT:
+ if (fh_len < FAT_FID_SIZE_WITHOUT_PARENT)
+ return NULL;
+ break;
+ case FILEID_FAT_WITH_PARENT:
+ if (fh_len < FAT_FID_SIZE_WITH_PARENT)
+ return NULL;
+ break;
+ default:
+ return NULL;
+ }
+ i_pos = fid->i_pos_hi;
+ i_pos = (i_pos << 32) | (fid->i_pos_low);
+ inode = __fat_nfs_get_inode(sb, 0, fid->i_gen, i_pos);
+
+ return d_obtain_alias(inode);
+}
+
/*
* Find the parent for a file specified by NFS handle.
* This requires that the handle contain the i_ino of the parent.
*/
-struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
+static struct dentry *fat_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
return generic_fh_to_parent(sb, fid, fh_len, fh_type,
fat_nfs_get_inode);
}
+static struct dentry *fat_fh_to_parent_nostale(struct super_block *sb,
+ struct fid *fh, int fh_len,
+ int fh_type)
+{
+ struct inode *inode = NULL;
+ struct fat_fid *fid = (struct fat_fid *)fh;
+ loff_t i_pos;
+
+ if (fh_len < FAT_FID_SIZE_WITH_PARENT)
+ return NULL;
+
+ switch (fh_type) {
+ case FILEID_FAT_WITH_PARENT:
+ i_pos = fid->parent_i_pos_hi;
+ i_pos = (i_pos << 32) | (fid->parent_i_pos_low);
+ inode = __fat_nfs_get_inode(sb, 0, fid->parent_i_gen, i_pos);
+ break;
+ }
+
+ return d_obtain_alias(inode);
+}
+
+/*
+ * Rebuild the parent for a directory that is not connected
+ * to the filesystem root
+ */
+static
+struct inode *fat_rebuild_parent(struct super_block *sb, int parent_logstart)
+{
+ int search_clus, clus_to_match;
+ struct msdos_dir_entry *de;
+ struct inode *parent = NULL;
+ struct inode *dummy_grand_parent = NULL;
+ struct fat_slot_info sinfo;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
+ sector_t blknr = fat_clus_to_blknr(sbi, parent_logstart);
+ struct buffer_head *parent_bh = sb_bread(sb, blknr);
+ if (!parent_bh) {
+ fat_msg(sb, KERN_ERR,
+ "unable to read cluster of parent directory");
+ return NULL;
+ }
+
+ de = (struct msdos_dir_entry *) parent_bh->b_data;
+ clus_to_match = fat_get_start(sbi, &de[0]);
+ search_clus = fat_get_start(sbi, &de[1]);
+
+ dummy_grand_parent = fat_dget(sb, search_clus);
+ if (!dummy_grand_parent) {
+ dummy_grand_parent = new_inode(sb);
+ if (!dummy_grand_parent) {
+ brelse(parent_bh);
+ return parent;
+ }
+
+ dummy_grand_parent->i_ino = iunique(sb, MSDOS_ROOT_INO);
+ fat_fill_inode(dummy_grand_parent, &de[1]);
+ MSDOS_I(dummy_grand_parent)->i_pos = -1;
+ }
+
+ if (!fat_scan_logstart(dummy_grand_parent, clus_to_match, &sinfo))
+ parent = fat_build_inode(sb, sinfo.de, sinfo.i_pos);
+
+ brelse(parent_bh);
+ iput(dummy_grand_parent);
+
+ return parent;
+}
+
/*
* Find the parent for a directory that is not currently connected to
* the filesystem root.
*
* On entry, the caller holds child_dir->d_inode->i_mutex.
*/
-struct dentry *fat_get_parent(struct dentry *child_dir)
+static struct dentry *fat_get_parent(struct dentry *child_dir)
{
struct super_block *sb = child_dir->d_sb;
struct buffer_head *bh = NULL;
struct msdos_dir_entry *de;
struct inode *parent_inode = NULL;
+ struct msdos_sb_info *sbi = MSDOS_SB(sb);
if (!fat_get_dotdot_entry(child_dir->d_inode, &bh, &de)) {
- int parent_logstart = fat_get_start(MSDOS_SB(sb), de);
+ int parent_logstart = fat_get_start(sbi, de);
parent_inode = fat_dget(sb, parent_logstart);
+ if (!parent_inode && sbi->options.nfs == FAT_NFS_NOSTALE_RO)
+ parent_inode = fat_rebuild_parent(sb, parent_logstart);
}
brelse(bh);
return d_obtain_alias(parent_inode);
}
+
+const struct export_operations fat_export_ops = {
+ .fh_to_dentry = fat_fh_to_dentry,
+ .fh_to_parent = fat_fh_to_parent,
+ .get_parent = fat_get_parent,
+};
+
+const struct export_operations fat_export_ops_nostale = {
+ .encode_fh = fat_encode_fh_nostale,
+ .fh_to_dentry = fat_fh_to_dentry_nostale,
+ .fh_to_parent = fat_fh_to_parent_nostale,
+ .get_parent = fat_get_parent,
+};
diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c
index 6f96a8def147..06b5e086ab3a 100644
--- a/fs/fuse/cuse.c
+++ b/fs/fuse/cuse.c
@@ -38,6 +38,7 @@
#include <linux/device.h>
#include <linux/file.h>
#include <linux/fs.h>
+#include <linux/aio.h>
#include <linux/kdev_t.h>
#include <linux/kthread.h>
#include <linux/list.h>
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 11dfa0c3fb46..06c569e492ed 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -19,6 +19,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/swap.h>
#include <linux/splice.h>
+#include <linux/aio.h>
MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 34b80ba95bad..f2ae8fd6242c 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -15,6 +15,7 @@
#include <linux/module.h>
#include <linux/compat.h>
#include <linux/swap.h>
+#include <linux/aio.h>
static const struct file_operations fuse_direct_io_file_operations;
@@ -971,7 +972,8 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
return err;
count = ocount;
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
mutex_lock(&inode->i_mutex);
/* We can write back this queue in page reclaim */
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 24f414f0ce61..371bd144d802 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -20,6 +20,7 @@
#include <linux/swap.h>
#include <linux/gfs2_ondisk.h>
#include <linux/backing-dev.h>
+#include <linux/aio.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index d79c2dadc536..acd16764b133 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -25,6 +25,7 @@
#include <asm/uaccess.h>
#include <linux/dlm.h>
#include <linux/dlm_plock.h>
+#include <linux/aio.h>
#include "gfs2.h"
#include "incore.h"
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 3031dfdd2358..a9d60d46ba99 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include "hfs_fs.h"
#include "btree.h"
diff --git a/fs/hfsplus/bfind.c b/fs/hfsplus/bfind.c
index d73c98d1ee99..bbfdc1707725 100644
--- a/fs/hfsplus/bfind.c
+++ b/fs/hfsplus/bfind.c
@@ -56,7 +56,8 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode,
int *end,
int *cur_rec)
{
- __be32 cur_cnid, search_cnid;
+ __be32 cur_cnid;
+ __be32 search_cnid;
if (bnode->tree->cnid == HFSPLUS_EXT_CNID) {
cur_cnid = fd->key->ext.cnid;
@@ -67,8 +68,11 @@ int hfs_find_1st_rec_by_cnid(struct hfs_bnode *bnode,
} else if (bnode->tree->cnid == HFSPLUS_ATTR_CNID) {
cur_cnid = fd->key->attr.cnid;
search_cnid = fd->search_key->attr.cnid;
- } else
+ } else {
+ cur_cnid = 0; /* used-uninitialized warning */
+ search_cnid = 0;
BUG();
+ }
if (cur_cnid == search_cnid) {
(*end) = (*cur_rec);
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 160ccc9cdb4b..cdd181d8ba09 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -14,6 +14,7 @@
#include <linux/pagemap.h>
#include <linux/mpage.h>
#include <linux/sched.h>
+#include <linux/aio.h>
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c
index 86b39b167c23..b91b688987a9 100644
--- a/fs/jbd/commit.c
+++ b/fs/jbd/commit.c
@@ -163,7 +163,7 @@ static void journal_do_submit_data(struct buffer_head **wbuf, int bufs,
for (i = 0; i < bufs; i++) {
wbuf[i]->b_end_io = end_buffer_write_sync;
/* We use-up our safety reference in submit_bh() */
- submit_bh(write_op, wbuf[i]);
+ _submit_bh(write_op, wbuf[i], 1 << BIO_SNAP_STABLE);
}
}
@@ -667,7 +667,7 @@ start_journal_io:
clear_buffer_dirty(bh);
set_buffer_uptodate(bh);
bh->b_end_io = journal_end_buffer_io_sync;
- submit_bh(write_op, bh);
+ _submit_bh(write_op, bh, 1 << BIO_SNAP_STABLE);
}
cond_resched();
diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index b7dc47ba675e..1781f06aa1c1 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -23,6 +23,7 @@
#include <linux/pagemap.h>
#include <linux/quotaops.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include "jfs_incore.h"
#include "jfs_inode.h"
#include "jfs_filsys.h"
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 6b49f14eac8c..1e92930d59c3 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -25,7 +25,7 @@
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include "nilfs.h"
#include "btnode.h"
#include "segment.h"
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index 5b2d4f0853ac..b870ae00517a 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -27,6 +27,7 @@
#include <linux/swap.h>
#include <linux/uio.h>
#include <linux/writeback.h>
+#include <linux/aio.h>
#include <asm/page.h>
#include <asm/uaccess.h>
@@ -2129,7 +2130,8 @@ static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
mutex_lock(&inode->i_mutex);
ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d3e118cc6ffa..2778b0255dc6 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -28,6 +28,7 @@
#include <linux/quotaops.h>
#include <linux/slab.h>
#include <linux/log2.h>
+#include <linux/aio.h>
#include "aops.h"
#include "attrib.h"
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index ffb2da370a99..f671e49beb34 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,6 +22,8 @@
#ifndef OCFS2_AOPS_H
#define OCFS2_AOPS_H
+#include <linux/aio.h>
+
handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
struct page *page,
unsigned from,
diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c
index 12ae194ac943..3a44a648dae7 100644
--- a/fs/ocfs2/dlmglue.c
+++ b/fs/ocfs2/dlmglue.c
@@ -2322,7 +2322,7 @@ int ocfs2_inode_lock_full_nested(struct inode *inode,
status = __ocfs2_cluster_lock(osb, lockres, level, dlm_flags,
arg_flags, subclass, _RET_IP_);
if (status < 0) {
- if (status != -EAGAIN && status != -EIOCBRETRY)
+ if (status != -EAGAIN)
mlog_errno(status);
goto bail;
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 6474cb44004d..859cef7e8b4a 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -2248,7 +2248,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (iocb->ki_left == 0)
return 0;
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
appending = file->f_flags & O_APPEND ? 1 : 0;
direct_io = file->f_flags & O_DIRECT ? 1 : 0;
@@ -2468,6 +2469,9 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
out->f_path.dentry->d_name.len,
out->f_path.dentry->d_name.name, len);
+ if (!sb_start_file_write(out))
+ return -EAGAIN;
+
if (pipe->inode)
mutex_lock_nested(&pipe->inode->i_mutex, I_MUTEX_PARENT);
@@ -2506,6 +2510,7 @@ static ssize_t ocfs2_file_splice_write(struct pipe_inode_info *pipe,
balance_dirty_pages_ratelimited(mapping);
}
+ sb_end_write(inode->i_sb);
return ret;
}
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index 88924a3133fa..c765bdf6d60e 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -28,6 +28,8 @@
#include "extent_map.h"
+struct iocb;
+
/* OCFS2 Inode Private Data */
struct ocfs2_inode_info
{
diff --git a/fs/ocfs2/ioctl.c b/fs/ocfs2/ioctl.c
index 752f0b26221d..6cab301a568d 100644
--- a/fs/ocfs2/ioctl.c
+++ b/fs/ocfs2/ioctl.c
@@ -101,13 +101,6 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if (!S_ISDIR(inode->i_mode))
flags &= ~OCFS2_DIRSYNC_FL;
- handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
- if (IS_ERR(handle)) {
- status = PTR_ERR(handle);
- mlog_errno(status);
- goto bail_unlock;
- }
-
oldflags = ocfs2_inode->ip_attr;
flags = flags & mask;
flags |= oldflags & ~mask;
@@ -120,7 +113,14 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if ((oldflags & OCFS2_IMMUTABLE_FL) || ((flags ^ oldflags) &
(OCFS2_APPEND_FL | OCFS2_IMMUTABLE_FL))) {
if (!capable(CAP_LINUX_IMMUTABLE))
- goto bail_commit;
+ goto bail_unlock;
+ }
+
+ handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS);
+ if (IS_ERR(handle)) {
+ status = PTR_ERR(handle);
+ mlog_errno(status);
+ goto bail_unlock;
}
ocfs2_inode->ip_attr = flags;
@@ -130,8 +130,8 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
if (status < 0)
mlog_errno(status);
-bail_commit:
ocfs2_commit_trans(osb, handle);
+
bail_unlock:
ocfs2_inode_unlock(inode, 1);
bail:
diff --git a/fs/pipe.c b/fs/pipe.c
index 2234f3f61f8d..34a643dd22dc 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -21,6 +21,7 @@
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
+#include <linux/aio.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
diff --git a/fs/proc/Makefile b/fs/proc/Makefile
index 712f24db9600..ab30716584f5 100644
--- a/fs/proc/Makefile
+++ b/fs/proc/Makefile
@@ -5,7 +5,7 @@
obj-y += proc.o
proc-y := nommu.o task_nommu.o
-proc-$(CONFIG_MMU) := mmu.o task_mmu.o
+proc-$(CONFIG_MMU) := task_mmu.o
proc-y += inode.o root.o base.o generic.o array.o \
fd.o
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 4b3b3ffb52f1..c5450183ca78 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -181,14 +181,16 @@ proc_file_read(struct file *file, char __user *buf, size_t nbytes,
{
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ rcu_read_unlock();
rv = __proc_file_read(file, buf, nbytes, ppos);
@@ -204,13 +206,16 @@ proc_file_write(struct file *file, const char __user *buffer,
ssize_t rv = -EIO;
if (pde->write_proc) {
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ const struct file_operations *fops;
+
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ rcu_read_unlock();
/* FIXME: does this routine need ppos? probably... */
rv = pde->write_proc(file, buffer, count, pde->data);
@@ -542,7 +547,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
if (S_ISDIR(dp->mode)) {
if (dp->proc_iops == NULL) {
- dp->proc_fops = &proc_dir_operations;
+ RCU_INIT_POINTER(dp->proc_fops, &proc_dir_operations);
dp->proc_iops = &proc_dir_inode_operations;
}
dir->nlink++;
@@ -551,7 +556,7 @@ static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp
dp->proc_iops = &proc_link_inode_operations;
} else if (S_ISREG(dp->mode)) {
if (dp->proc_fops == NULL)
- dp->proc_fops = &proc_file_operations;
+ RCU_INIT_POINTER(dp->proc_fops, &proc_file_operations);
if (dp->proc_iops == NULL)
dp->proc_iops = &proc_file_inode_operations;
}
@@ -604,7 +609,8 @@ static struct proc_dir_entry *__proc_create(struct proc_dir_entry **parent,
ent->mode = mode;
ent->nlink = nlink;
atomic_set(&ent->count, 1);
- spin_lock_init(&ent->pde_unload_lock);
+ atomic_set(&ent->pde_users, 1);
+ spin_lock_init(&ent->pde_openers_lock);
INIT_LIST_HEAD(&ent->pde_openers);
out:
return ent;
@@ -728,7 +734,7 @@ struct proc_dir_entry *proc_create_data(const char *name, umode_t mode,
pde = __proc_create(&parent, name, mode, nlink);
if (!pde)
goto out;
- pde->proc_fops = proc_fops;
+ rcu_assign_pointer(pde->proc_fops, proc_fops);
pde->data = data;
if (proc_register(parent, pde) < 0)
goto out_free;
@@ -764,6 +770,7 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
struct proc_dir_entry *de = NULL;
const char *fn = name;
unsigned int len;
+ DECLARE_COMPLETION_ONSTACK(c);
spin_lock(&proc_subdir_lock);
if (__xlate_proc_name(name, &parent, &fn) != 0) {
@@ -786,37 +793,30 @@ void remove_proc_entry(const char *name, struct proc_dir_entry *parent)
return;
}
- spin_lock(&de->pde_unload_lock);
/*
* Stop accepting new callers into module. If you're
* dynamically allocating ->proc_fops, save a pointer somewhere.
*/
- de->proc_fops = NULL;
- /* Wait until all existing callers into module are done. */
- if (de->pde_users > 0) {
- DECLARE_COMPLETION_ONSTACK(c);
-
- if (!de->pde_unload_completion)
- de->pde_unload_completion = &c;
-
- spin_unlock(&de->pde_unload_lock);
-
+ rcu_assign_pointer(de->proc_fops, NULL);
+ synchronize_rcu();
+ /*
+ * Wait until all existing callers into module are done.
+ * Once pde_users hits zero we are free to clean out pde_openers.
+ */
+ de->pde_unload_completion = &c;
+ if (!atomic_dec_and_test(&de->pde_users))
wait_for_completion(de->pde_unload_completion);
- spin_lock(&de->pde_unload_lock);
- }
-
+ spin_lock(&de->pde_openers_lock);
while (!list_empty(&de->pde_openers)) {
struct pde_opener *pdeo;
pdeo = list_first_entry(&de->pde_openers, struct pde_opener, lh);
list_del(&pdeo->lh);
- spin_unlock(&de->pde_unload_lock);
pdeo->release(pdeo->inode, pdeo->file);
kfree(pdeo);
- spin_lock(&de->pde_unload_lock);
}
- spin_unlock(&de->pde_unload_lock);
+ spin_unlock(&de->pde_openers_lock);
if (S_ISDIR(de->mode))
parent->nlink--;
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index a86aebc9ba7c..f53660a471c2 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -129,46 +129,41 @@ static const struct super_operations proc_sops = {
.show_options = proc_show_options,
};
-static void __pde_users_dec(struct proc_dir_entry *pde)
-{
- pde->pde_users--;
- if (pde->pde_unload_completion && pde->pde_users == 0)
- complete(pde->pde_unload_completion);
-}
-
void pde_users_dec(struct proc_dir_entry *pde)
{
- spin_lock(&pde->pde_unload_lock);
- __pde_users_dec(pde);
- spin_unlock(&pde->pde_unload_lock);
+ if (atomic_dec_and_test(&pde->pde_users) && pde->pde_unload_completion)
+ complete(pde->pde_unload_completion);
}
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
+ const struct file_operations *fops;
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
loff_t (*llseek)(struct file *, loff_t, int);
- spin_lock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
/*
* remove_proc_entry() is going to delete PDE (as part of module
* cleanup sequence). No new callers into module allowed.
*/
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
/*
* Bump refcount so that remove_proc_entry will wail for ->llseek to
* complete.
*/
- pde->pde_users++;
+ atomic_inc(&pde->pde_users);
+
/*
- * Save function pointer under lock, to protect against ->proc_fops
- * NULL'ifying right after ->pde_unload_lock is dropped.
+ * Save function pointer under rcu lock, to protect against
+ * ->proc_fops NULL'ifying by remove_proc_entry.
*/
- llseek = pde->proc_fops->llseek;
- spin_unlock(&pde->pde_unload_lock);
+ llseek = fops->llseek;
+ rcu_read_unlock();
if (!llseek)
llseek = default_llseek;
@@ -183,15 +178,17 @@ static ssize_t proc_reg_read(struct file *file, char __user *buf, size_t count,
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
ssize_t (*read)(struct file *, char __user *, size_t, loff_t *);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- read = pde->proc_fops->read;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ read = fops->read;
+ rcu_read_unlock();
if (read)
rv = read(file, buf, count, ppos);
@@ -205,15 +202,17 @@ static ssize_t proc_reg_write(struct file *file, const char __user *buf, size_t
struct proc_dir_entry *pde = PDE(file_inode(file));
ssize_t rv = -EIO;
ssize_t (*write)(struct file *, const char __user *, size_t, loff_t *);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- write = pde->proc_fops->write;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ write = fops->write;
+ rcu_read_unlock();
if (write)
rv = write(file, buf, count, ppos);
@@ -227,15 +226,17 @@ static unsigned int proc_reg_poll(struct file *file, struct poll_table_struct *p
struct proc_dir_entry *pde = PDE(file_inode(file));
unsigned int rv = DEFAULT_POLLMASK;
unsigned int (*poll)(struct file *, struct poll_table_struct *);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- poll = pde->proc_fops->poll;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ poll = fops->poll;
+ rcu_read_unlock();
if (poll)
rv = poll(file, pts);
@@ -249,15 +250,17 @@ static long proc_reg_unlocked_ioctl(struct file *file, unsigned int cmd, unsigne
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
long (*ioctl)(struct file *, unsigned int, unsigned long);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- ioctl = pde->proc_fops->unlocked_ioctl;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ ioctl = fops->unlocked_ioctl;
+ rcu_read_unlock();
if (ioctl)
rv = ioctl(file, cmd, arg);
@@ -272,15 +275,17 @@ static long proc_reg_compat_ioctl(struct file *file, unsigned int cmd, unsigned
struct proc_dir_entry *pde = PDE(file_inode(file));
long rv = -ENOTTY;
long (*compat_ioctl)(struct file *, unsigned int, unsigned long);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- compat_ioctl = pde->proc_fops->compat_ioctl;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ compat_ioctl = fops->compat_ioctl;
+ rcu_read_unlock();
if (compat_ioctl)
rv = compat_ioctl(file, cmd, arg);
@@ -295,15 +300,17 @@ static int proc_reg_mmap(struct file *file, struct vm_area_struct *vma)
struct proc_dir_entry *pde = PDE(file_inode(file));
int rv = -EIO;
int (*mmap)(struct file *, struct vm_area_struct *);
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
return rv;
}
- pde->pde_users++;
- mmap = pde->proc_fops->mmap;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ mmap = fops->mmap;
+ rcu_read_unlock();
if (mmap)
rv = mmap(file, vma);
@@ -319,6 +326,7 @@ static int proc_reg_open(struct inode *inode, struct file *file)
int (*open)(struct inode *, struct file *);
int (*release)(struct inode *, struct file *);
struct pde_opener *pdeo;
+ const struct file_operations *fops;
/*
* What for, you ask? Well, we can have open, rmmod, remove_proc_entry
@@ -334,32 +342,33 @@ static int proc_reg_open(struct inode *inode, struct file *file)
if (!pdeo)
return -ENOMEM;
- spin_lock(&pde->pde_unload_lock);
- if (!pde->proc_fops) {
- spin_unlock(&pde->pde_unload_lock);
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
kfree(pdeo);
return -ENOENT;
}
- pde->pde_users++;
- open = pde->proc_fops->open;
- release = pde->proc_fops->release;
- spin_unlock(&pde->pde_unload_lock);
+ atomic_inc(&pde->pde_users);
+ open = fops->open;
+ release = fops->release;
+ rcu_read_unlock();
if (open)
rv = open(inode, file);
- spin_lock(&pde->pde_unload_lock);
if (rv == 0 && release) {
/* To know what to release. */
pdeo->inode = inode;
pdeo->file = file;
/* Strictly for "too late" ->release in proc_reg_release(). */
pdeo->release = release;
+ spin_lock(&pde->pde_openers_lock);
list_add(&pdeo->lh, &pde->pde_openers);
+ spin_unlock(&pde->pde_openers_lock);
} else
kfree(pdeo);
- __pde_users_dec(pde);
- spin_unlock(&pde->pde_unload_lock);
+ pde_users_dec(pde);
return rv;
}
@@ -368,10 +377,14 @@ static struct pde_opener *find_pde_opener(struct proc_dir_entry *pde,
{
struct pde_opener *pdeo;
+ spin_lock(&pde->pde_openers_lock);
list_for_each_entry(pdeo, &pde->pde_openers, lh) {
- if (pdeo->inode == inode && pdeo->file == file)
+ if (pdeo->inode == inode && pdeo->file == file) {
+ spin_unlock(&pde->pde_openers_lock);
return pdeo;
+ }
}
+ spin_unlock(&pde->pde_openers_lock);
return NULL;
}
@@ -381,10 +394,13 @@ static int proc_reg_release(struct inode *inode, struct file *file)
int rv = 0;
int (*release)(struct inode *, struct file *);
struct pde_opener *pdeo;
+ const struct file_operations *fops;
- spin_lock(&pde->pde_unload_lock);
pdeo = find_pde_opener(pde, inode, file);
- if (!pde->proc_fops) {
+ rcu_read_lock();
+ fops = rcu_dereference(pde->proc_fops);
+ if (!fops) {
+ rcu_read_unlock();
/*
* Can't simply exit, __fput() will think that everything is OK,
* and move on to freeing struct file. remove_proc_entry() will
@@ -394,21 +410,23 @@ static int proc_reg_release(struct inode *inode, struct file *file)
* But if opener is removed from list, who will ->release it?
*/
if (pdeo) {
+ spin_lock(&pde->pde_openers_lock);
list_del(&pdeo->lh);
- spin_unlock(&pde->pde_unload_lock);
+ spin_unlock(&pde->pde_openers_lock);
rv = pdeo->release(inode, file);
kfree(pdeo);
- } else
- spin_unlock(&pde->pde_unload_lock);
+ }
return rv;
}
- pde->pde_users++;
- release = pde->proc_fops->release;
+ atomic_inc(&pde->pde_users);
+ release = fops->release;
+ rcu_read_unlock();
if (pdeo) {
+ spin_lock(&pde->pde_openers_lock);
list_del(&pdeo->lh);
- kfree(pdeo);
+ spin_unlock(&pde->pde_openers_lock);
}
- spin_unlock(&pde->pde_unload_lock);
+ kfree(pdeo);
if (release)
rv = release(inode, file);
@@ -447,6 +465,7 @@ static const struct file_operations proc_reg_file_ops_no_compat = {
struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
{
struct inode *inode = iget_locked(sb, de->low_ino);
+ const struct file_operations *fops;
if (inode && (inode->i_state & I_NEW)) {
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
@@ -463,19 +482,22 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
set_nlink(inode, de->nlink);
if (de->proc_iops)
inode->i_op = de->proc_iops;
- if (de->proc_fops) {
+ rcu_read_lock();
+ fops = rcu_dereference(de->proc_fops);
+ if (fops) {
if (S_ISREG(inode->i_mode)) {
#ifdef CONFIG_COMPAT
- if (!de->proc_fops->compat_ioctl)
+ if (!fops->compat_ioctl)
inode->i_fop =
&proc_reg_file_ops_no_compat;
else
#endif
inode->i_fop = &proc_reg_file_ops;
} else {
- inode->i_fop = de->proc_fops;
+ inode->i_fop = fops;
}
}
+ rcu_read_unlock();
unlock_new_inode(inode);
} else
pde_put(de);
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 85ff3a4598b3..75710357a517 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -30,24 +30,6 @@ extern int proc_net_init(void);
static inline int proc_net_init(void) { return 0; }
#endif
-struct vmalloc_info {
- unsigned long used;
- unsigned long largest_chunk;
-};
-
-#ifdef CONFIG_MMU
-#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
-extern void get_vmalloc_info(struct vmalloc_info *vmi);
-#else
-
-#define VMALLOC_TOTAL 0UL
-#define get_vmalloc_info(vmi) \
-do { \
- (vmi)->used = 0; \
- (vmi)->largest_chunk = 0; \
-} while(0)
-#endif
-
extern int proc_tid_stat(struct seq_file *m, struct pid_namespace *ns,
struct pid *pid, struct task_struct *task);
extern int proc_tgid_stat(struct seq_file *m, struct pid_namespace *ns,
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 1efaaa19c4f3..5aa847a603c0 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -11,6 +11,7 @@
#include <linux/swap.h>
#include <linux/vmstat.h>
#include <linux/atomic.h>
+#include <linux/vmalloc.h>
#include <asm/page.h>
#include <asm/pgtable.h>
#include "internal.h"
diff --git a/fs/proc/mmu.c b/fs/proc/mmu.c
deleted file mode 100644
index 8ae221dfd010..000000000000
--- a/fs/proc/mmu.c
+++ /dev/null
@@ -1,60 +0,0 @@
-/* mmu.c: mmu memory info files
- *
- * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells (dhowells@redhat.com)
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-#include <linux/spinlock.h>
-#include <linux/vmalloc.h>
-#include <linux/highmem.h>
-#include <asm/pgtable.h>
-#include "internal.h"
-
-void get_vmalloc_info(struct vmalloc_info *vmi)
-{
- struct vm_struct *vma;
- unsigned long free_area_size;
- unsigned long prev_end;
-
- vmi->used = 0;
-
- if (!vmlist) {
- vmi->largest_chunk = VMALLOC_TOTAL;
- }
- else {
- vmi->largest_chunk = 0;
-
- prev_end = VMALLOC_START;
-
- read_lock(&vmlist_lock);
-
- for (vma = vmlist; vma; vma = vma->next) {
- unsigned long addr = (unsigned long) vma->addr;
-
- /*
- * Some archs keep another range for modules in vmlist
- */
- if (addr < VMALLOC_START)
- continue;
- if (addr >= VMALLOC_END)
- break;
-
- vmi->used += vma->size;
-
- free_area_size = addr - prev_end;
- if (vmi->largest_chunk < free_area_size)
- vmi->largest_chunk = free_area_size;
-
- prev_end = vma->size + addr;
- }
-
- if (VMALLOC_END - prev_end > vmi->largest_chunk)
- vmi->largest_chunk = VMALLOC_END - prev_end;
-
- read_unlock(&vmlist_lock);
- }
-}
diff --git a/fs/read_write.c b/fs/read_write.c
index f738e4dccfab..b81aebe2a5ce 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -9,6 +9,7 @@
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
+#include <linux/aio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
@@ -325,16 +326,6 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}
-static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
-{
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!kiocbIsKicked(iocb))
- schedule();
- else
- kiocbClearKicked(iocb);
- __set_current_state(TASK_RUNNING);
-}
-
ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
struct iovec iov = { .iov_base = buf, .iov_len = len };
@@ -346,13 +337,7 @@ ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *pp
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
@@ -402,13 +387,7 @@ ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, lof
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
if (-EIOCBQUEUED == ret)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
@@ -559,13 +538,7 @@ ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
kiocb.ki_left = len;
kiocb.ki_nbytes = len;
- for (;;) {
- ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
- if (ret != -EIOCBRETRY)
- break;
- wait_on_retry_sync_kiocb(&kiocb);
- }
-
+ ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
if (ret == -EIOCBQUEUED)
ret = wait_on_sync_kiocb(&kiocb);
*ppos = kiocb.ki_pos;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ea5061fd4f3e..77d6d47abc83 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -18,6 +18,7 @@
#include <linux/writeback.h>
#include <linux/quotaops.h>
#include <linux/swap.h>
+#include <linux/aio.h>
int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to);
diff --git a/fs/splice.c b/fs/splice.c
index 23ade0e5c559..186ec03700d1 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1000,7 +1000,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
};
ssize_t ret;
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(out))
+ return -EAGAIN;
pipe_lock(pipe);
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index f12189d2db1d..14374530784c 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -50,6 +50,7 @@
*/
#include "ubifs.h"
+#include <linux/aio.h>
#include <linux/mount.h>
#include <linux/namei.h>
#include <linux/slab.h>
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 7a12e48ad819..b6d15d349810 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -38,6 +38,7 @@
#include <linux/slab.h>
#include <linux/crc-itu-t.h>
#include <linux/mpage.h>
+#include <linux/aio.h>
#include "udf_i.h"
#include "udf_sb.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 5f707e537171..c24ce0e9c67c 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -31,6 +31,7 @@
#include "xfs_vnodeops.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
+#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index f03bf1a456fb..a81aa74a7263 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -36,6 +36,7 @@
#include "xfs_ioctl.h"
#include "xfs_trace.h"
+#include <linux/aio.h>
#include <linux/dcache.h>
#include <linux/falloc.h>
#include <linux/pagevec.h>
@@ -775,7 +776,8 @@ xfs_file_aio_write(
if (ocount == 0)
return 0;
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
ret = -EIO;
diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h
new file mode 100644
index 000000000000..d06079c774a0
--- /dev/null
+++ b/include/asm-generic/hugetlb.h
@@ -0,0 +1,40 @@
+#ifndef _ASM_GENERIC_HUGETLB_H
+#define _ASM_GENERIC_HUGETLB_H
+
+static inline pte_t mk_huge_pte(struct page *page, pgprot_t pgprot)
+{
+ return mk_pte(page, pgprot);
+}
+
+static inline int huge_pte_write(pte_t pte)
+{
+ return pte_write(pte);
+}
+
+static inline int huge_pte_dirty(pte_t pte)
+{
+ return pte_dirty(pte);
+}
+
+static inline pte_t huge_pte_mkwrite(pte_t pte)
+{
+ return pte_mkwrite(pte);
+}
+
+static inline pte_t huge_pte_mkdirty(pte_t pte)
+{
+ return pte_mkdirty(pte);
+}
+
+static inline pte_t huge_pte_modify(pte_t pte, pgprot_t newprot)
+{
+ return pte_modify(pte, newprot);
+}
+
+static inline void huge_pte_clear(struct mm_struct *mm, unsigned long addr,
+ pte_t *ptep)
+{
+ pte_clear(mm, addr, ptep);
+}
+
+#endif /* _ASM_GENERIC_HUGETLB_H */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index bfd87685fc1f..a59ff51b0166 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -7,6 +7,16 @@
#include <linux/mm_types.h>
#include <linux/bug.h>
+/*
+ * On almost all architectures and configurations, 0 can be used as the
+ * upper ceiling to free_pgtables(): on many architectures it has the same
+ * effect as using TASK_SIZE. However, there is one configuration which
+ * must impose a more careful limit, to avoid freeing kernel pgtables.
+ */
+#ifndef USER_PGTABLES_CEILING
+#define USER_PGTABLES_CEILING 0UL
+#endif
+
#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
extern int ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address, pte_t *ptep,
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 31ff6dba4872..a7e4c595825e 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -6,94 +6,38 @@
#include <linux/aio_abi.h>
#include <linux/uio.h>
#include <linux/rcupdate.h>
-
#include <linux/atomic.h>
-
-#define AIO_MAXSEGS 4
-#define AIO_KIOGRP_NR_ATOMIC 8
+#include <linux/batch_complete.h>
struct kioctx;
+struct kiocb;
+struct batch_complete;
-/* Notes on cancelling a kiocb:
- * If a kiocb is cancelled, aio_complete may return 0 to indicate
- * that cancel has not yet disposed of the kiocb. All cancel
- * operations *must* call aio_put_req to dispose of the kiocb
- * to guard against races with the completion code.
- */
-#define KIOCB_C_CANCELLED 0x01
-#define KIOCB_C_COMPLETE 0x02
-
-#define KIOCB_SYNC_KEY (~0U)
+#define KIOCB_KEY 0
-/* ki_flags bits */
/*
- * This may be used for cancel/retry serialization in the future, but
- * for now it's unused and we probably don't want modules to even
- * think they can use it.
+ * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
+ * cancelled or completed (this makes a certain amount of sense because
+ * successful cancellation - io_cancel() - does deliver the completion to
+ * userspace).
+ *
+ * And since most things don't implement kiocb cancellation and we'd really like
+ * kiocb completion to be lockless when possible, we use ki_cancel to
+ * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
+ * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
*/
-/* #define KIF_LOCKED 0 */
-#define KIF_KICKED 1
-#define KIF_CANCELLED 2
-
-#define kiocbTryLock(iocb) test_and_set_bit(KIF_LOCKED, &(iocb)->ki_flags)
-#define kiocbTryKick(iocb) test_and_set_bit(KIF_KICKED, &(iocb)->ki_flags)
+#define KIOCB_CANCELLED ((void *) (~0ULL))
-#define kiocbSetLocked(iocb) set_bit(KIF_LOCKED, &(iocb)->ki_flags)
-#define kiocbSetKicked(iocb) set_bit(KIF_KICKED, &(iocb)->ki_flags)
-#define kiocbSetCancelled(iocb) set_bit(KIF_CANCELLED, &(iocb)->ki_flags)
+typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
-#define kiocbClearLocked(iocb) clear_bit(KIF_LOCKED, &(iocb)->ki_flags)
-#define kiocbClearKicked(iocb) clear_bit(KIF_KICKED, &(iocb)->ki_flags)
-#define kiocbClearCancelled(iocb) clear_bit(KIF_CANCELLED, &(iocb)->ki_flags)
-
-#define kiocbIsLocked(iocb) test_bit(KIF_LOCKED, &(iocb)->ki_flags)
-#define kiocbIsKicked(iocb) test_bit(KIF_KICKED, &(iocb)->ki_flags)
-#define kiocbIsCancelled(iocb) test_bit(KIF_CANCELLED, &(iocb)->ki_flags)
-
-/* is there a better place to document function pointer methods? */
-/**
- * ki_retry - iocb forward progress callback
- * @kiocb: The kiocb struct to advance by performing an operation.
- *
- * This callback is called when the AIO core wants a given AIO operation
- * to make forward progress. The kiocb argument describes the operation
- * that is to be performed. As the operation proceeds, perhaps partially,
- * ki_retry is expected to update the kiocb with progress made. Typically
- * ki_retry is set in the AIO core and it itself calls file_operations
- * helpers.
- *
- * ki_retry's return value determines when the AIO operation is completed
- * and an event is generated in the AIO event ring. Except the special
- * return values described below, the value that is returned from ki_retry
- * is transferred directly into the completion ring as the operation's
- * resulting status. Once this has happened ki_retry *MUST NOT* reference
- * the kiocb pointer again.
- *
- * If ki_retry returns -EIOCBQUEUED it has made a promise that aio_complete()
- * will be called on the kiocb pointer in the future. The AIO core will
- * not ask the method again -- ki_retry must ensure forward progress.
- * aio_complete() must be called once and only once in the future, multiple
- * calls may result in undefined behaviour.
- *
- * If ki_retry returns -EIOCBRETRY it has made a promise that kick_iocb()
- * will be called on the kiocb pointer in the future. This may happen
- * through generic helpers that associate kiocb->ki_wait with a wait
- * queue head that ki_retry uses via current->io_wait. It can also happen
- * with custom tracking and manual calls to kick_iocb(), though that is
- * discouraged. In either case, kick_iocb() must be called once and only
- * once. ki_retry must ensure forward progress, the AIO core will wait
- * indefinitely for kick_iocb() to be called.
- */
struct kiocb {
- struct list_head ki_run_list;
- unsigned long ki_flags;
- int ki_users;
- unsigned ki_key; /* id of this request */
+ struct rb_node ki_node;
+
+ atomic_t ki_users;
struct file *ki_filp;
- struct kioctx *ki_ctx; /* may be NULL for sync ops */
- int (*ki_cancel)(struct kiocb *, struct io_event *);
- ssize_t (*ki_retry)(struct kiocb *);
+ struct kioctx *ki_ctx; /* NULL for sync ops */
+ kiocb_cancel_fn *ki_cancel;
void (*ki_dtor)(struct kiocb *);
union {
@@ -102,6 +46,9 @@ struct kiocb {
} ki_obj;
__u64 ki_user_data; /* user's data for completion */
+ long ki_res;
+ long ki_res2;
+
loff_t ki_pos;
void *private;
@@ -117,7 +64,6 @@ struct kiocb {
struct list_head ki_list; /* the aio core uses this
* for cancellation */
- struct list_head ki_batch; /* batch allocation */
/*
* If the aio_resfd field of the userspace iocb is not zero,
@@ -128,108 +74,55 @@ struct kiocb {
static inline bool is_sync_kiocb(struct kiocb *kiocb)
{
- return kiocb->ki_key == KIOCB_SYNC_KEY;
+ return kiocb->ki_ctx == NULL;
}
static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp)
{
*kiocb = (struct kiocb) {
- .ki_users = 1,
- .ki_key = KIOCB_SYNC_KEY,
+ .ki_users = ATOMIC_INIT(1),
+ .ki_ctx = NULL,
.ki_filp = filp,
.ki_obj.tsk = current,
};
}
-#define AIO_RING_MAGIC 0xa10a10a1
-#define AIO_RING_COMPAT_FEATURES 1
-#define AIO_RING_INCOMPAT_FEATURES 0
-struct aio_ring {
- unsigned id; /* kernel internal index number */
- unsigned nr; /* number of io_events */
- unsigned head;
- unsigned tail;
-
- unsigned magic;
- unsigned compat_features;
- unsigned incompat_features;
- unsigned header_length; /* size of aio_ring */
-
-
- struct io_event io_events[0];
-}; /* 128 bytes + ring size */
-
-#define AIO_RING_PAGES 8
-struct aio_ring_info {
- unsigned long mmap_base;
- unsigned long mmap_size;
-
- struct page **ring_pages;
- spinlock_t ring_lock;
- long nr_pages;
-
- unsigned nr, tail;
-
- struct page *internal_pages[AIO_RING_PAGES];
-};
-
-static inline unsigned aio_ring_avail(struct aio_ring_info *info,
- struct aio_ring *ring)
-{
- return (ring->head + info->nr - 1 - ring->tail) % info->nr;
-}
-
-struct kioctx {
- atomic_t users;
- int dead;
- struct mm_struct *mm;
-
- /* This needs improving */
- unsigned long user_id;
- struct hlist_node list;
-
- wait_queue_head_t wait;
-
- spinlock_t ctx_lock;
-
- int reqs_active;
- struct list_head active_reqs; /* used for cancellation */
- struct list_head run_list; /* used for kicked reqs */
-
- /* sys_io_setup currently limits this to an unsigned int */
- unsigned max_reqs;
-
- struct aio_ring_info ring_info;
-
- struct delayed_work wq;
-
- struct rcu_head rcu_head;
-};
-
/* prototypes */
-extern unsigned aio_max_size;
-
#ifdef CONFIG_AIO
extern ssize_t wait_on_sync_kiocb(struct kiocb *iocb);
-extern int aio_put_req(struct kiocb *iocb);
-extern void kick_iocb(struct kiocb *iocb);
-extern int aio_complete(struct kiocb *iocb, long res, long res2);
+extern void aio_put_req(struct kiocb *iocb);
+extern void batch_complete_aio(struct batch_complete *batch);
+extern void aio_complete_batch(struct kiocb *iocb, long res, long res2,
+ struct batch_complete *batch);
struct mm_struct;
extern void exit_aio(struct mm_struct *mm);
extern long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user *__user *iocbpp, bool compat);
+void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
#else
static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
-static inline int aio_put_req(struct kiocb *iocb) { return 0; }
-static inline void kick_iocb(struct kiocb *iocb) { }
-static inline int aio_complete(struct kiocb *iocb, long res, long res2) { return 0; }
+static inline void aio_put_req(struct kiocb *iocb) { }
+
+static inline void batch_complete_aio(struct batch_complete *batch) { }
+static inline void aio_complete_batch(struct kiocb *iocb, long res, long res2,
+ struct batch_complete *batch)
+{
+ return;
+}
struct mm_struct;
static inline void exit_aio(struct mm_struct *mm) { }
static inline long do_io_submit(aio_context_t ctx_id, long nr,
struct iocb __user * __user *iocbpp,
bool compat) { return 0; }
+static inline void kiocb_set_cancel_fn(struct kiocb *req,
+ kiocb_cancel_fn *cancel) { }
#endif /* CONFIG_AIO */
+static inline void aio_complete(struct kiocb *iocb, long res, long res2)
+{
+ aio_complete_batch(iocb, res, res2, NULL);
+}
+
static inline struct kiocb *list_kiocb(struct list_head *h)
{
return list_entry(h, struct kiocb, ki_list);
diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h
index f7f1d7169b11..6fd5cc80f62f 100644
--- a/include/linux/balloon_compaction.h
+++ b/include/linux/balloon_compaction.h
@@ -213,8 +213,15 @@ static inline bool balloon_compaction_check(void)
return true;
}
+static inline void balloon_event_count(enum vm_event_item item)
+{
+ count_vm_event(item);
+}
#else /* !CONFIG_BALLOON_COMPACTION */
+/* A macro, to avoid generating references to the undefined COMPACTBALLOON* */
+#define balloon_event_count(item) do { } while (0)
+
static inline void *balloon_mapping_alloc(void *balloon_device,
const struct address_space_operations *a_ops)
{
diff --git a/include/linux/batch_complete.h b/include/linux/batch_complete.h
new file mode 100644
index 000000000000..8167a9d306fb
--- /dev/null
+++ b/include/linux/batch_complete.h
@@ -0,0 +1,23 @@
+#ifndef _LINUX_BATCH_COMPLETE_H
+#define _LINUX_BATCH_COMPLETE_H
+
+#include <linux/rbtree.h>
+
+/*
+ * Common stuff to the aio and block code for batch completion. Everything
+ * important is elsewhere:
+ */
+
+struct bio;
+
+struct bio_list {
+ struct bio *head;
+ struct bio *tail;
+};
+
+struct batch_complete {
+ struct bio_list bio;
+ struct rb_root kiocb;
+};
+
+#endif
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 820e7aaad4fd..5f549176745c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -24,6 +24,7 @@
#include <linux/mempool.h>
#include <linux/ioprio.h>
#include <linux/bug.h>
+#include <linux/batch_complete.h>
#ifdef CONFIG_BLOCK
@@ -68,6 +69,8 @@
#define bio_segments(bio) ((bio)->bi_vcnt - (bio)->bi_idx)
#define bio_sectors(bio) ((bio)->bi_size >> 9)
+void bio_endio_batch(struct bio *bio, int error, struct batch_complete *batch);
+
static inline unsigned int bio_cur_bytes(struct bio *bio)
{
if (bio->bi_vcnt)
@@ -241,7 +244,25 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask)
}
-extern void bio_endio(struct bio *, int);
+/**
+ * bio_endio - end I/O on a bio
+ * @bio: bio
+ * @error: error, if any
+ *
+ * Description:
+ * bio_endio() will end I/O on the whole bio. bio_endio() is the
+ * preferred way to end I/O on a bio, it takes care of clearing
+ * BIO_UPTODATE on error. @error is 0 on success, and and one of the
+ * established -Exxxx (-EIO, for instance) error values in case
+ * something went wrong. No one should call bi_end_io() directly on a
+ * bio unless they own it and thus know that it has an end_io
+ * function.
+ **/
+static inline void bio_endio(struct bio *bio, int error)
+{
+ bio_endio_batch(bio, error, NULL);
+}
+
struct request_queue;
extern int bio_phys_segments(struct request_queue *, struct bio *);
@@ -420,10 +441,6 @@ static inline bool bio_mergeable(struct bio *bio)
* member of the bio. The bio_list also caches the last list member to allow
* fast access to the tail.
*/
-struct bio_list {
- struct bio *head;
- struct bio *tail;
-};
static inline int bio_list_empty(const struct bio_list *bl)
{
@@ -527,6 +544,15 @@ static inline struct bio *bio_list_get(struct bio_list *bl)
return bio;
}
+static inline void batch_complete_init(struct batch_complete *batch)
+{
+ bio_list_init(&batch->bio);
+ batch->kiocb = RB_ROOT;
+}
+
+void batch_complete(struct batch_complete *batch);
+
+
#if defined(CONFIG_BLK_DEV_INTEGRITY)
#define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)]))
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index cdf11191e645..7259893d3180 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -16,7 +16,9 @@ struct page;
struct block_device;
struct io_context;
struct cgroup_subsys_state;
+struct batch_complete;
typedef void (bio_end_io_t) (struct bio *, int);
+typedef void (bio_batch_end_io_t) (struct bio *, int, struct batch_complete *);
typedef void (bio_destructor_t) (struct bio *);
/*
@@ -42,6 +44,7 @@ struct bio {
* top bits priority
*/
+ short bi_error;
unsigned short bi_vcnt; /* how many bio_vec's */
unsigned short bi_idx; /* current index into bvl_vec */
@@ -59,7 +62,10 @@ struct bio {
unsigned int bi_seg_front_size;
unsigned int bi_seg_back_size;
- bio_end_io_t *bi_end_io;
+ union {
+ bio_end_io_t *bi_end_io;
+ bio_batch_end_io_t *bi_batch_end_io;
+ };
void *bi_private;
#ifdef CONFIG_BLK_CGROUP
@@ -111,12 +117,14 @@ struct bio {
#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */
#define BIO_QUIET 10 /* Make BIO Quiet */
#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */
+#define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */
+#define BIO_BATCH_ENDIO 13
/*
* Flags starting here get preserved by bio_reset() - this includes
* BIO_POOL_IDX()
*/
-#define BIO_RESET_BITS 12
+#define BIO_RESET_BITS 14
#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag)))
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 78feda9bbae2..2f91edb6e2d3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -877,7 +877,8 @@ extern struct request *blk_fetch_request(struct request_queue *q);
* This prevents code duplication in drivers.
*/
extern bool blk_update_request(struct request *rq, int error,
- unsigned int nr_bytes);
+ unsigned int nr_bytes,
+ struct batch_complete *batch);
extern bool blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
extern void blk_end_request_all(struct request *rq, int error);
@@ -885,10 +886,17 @@ extern bool blk_end_request_cur(struct request *rq, int error);
extern bool blk_end_request_err(struct request *rq, int error);
extern bool __blk_end_request(struct request *rq, int error,
unsigned int nr_bytes);
-extern void __blk_end_request_all(struct request *rq, int error);
extern bool __blk_end_request_cur(struct request *rq, int error);
extern bool __blk_end_request_err(struct request *rq, int error);
+extern void blk_end_request_all_batch(struct request *rq, int error,
+ struct batch_complete *batch);
+
+static inline void __blk_end_request_all(struct request *rq, int error)
+{
+ blk_end_request_all_batch(rq, error, NULL);
+}
+
extern void blk_complete_request(struct request *);
extern void __blk_complete_request(struct request *);
extern void blk_abort_request(struct request *);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 5afc4f94d110..4c16c4a88d47 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -181,6 +181,7 @@ void ll_rw_block(int, int, struct buffer_head * bh[]);
int sync_dirty_buffer(struct buffer_head *bh);
int __sync_dirty_buffer(struct buffer_head *bh, int rw);
void write_dirty_buffer(struct buffer_head *bh, int rw);
+int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags);
int submit_bh(int, struct buffer_head *);
void write_boundary_block(struct block_device *bdev,
sector_t bblock, unsigned blocksize);
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 01c48c6806d6..2eaedc1a0d05 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -27,6 +27,7 @@ struct cgroup_subsys;
struct inode;
struct cgroup;
struct css_id;
+struct eventfd_ctx;
extern int cgroup_init_early(void);
extern int cgroup_init(void);
@@ -720,13 +721,6 @@ void free_css_id(struct cgroup_subsys *ss, struct cgroup_subsys_state *css);
struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id);
-/*
- * Get a cgroup whose id is greater than or equal to id under tree of root.
- * Returning a cgroup_subsys_state or NULL.
- */
-struct cgroup_subsys_state *css_get_next(struct cgroup_subsys *ss, int id,
- struct cgroup_subsys_state *root, int *foundid);
-
/* Returns true if root is ancestor of cg */
bool css_is_ancestor(struct cgroup_subsys_state *cg,
const struct cgroup_subsys_state *root);
diff --git a/include/linux/cleancache.h b/include/linux/cleancache.h
index 42e55deee757..4ce9056b31a8 100644
--- a/include/linux/cleancache.h
+++ b/include/linux/cleancache.h
@@ -33,7 +33,7 @@ struct cleancache_ops {
void (*invalidate_fs)(int);
};
-extern struct cleancache_ops
+extern struct cleancache_ops *
cleancache_register_ops(struct cleancache_ops *ops);
extern void __cleancache_init_fs(struct super_block *);
extern void __cleancache_init_shared_fs(char *, struct super_block *);
@@ -42,9 +42,9 @@ extern void __cleancache_put_page(struct page *);
extern void __cleancache_invalidate_page(struct address_space *, struct page *);
extern void __cleancache_invalidate_inode(struct address_space *);
extern void __cleancache_invalidate_fs(struct super_block *);
-extern int cleancache_enabled;
#ifdef CONFIG_CLEANCACHE
+#define cleancache_enabled (1)
static inline bool cleancache_fs_enabled(struct page *page)
{
return page->mapping->host->i_sb->cleancache_poolid >= 0;
diff --git a/include/linux/console.h b/include/linux/console.h
index d4101cc467c4..c5448f37cd6c 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -142,6 +142,7 @@ struct console {
for (con = console_drivers; con != NULL; con = con->next)
extern int console_set_on_cmdline;
+extern struct console *early_console;
extern int add_preferred_console(char *name, int idx, char *options);
extern int update_console_cmdline(char *name, int idx, char *name_new, int idx_new, char *options);
diff --git a/include/linux/debug_locks.h b/include/linux/debug_locks.h
index a975de1ff59f..822c1354f3a6 100644
--- a/include/linux/debug_locks.h
+++ b/include/linux/debug_locks.h
@@ -27,7 +27,7 @@ extern int debug_locks_off(void);
\
if (!oops_in_progress && unlikely(c)) { \
if (debug_locks_off() && !debug_locks_silent) \
- WARN_ON(1); \
+ WARN(1, "DEBUG_LOCKS_WARN_ON(%s)", #c); \
__ret = 1; \
} \
__ret; \
diff --git a/include/linux/decompress/unlz4.h b/include/linux/decompress/unlz4.h
new file mode 100644
index 000000000000..d5b68bf3ec92
--- /dev/null
+++ b/include/linux/decompress/unlz4.h
@@ -0,0 +1,10 @@
+#ifndef DECOMPRESS_UNLZ4_H
+#define DECOMPRESS_UNLZ4_H
+
+int unlz4(unsigned char *inbuf, int len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *output,
+ int *pos,
+ void(*error)(char *x));
+#endif
diff --git a/include/linux/errno.h b/include/linux/errno.h
index f6bf082d4d4f..89627b9187f9 100644
--- a/include/linux/errno.h
+++ b/include/linux/errno.h
@@ -28,6 +28,5 @@
#define EBADTYPE 527 /* Type not supported by server */
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
-#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */
#endif
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 5b9b5b317180..41b223a59a63 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -85,6 +85,17 @@ enum fid_type {
FILEID_NILFS_WITH_PARENT = 0x62,
/*
+ * 32 bit generation number, 40 bit i_pos.
+ */
+ FILEID_FAT_WITHOUT_PARENT = 0x71,
+
+ /*
+ * 32 bit generation number, 40 bit i_pos,
+ * 32 bit parent generation number, 40 bit parent i_pos
+ */
+ FILEID_FAT_WITH_PARENT = 0x72,
+
+ /*
* Filesystems must not use 0xff file ID.
*/
FILEID_INVALID = 0xff,
diff --git a/include/linux/frontswap.h b/include/linux/frontswap.h
index 30442547b9e6..8293262401de 100644
--- a/include/linux/frontswap.h
+++ b/include/linux/frontswap.h
@@ -14,7 +14,7 @@ struct frontswap_ops {
};
extern bool frontswap_enabled;
-extern struct frontswap_ops
+extern struct frontswap_ops *
frontswap_register_ops(struct frontswap_ops *ops);
extern void frontswap_shrink(unsigned long);
extern unsigned long frontswap_curr_pages(void);
@@ -22,33 +22,19 @@ extern void frontswap_writethrough(bool);
#define FRONTSWAP_HAS_EXCLUSIVE_GETS
extern void frontswap_tmem_exclusive_gets(bool);
-extern void __frontswap_init(unsigned type);
+extern bool __frontswap_test(struct swap_info_struct *, pgoff_t);
+extern void __frontswap_init(unsigned type, unsigned long *map);
extern int __frontswap_store(struct page *page);
extern int __frontswap_load(struct page *page);
extern void __frontswap_invalidate_page(unsigned, pgoff_t);
extern void __frontswap_invalidate_area(unsigned);
#ifdef CONFIG_FRONTSWAP
+#define frontswap_enabled (1)
static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
{
- bool ret = false;
-
- if (frontswap_enabled && sis->frontswap_map)
- ret = test_bit(offset, sis->frontswap_map);
- return ret;
-}
-
-static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
-{
- if (frontswap_enabled && sis->frontswap_map)
- set_bit(offset, sis->frontswap_map);
-}
-
-static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
-{
- if (frontswap_enabled && sis->frontswap_map)
- clear_bit(offset, sis->frontswap_map);
+ return __frontswap_test(sis, offset);
}
static inline void frontswap_map_set(struct swap_info_struct *p,
@@ -71,14 +57,6 @@ static inline bool frontswap_test(struct swap_info_struct *sis, pgoff_t offset)
return false;
}
-static inline void frontswap_set(struct swap_info_struct *sis, pgoff_t offset)
-{
-}
-
-static inline void frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
-{
-}
-
static inline void frontswap_map_set(struct swap_info_struct *p,
unsigned long *map)
{
@@ -120,10 +98,10 @@ static inline void frontswap_invalidate_area(unsigned type)
__frontswap_invalidate_area(type);
}
-static inline void frontswap_init(unsigned type)
+static inline void frontswap_init(unsigned type, unsigned long *map)
{
if (frontswap_enabled)
- __frontswap_init(type);
+ __frontswap_init(type, map);
}
#endif /* _LINUX_FRONTSWAP_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c28271ab9d4..4c4f0e488313 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -675,9 +675,11 @@ static inline loff_t i_size_read(const struct inode *inode)
static inline void i_size_write(struct inode *inode, loff_t i_size)
{
#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+ preempt_disable();
write_seqcount_begin(&inode->i_size_seqcount);
inode->i_size = i_size;
write_seqcount_end(&inode->i_size_seqcount);
+ preempt_enable();
#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
preempt_disable();
inode->i_size = i_size;
@@ -1400,6 +1402,16 @@ static inline int sb_start_write_trylock(struct super_block *sb)
return __sb_start_write(sb, SB_FREEZE_WRITE, false);
}
+/*
+ * sb_start_write() for writing into a file. When file has O_NONBLOCK set,
+ * we use trylock semantics, otherwise we block on frozen filesystem.
+ */
+static inline int sb_start_file_write(struct file *file)
+{
+ return __sb_start_write(file->f_mapping->host->i_sb, SB_FREEZE_WRITE,
+ !(file->f_flags & O_NONBLOCK));
+}
+
/**
* sb_start_pagefault - get write access to a superblock from a page fault
* @sb: the super we write to
diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index c1d6555d2567..f3cec6856a4b 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -94,6 +94,11 @@
*/
#define in_nmi() (preempt_count() & NMI_MASK)
+/*
+ * Are we in nmi,irq context, or softirq context?
+ */
+#define in_serving_irq() (in_nmi() || in_irq() || in_serving_softirq())
+
#if defined(CONFIG_PREEMPT_COUNT)
# define PREEMPT_CHECK_OFFSET 1
#else
diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h
index 95d0850584da..fbcadb64d78f 100644
--- a/include/linux/hyperv.h
+++ b/include/linux/hyperv.h
@@ -1317,6 +1317,18 @@ void vmbus_driver_unregister(struct hv_driver *hv_driver);
0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, \
0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40 \
}
+
+/*
+ * Synthetic Video GUID
+ * {DA0A7802-E377-4aac-8E77-0558EB1073F8}
+ */
+#define HV_SYNTHVID_GUID \
+ .guid = { \
+ 0x02, 0x78, 0x0a, 0xda, 0x77, 0xe3, 0xac, 0x4a, \
+ 0x8e, 0x77, 0x05, 0x58, 0xeb, 0x10, 0x73, 0xf8 \
+ }
+
+
/*
* Common header for Hyper-V ICs
*/
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index f5dbce50466e..66017028dcb3 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -37,7 +37,7 @@ void irq_work_sync(struct irq_work *work);
#ifdef CONFIG_IRQ_WORK
bool irq_work_needs_cpu(void);
#else
-static bool irq_work_needs_cpu(void) { return false; }
+static inline bool irq_work_needs_cpu(void) { return false; }
#endif
#endif /* _LINUX_IRQ_WORK_H */
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 456a70e3776d..a661acf1888b 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -390,7 +390,6 @@ extern struct pid *session_of_pgrp(struct pid *pgrp);
unsigned long int_sqrt(unsigned long);
extern void bust_spinlocks(int yes);
-extern void wake_up_klogd(void);
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern int panic_timeout;
extern int panic_on_oops;
diff --git a/include/linux/kmod.h b/include/linux/kmod.h
index 5398d5807075..0555cc66a15b 100644
--- a/include/linux/kmod.h
+++ b/include/linux/kmod.h
@@ -67,16 +67,15 @@ struct subprocess_info {
};
extern int
-call_usermodehelper_fns(char *path, char **argv, char **envp, int wait,
- int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *), void *data);
+call_usermodehelper(char *path, char **argv, char **envp, int wait);
-static inline int
-call_usermodehelper(char *path, char **argv, char **envp, int wait)
-{
- return call_usermodehelper_fns(path, argv, envp, wait,
- NULL, NULL, NULL);
-}
+extern struct subprocess_info *
+call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask,
+ int (*init)(struct subprocess_info *info, struct cred *new),
+ void (*cleanup)(struct subprocess_info *), void *data);
+
+extern int
+call_usermodehelper_exec(struct subprocess_info *info, int wait);
extern struct ctl_table usermodehelper_table[];
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index f1e877b79ed8..cfc2f119779a 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -365,7 +365,7 @@ extern void lockdep_trace_alloc(gfp_t mask);
#define lockdep_recursing(tsk) ((tsk)->lockdep_recursion)
-#else /* !LOCKDEP */
+#else /* !CONFIG_LOCKDEP */
static inline void lockdep_off(void)
{
@@ -479,82 +479,36 @@ static inline void print_irqtrace_events(struct task_struct *curr)
* on the per lock-class debug mode:
*/
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_PROVE_LOCKING
-# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
-# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
-# else
-# define spin_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
-# define spin_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, NULL, i)
-# endif
-# define spin_release(l, n, i) lock_release(l, n, i)
+#ifdef CONFIG_PROVE_LOCKING
+ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
+ #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 2, n, i)
+ #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 2, n, i)
#else
-# define spin_acquire(l, s, t, i) do { } while (0)
-# define spin_release(l, n, i) do { } while (0)
+ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
+ #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i)
+ #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i)
#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_PROVE_LOCKING
-# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
-# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 2, NULL, i)
-# else
-# define rwlock_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
-# define rwlock_acquire_read(l, s, t, i) lock_acquire(l, s, t, 2, 1, NULL, i)
-# endif
-# define rwlock_release(l, n, i) lock_release(l, n, i)
-#else
-# define rwlock_acquire(l, s, t, i) do { } while (0)
-# define rwlock_acquire_read(l, s, t, i) do { } while (0)
-# define rwlock_release(l, n, i) do { } while (0)
-#endif
+#define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
+#define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
+#define spin_release(l, n, i) lock_release(l, n, i)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_PROVE_LOCKING
-# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
-# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
-# else
-# define mutex_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
-# define mutex_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
-# endif
-# define mutex_release(l, n, i) lock_release(l, n, i)
-#else
-# define mutex_acquire(l, s, t, i) do { } while (0)
-# define mutex_acquire_nest(l, s, t, n, i) do { } while (0)
-# define mutex_release(l, n, i) do { } while (0)
-#endif
+#define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
+#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i)
+#define rwlock_release(l, n, i) lock_release(l, n, i)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_PROVE_LOCKING
-# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 2, NULL, i)
-# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 2, n, i)
-# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 2, NULL, i)
-# else
-# define rwsem_acquire(l, s, t, i) lock_acquire(l, s, t, 0, 1, NULL, i)
-# define rwsem_acquire_nest(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i)
-# define rwsem_acquire_read(l, s, t, i) lock_acquire(l, s, t, 1, 1, NULL, i)
-# endif
+#define mutex_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
+#define mutex_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
+#define mutex_release(l, n, i) lock_release(l, n, i)
+
+#define rwsem_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i)
+#define rwsem_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i)
+#define rwsem_acquire_read(l, s, t, i) lock_acquire_shared(l, s, t, NULL, i)
# define rwsem_release(l, n, i) lock_release(l, n, i)
-#else
-# define rwsem_acquire(l, s, t, i) do { } while (0)
-# define rwsem_acquire_nest(l, s, t, n, i) do { } while (0)
-# define rwsem_acquire_read(l, s, t, i) do { } while (0)
-# define rwsem_release(l, n, i) do { } while (0)
-#endif
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-# ifdef CONFIG_PROVE_LOCKING
-# define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 2, NULL, _THIS_IP_)
-# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 2, NULL, _THIS_IP_)
-# else
-# define lock_map_acquire(l) lock_acquire(l, 0, 0, 0, 1, NULL, _THIS_IP_)
-# define lock_map_acquire_read(l) lock_acquire(l, 0, 0, 2, 1, NULL, _THIS_IP_)
-# endif
+#define lock_map_acquire(l) lock_acquire_exclusive(l, 0, 0, NULL, _THIS_IP_)
+#define lock_map_acquire_read(l) lock_acquire_shared_recursive(l, 0, 0, NULL, _THIS_IP_)
# define lock_map_release(l) lock_release(l, 1, _THIS_IP_)
-#else
-# define lock_map_acquire(l) do { } while (0)
-# define lock_map_acquire_read(l) do { } while (0)
-# define lock_map_release(l) do { } while (0)
-#endif
#ifdef CONFIG_PROVE_LOCKING
# define might_lock(lock) \
diff --git a/include/linux/lz4.h b/include/linux/lz4.h
new file mode 100644
index 000000000000..d21c13f10a64
--- /dev/null
+++ b/include/linux/lz4.h
@@ -0,0 +1,87 @@
+#ifndef __LZ4_H__
+#define __LZ4_H__
+/*
+ * LZ4 Kernel Interface
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#define LZ4_MEM_COMPRESS (4096 * sizeof(unsigned char *))
+#define LZ4HC_MEM_COMPRESS (65538 * sizeof(unsigned char *))
+
+/*
+ * lz4_compressbound()
+ * Provides the maximum size that LZ4 may output in a "worst case" scenario
+ * (input data not compressible)
+ */
+static inline size_t lz4_compressbound(size_t isize)
+{
+ return isize + (isize / 255) + 16;
+}
+
+/*
+ * lz4_compress()
+ * src : source address of the original data
+ * src_len : size of the original data
+ * dst : output buffer address of the compressed data
+ * This requires 'dst' of size LZ4_COMPRESSBOUND.
+ * dst_len : is the output size, which is returned after compress done
+ * workmem : address of the working memory.
+ * This requires 'workmem' of size LZ4_MEM_COMPRESS.
+ * return : Success if return 0
+ * Error if return (< 0)
+ * note : Destination buffer and workmem must be already allocated with
+ * the defined size.
+ */
+int lz4_compress(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+ /*
+ * lz4hc_compress()
+ * src : source address of the original data
+ * src_len : size of the original data
+ * dst : output buffer address of the compressed data
+ * This requires 'dst' of size LZ4_COMPRESSBOUND.
+ * dst_len : is the output size, which is returned after compress done
+ * workmem : address of the working memory.
+ * This requires 'workmem' of size LZ4HC_MEM_COMPRESS.
+ * return : Success if return 0
+ * Error if return (< 0)
+ * note : Destination buffer and workmem must be already allocated with
+ * the defined size.
+ */
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len, void *wrkmem);
+
+/*
+ * lz4_decompress()
+ * src : source address of the compressed data
+ * src_len : is the input size, whcih is returned after decompress done
+ * dest : output buffer address of the decompressed data
+ * actual_dest_len: is the size of uncompressed data, supposing it's known
+ * return : Success if return 0
+ * Error if return (< 0)
+ * note : Destination buffer must be already allocated.
+ * slightly faster than lz4_decompress_unknownoutputsize()
+ */
+int lz4_decompress(const char *src, size_t *src_len, char *dest,
+ size_t actual_dest_len);
+
+/*
+ * lz4_decompress_unknownoutputsize()
+ * src : source address of the compressed data
+ * src_len : is the input size, therefore the compressed size
+ * dest : output buffer address of the decompressed data
+ * dest_len: is the max size of the destination buffer, which is
+ * returned with actual size of decompressed data after
+ * decompress done
+ * return : Success if return 0
+ * Error if return (< 0)
+ * note : Destination buffer must be already allocated.
+ */
+int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
+ char *dest, size_t *dest_len);
+#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
index f4c8aa990442..012acc8bfc3b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -900,7 +900,8 @@ extern void pagefault_out_of_memory(void);
* Flags passed to show_mem() and show_free_areas() to suppress output in
* various contexts.
*/
-#define SHOW_MEM_FILTER_NODES (0x0001u) /* filter disallowed nodes */
+#define SHOW_MEM_FILTER_NODES (0x0001u) /* disallowed nodes */
+#define SHOW_MEM_FILTER_PAGE_COUNT (0x0002u) /* page type count */
extern void show_free_areas(unsigned int flags);
extern bool skip_free_areas_node(unsigned int flags, int nid);
@@ -1292,6 +1293,61 @@ extern void free_area_init_node(int nid, unsigned long * zones_size,
unsigned long zone_start_pfn, unsigned long *zholes_size);
extern void free_initmem(void);
+/*
+ * Free reserved pages within range [PAGE_ALIGN(start), end & PAGE_MASK)
+ * into the buddy system. The freed pages will be poisoned with pattern
+ * "poison" if it's non-zero.
+ * Return pages freed into the buddy system.
+ */
+extern unsigned long free_reserved_area(unsigned long start, unsigned long end,
+ int poison, char *s);
+#ifdef CONFIG_HIGHMEM
+/*
+ * Free a highmem page into the buddy system, adjusting totalhigh_pages
+ * and totalram_pages.
+ */
+extern void free_highmem_page(struct page *page);
+#endif
+
+static inline void adjust_managed_page_count(struct page *page, long count)
+{
+ totalram_pages += count;
+}
+
+/* Free the reserved page into the buddy system, so it gets managed. */
+static inline void __free_reserved_page(struct page *page)
+{
+ ClearPageReserved(page);
+ init_page_count(page);
+ __free_page(page);
+}
+
+static inline void free_reserved_page(struct page *page)
+{
+ __free_reserved_page(page);
+ adjust_managed_page_count(page, 1);
+}
+
+static inline void mark_page_reserved(struct page *page)
+{
+ SetPageReserved(page);
+ adjust_managed_page_count(page, -1);
+}
+
+/*
+ * Default method to free all the __init memory into the buddy system.
+ * The freed pages will be poisoned with pattern "poison" if it is
+ * non-zero. Return pages freed into the buddy system.
+ */
+static inline unsigned long free_initmem_default(int poison)
+{
+ extern char __init_begin[], __init_end[];
+
+ return free_reserved_area(PAGE_ALIGN((unsigned long)&__init_begin) ,
+ ((unsigned long)&__init_end) & PAGE_MASK,
+ poison, "unused kernel");
+}
+
#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
/*
* With CONFIG_HAVE_MEMBLOCK_NODE_MAP set, an architecture may initialise its
@@ -1671,8 +1727,12 @@ int in_gate_area_no_mm(unsigned long addr);
#define in_gate_area(mm, addr) ({(void)mm; in_gate_area_no_mm(addr);})
#endif /* __HAVE_ARCH_GATE_AREA */
+#ifdef CONFIG_SYSCTL
+extern int sysctl_drop_caches;
int drop_caches_sysctl_handler(struct ctl_table *, int,
void __user *, size_t *, loff_t *);
+#endif
+
unsigned long shrink_slab(struct shrink_control *shrink,
unsigned long nr_pages_scanned,
unsigned long lru_pages);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index ace9a5f01c64..fb425aa16c01 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -330,12 +330,9 @@ struct mm_struct {
unsigned long (*get_unmapped_area) (struct file *filp,
unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags);
- void (*unmap_area) (struct mm_struct *mm, unsigned long addr);
#endif
unsigned long mmap_base; /* base of mmap area */
unsigned long task_size; /* size of task vm space */
- unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */
- unsigned long free_area_cache; /* first hole of size cached_hole_size or larger */
unsigned long highest_vm_end; /* highest vma end address */
pgd_t * pgd;
atomic_t mm_users; /* How many users with user space? */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index ede274957e05..72e1cb5537d5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -527,7 +527,7 @@ static inline int zone_is_oom_locked(const struct zone *zone)
return test_bit(ZONE_OOM_LOCKED, &zone->flags);
}
-static inline unsigned zone_end_pfn(const struct zone *zone)
+static inline unsigned long zone_end_pfn(const struct zone *zone)
{
return zone->zone_start_pfn + zone->spanned_pages;
}
@@ -815,7 +815,10 @@ unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long);
/*
* zone_idx() returns 0 for the ZONE_DMA zone, 1 for the ZONE_NORMAL zone, etc.
*/
-#define zone_idx(zone) ((zone) - (zone)->zone_pgdat->node_zones)
+static inline enum zone_type zone_idx(struct zone *zone)
+{
+ return zone - zone->zone_pgdat->node_zones;
+}
static inline int populated_zone(struct zone *zone)
{
@@ -856,25 +859,18 @@ static inline int is_normal_idx(enum zone_type idx)
*/
static inline int is_highmem(struct zone *zone)
{
-#ifdef CONFIG_HIGHMEM
- int zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones;
- return zone_off == ZONE_HIGHMEM * sizeof(*zone) ||
- (zone_off == ZONE_MOVABLE * sizeof(*zone) &&
- zone_movable_is_highmem());
-#else
- return 0;
-#endif
+ return is_highmem_idx(zone_idx(zone));
}
static inline int is_normal(struct zone *zone)
{
- return zone == zone->zone_pgdat->node_zones + ZONE_NORMAL;
+ return zone_idx(zone) == ZONE_NORMAL;
}
static inline int is_dma32(struct zone *zone)
{
#ifdef CONFIG_ZONE_DMA32
- return zone == zone->zone_pgdat->node_zones + ZONE_DMA32;
+ return zone_idx(zone) == ZONE_DMA32;
#else
return 0;
#endif
@@ -883,7 +879,7 @@ static inline int is_dma32(struct zone *zone)
static inline int is_dma(struct zone *zone)
{
#ifdef CONFIG_ZONE_DMA
- return zone == zone->zone_pgdat->node_zones + ZONE_DMA;
+ return zone_idx(zone) == ZONE_DMA;
#else
return 0;
#endif
diff --git a/include/linux/net.h b/include/linux/net.h
index aa1673160a45..99c9f0c103c2 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -240,8 +240,8 @@ do { \
#define net_dbg_ratelimited(fmt, ...) \
net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__)
-#define net_random() random32()
-#define net_srandom(seed) srandom32((__force u32)seed)
+#define net_random() prandom_u32()
+#define net_srandom(seed) prandom_seed((__force u32)(seed))
extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
struct kvec *vec, size_t num, size_t len);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0e38e13eb249..e3dea75a078b 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -149,7 +149,7 @@ static inline int page_cache_get_speculative(struct page *page)
{
VM_BUG_ON(in_interrupt());
-#if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU)
+#ifdef CONFIG_TINY_RCU
# ifdef CONFIG_PREEMPT_COUNT
VM_BUG_ON(!in_atomic());
# endif
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
new file mode 100644
index 000000000000..d0cf8872dc43
--- /dev/null
+++ b/include/linux/percpu-refcount.h
@@ -0,0 +1,114 @@
+/*
+ * Dynamic percpu refcounts:
+ * (C) 2012 Google, Inc.
+ * Author: Kent Overstreet <koverstreet@google.com>
+ *
+ * This implements a refcount with similar semantics to atomic_t - atomic_inc(),
+ * atomic_dec_and_test() - but potentially percpu.
+ *
+ * There's one important difference between percpu refs and normal atomic_t
+ * refcounts; you have to keep track of your initial refcount, and then when you
+ * start shutting down you call percpu_ref_kill() _before_ dropping the initial
+ * refcount.
+ *
+ * Before you call percpu_ref_kill(), percpu_ref_put() does not check for the
+ * refcount hitting 0 - it can't, if it was in percpu mode. percpu_ref_kill()
+ * puts the ref back in single atomic_t mode, collecting the per cpu refs and
+ * issuing the appropriate barriers, and then marks the ref as shutting down so
+ * that percpu_ref_put() will check for the ref hitting 0. After it returns,
+ * it's safe to drop the initial ref.
+ *
+ * BACKGROUND:
+ *
+ * Percpu refcounts are quite useful for performance, but if we blindly
+ * converted all refcounts to percpu counters we'd waste quite a bit of memory.
+ *
+ * Think about all the refcounts embedded in kobjects, files, etc. most of which
+ * aren't used much. These start out as simple atomic counters - a little bigger
+ * than a bare atomic_t, 16 bytes instead of 4 - but if we exceed some arbitrary
+ * number of gets in one second, we then switch to percpu counters.
+ *
+ * This heuristic isn't perfect because it'll fire if the refcount was only
+ * being used on one cpu; ideally we'd be able to count the number of cache
+ * misses on percpu_ref_get() or something similar, but that'd make the non
+ * percpu path significantly heavier/more complex. We can count the number of
+ * gets() without any extra atomic instructions on arches that support
+ * atomic64_t - simply by changing the atomic_inc() to atomic_add_return().
+ *
+ * USAGE:
+ *
+ * See fs/aio.c for some example usage; it's used there for struct kioctx, which
+ * is created when userspaces calls io_setup(), and destroyed when userspace
+ * calls io_destroy() or the process exits.
+ *
+ * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it
+ * calls percpu_ref_kill(), then hlist_del_rcu() and sychronize_rcu() to remove
+ * the kioctx from the proccess's list of kioctxs - after that, there can't be
+ * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop
+ * the initial ref with percpu_ref_put().
+ *
+ * Code that does a two stage shutdown like this often needs some kind of
+ * explicit synchronization to ensure the initial refcount can only be dropped
+ * once - percpu_ref_kill() does this for you, it returns true once and false if
+ * someone else already called it. The aio code uses it this way, but it's not
+ * necessary if the code has some other mechanism to synchronize teardown.
+ *
+ * As mentioned previously, we decide when to convert a ref to percpu counters
+ * in percpu_ref_get(). However, since percpu_ref_get() will often be called
+ * with rcu_read_lock() held, it's not done there - percpu_ref_get() returns
+ * true if the ref should be converted to percpu counters.
+ *
+ * The caller should then call percpu_ref_alloc() after dropping
+ * rcu_read_lock(); if there is an uncommonly used codepath where it's
+ * inconvenient to call percpu_ref_alloc() after get(), it may be safely skipped
+ * and percpu_ref_get() will return true again the next time the counter wraps
+ * around.
+ */
+
+#ifndef _LINUX_PERCPU_REFCOUNT_H
+#define _LINUX_PERCPU_REFCOUNT_H
+
+#include <linux/atomic.h>
+#include <linux/percpu.h>
+
+struct percpu_ref {
+ atomic64_t count;
+ unsigned long pcpu_count;
+};
+
+void percpu_ref_init(struct percpu_ref *ref);
+void __percpu_ref_get(struct percpu_ref *ref, bool alloc);
+int percpu_ref_put(struct percpu_ref *ref);
+
+int percpu_ref_kill(struct percpu_ref *ref);
+int percpu_ref_dead(struct percpu_ref *ref);
+
+/**
+ * percpu_ref_get - increment a dynamic percpu refcount
+ *
+ * Increments @ref and possibly converts it to percpu counters. Must be called
+ * with rcu_read_lock() held, and may potentially drop/reacquire rcu_read_lock()
+ * to allocate percpu counters - if sleeping/allocation isn't safe for some
+ * other reason (e.g. a spinlock), see percpu_ref_get_noalloc().
+ *
+ * Analagous to atomic_inc().
+ */
+static inline void percpu_ref_get(struct percpu_ref *ref)
+{
+ __percpu_ref_get(ref, true);
+}
+
+/**
+ * percpu_ref_get_noalloc - increment a dynamic percpu refcount
+ *
+ * Increments @ref, to be used when it's not safe to allocate percpu counters.
+ * Must be called with rcu_read_lock() held.
+ *
+ * Analagous to atomic_inc().
+ */
+static inline void percpu_ref_get_noalloc(struct percpu_ref *ref)
+{
+ __percpu_ref_get(ref, false);
+}
+
+#endif
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 215e5e3dda10..d56406aaffe1 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/bug.h>
#include <linux/mm.h>
+#include <linux/workqueue.h>
#include <linux/threads.h>
#include <linux/nsproxy.h>
#include <linux/kref.h>
@@ -13,7 +14,9 @@ struct pidmap {
void *page;
};
-#define PIDMAP_ENTRIES ((PID_MAX_LIMIT + 8*PAGE_SIZE - 1)/PAGE_SIZE/8)
+#define BITS_PER_PAGE (PAGE_SIZE * 8)
+#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
+#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE)
struct bsd_acct_struct;
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h
index 042058fdb0af..92a51df74d12 100644
--- a/include/linux/posix-timers.h
+++ b/include/linux/posix-timers.h
@@ -7,14 +7,20 @@
#include <linux/timex.h>
#include <linux/alarmtimer.h>
-union cpu_time_count {
- cputime_t cpu;
- unsigned long long sched;
-};
+
+static inline unsigned long long cputime_to_expires(cputime_t expires)
+{
+ return (__force unsigned long long)expires;
+}
+
+static inline cputime_t expires_to_cputime(unsigned long long expires)
+{
+ return (__force cputime_t)expires;
+}
struct cpu_timer_list {
struct list_head entry;
- union cpu_time_count expires, incr;
+ unsigned long long expires, incr;
struct task_struct *task;
int firing;
};
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 1249a54d17e0..4890fe62c011 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -1,6 +1,7 @@
#ifndef __KERNEL_PRINTK__
#define __KERNEL_PRINTK__
+#include <stdarg.h>
#include <linux/init.h>
#include <linux/kern_levels.h>
@@ -95,8 +96,14 @@ int no_printk(const char *fmt, ...)
return 0;
}
+#ifdef CONFIG_EARLY_PRINTK
extern asmlinkage __printf(1, 2)
void early_printk(const char *fmt, ...);
+void early_vprintk(const char *fmt, va_list ap);
+#else
+static inline __printf(1, 2) __cold
+void early_printk(const char *s, ...) { }
+#endif
#ifdef CONFIG_PRINTK
asmlinkage __printf(5, 0)
@@ -134,6 +141,8 @@ extern int printk_delay_msec;
extern int dmesg_restrict;
extern int kptr_restrict;
+extern void wake_up_klogd(void);
+
void log_buf_kexec_setup(void);
void __init setup_log_buf(int early);
#else
@@ -162,6 +171,10 @@ static inline bool printk_timed_ratelimit(unsigned long *caller_jiffies,
return false;
}
+static inline void wake_up_klogd(void)
+{
+}
+
static inline void log_buf_kexec_setup(void)
{
}
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 8307f2f94d86..bd828e095359 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -68,16 +68,16 @@ struct proc_dir_entry {
* If you're allocating ->proc_fops dynamically, save a pointer
* somewhere.
*/
- const struct file_operations *proc_fops;
+ const struct file_operations __rcu *proc_fops;
struct proc_dir_entry *next, *parent, *subdir;
void *data;
read_proc_t *read_proc;
write_proc_t *write_proc;
atomic_t count; /* use count */
- int pde_users; /* number of callers into module in progress */
+ atomic_t pde_users; /* number of callers into module in progress */
struct completion *pde_unload_completion;
+ spinlock_t pde_openers_lock;
struct list_head pde_openers; /* who did ->open, but not ->release */
- spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */
u8 namelen;
char name[];
};
diff --git a/include/linux/ramfs.h b/include/linux/ramfs.h
index 5bf5500db83d..69e37c2d1ea5 100644
--- a/include/linux/ramfs.h
+++ b/include/linux/ramfs.h
@@ -6,7 +6,13 @@ struct inode *ramfs_get_inode(struct super_block *sb, const struct inode *dir,
extern struct dentry *ramfs_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data);
-#ifndef CONFIG_MMU
+#ifdef CONFIG_MMU
+static inline int
+ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize)
+{
+ return 0;
+}
+#else
extern int ramfs_nommu_expand_for_mapping(struct inode *inode, size_t newsize);
extern unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
unsigned long addr,
diff --git a/include/linux/random.h b/include/linux/random.h
index 347ce553a306..3b9377d6b7a5 100644
--- a/include/linux/random.h
+++ b/include/linux/random.h
@@ -29,13 +29,6 @@ u32 prandom_u32(void);
void prandom_bytes(void *buf, int nbytes);
void prandom_seed(u32 seed);
-/*
- * These macros are preserved for backward compatibility and should be
- * removed as soon as a transition is finished.
- */
-#define random32() prandom_u32()
-#define srandom32(seed) prandom_seed(seed)
-
u32 prandom_u32_state(struct rnd_state *);
void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes);
diff --git a/include/linux/relay.h b/include/linux/relay.h
index 91cacc34c159..d7c8359693c6 100644
--- a/include/linux/relay.h
+++ b/include/linux/relay.h
@@ -20,9 +20,6 @@
#include <linux/poll.h>
#include <linux/kref.h>
-/* Needs a _much_ better name... */
-#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
-
/*
* Tracks changes to rchan/rchan_buf structs
*/
diff --git a/include/linux/rtc-pxa.h b/include/linux/rtc-pxa.h
new file mode 100644
index 000000000000..71bc45f060fc
--- /dev/null
+++ b/include/linux/rtc-pxa.h
@@ -0,0 +1,18 @@
+/*
+ * include/linux/rtc-pxa.h
+ *
+ * RTC PXA Header file
+ *
+ * Copyright (C) 2010 Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef __LINUX_RTC_PXA_H
+#define __LINUX_RTC_PXA_H
+
+extern int pxa_rtc_sync_time(unsigned int ticks);
+
+#endif /* __LINUX_RTC_PXA_H */
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 580b24c8b8ca..c2c28975293c 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -133,7 +133,13 @@ extern struct rtc_device *rtc_device_register(const char *name,
struct device *dev,
const struct rtc_class_ops *ops,
struct module *owner);
+extern struct rtc_device *devm_rtc_device_register(struct device *dev,
+ const char *name,
+ const struct rtc_class_ops *ops,
+ struct module *owner);
extern void rtc_device_unregister(struct rtc_device *rtc);
+extern void devm_rtc_device_unregister(struct device *dev,
+ struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index be99a5ce1ac6..7ae10dbfd2b0 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -313,8 +313,6 @@ extern int mutex_spin_on_owner(struct mutex *lock, struct task_struct *owner);
struct nsproxy;
struct user_namespace;
-#include <linux/aio.h>
-
#ifdef CONFIG_MMU
extern void arch_pick_mmap_layout(struct mm_struct *mm);
extern unsigned long
@@ -324,8 +322,6 @@ extern unsigned long
arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags);
-extern void arch_unmap_area(struct mm_struct *, unsigned long);
-extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long);
#else
static inline void arch_pick_mmap_layout(struct mm_struct *mm) {}
#endif
@@ -625,6 +621,7 @@ struct signal_struct {
#define SIGNAL_STOP_STOPPED 0x00000001 /* job control stop in effect */
#define SIGNAL_STOP_CONTINUED 0x00000002 /* SIGCONT since WCONTINUED reap */
#define SIGNAL_GROUP_EXIT 0x00000004 /* group exit in progress */
+#define SIGNAL_GROUP_COREDUMP 0x00000008 /* coredump in progress */
/*
* Pending notifications to parent.
*/
diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h
index bd6cf61142be..d4b7a184f08c 100644
--- a/include/linux/vm_event_item.h
+++ b/include/linux/vm_event_item.h
@@ -50,7 +50,12 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
COMPACTMIGRATE_SCANNED, COMPACTFREE_SCANNED,
COMPACTISOLATED,
COMPACTSTALL, COMPACTFAIL, COMPACTSUCCESS,
-#endif
+#ifdef CONFIG_BALLOON_COMPACTION
+ COMPACTBALLOONISOLATED, /* isolated from balloon pagelist */
+ COMPACTBALLOONMIGRATED, /* balloon page sucessfully migrated */
+ COMPACTBALLOONRETURNED, /* putback to pagelist, not-migrated */
+#endif /* CONFIG_BALLOON_COMPACTION */
+#endif /* CONFIG_COMPACTION */
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
#endif
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 6071e911c7f4..7d5773a99f20 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -3,7 +3,9 @@
#include <linux/spinlock.h>
#include <linux/init.h>
+#include <linux/list.h>
#include <asm/page.h> /* pgprot_t */
+#include <linux/rbtree.h>
struct vm_area_struct; /* vma defining user mapping in mm_types.h */
@@ -35,6 +37,17 @@ struct vm_struct {
const void *caller;
};
+struct vmap_area {
+ unsigned long va_start;
+ unsigned long va_end;
+ unsigned long flags;
+ struct rb_node rb_node; /* address sorted rbtree */
+ struct list_head list; /* address sorted list */
+ struct list_head purge_list; /* "lazy purge" list */
+ struct vm_struct *vm;
+ struct rcu_head rcu_head;
+};
+
/*
* Highlevel APIs for driver use
*/
@@ -130,8 +143,7 @@ extern long vwrite(char *buf, char *addr, unsigned long count);
/*
* Internals. Dont't use..
*/
-extern rwlock_t vmlist_lock;
-extern struct vm_struct *vmlist;
+extern struct list_head vmap_area_list;
extern __init void vm_area_add_early(struct vm_struct *vm);
extern __init void vm_area_register_early(struct vm_struct *vm, size_t align);
@@ -158,4 +170,22 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
# endif
#endif
+struct vmalloc_info {
+ unsigned long used;
+ unsigned long largest_chunk;
+};
+
+#ifdef CONFIG_MMU
+#define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START)
+extern void get_vmalloc_info(struct vmalloc_info *vmi);
+#else
+
+#define VMALLOC_TOTAL 0UL
+#define get_vmalloc_info(vmi) \
+do { \
+ (vmi)->used = 0; \
+ (vmi)->largest_chunk = 0; \
+} while (0)
+#endif
+
#endif /* _LINUX_VMALLOC_H */
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 7cb64d4b499d..ac38be2692d8 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -330,6 +330,92 @@ do { \
__ret; \
})
+#define __wait_event_hrtimeout(wq, condition, timeout, state) \
+({ \
+ int __ret = 0; \
+ DEFINE_WAIT(__wait); \
+ struct hrtimer_sleeper __t; \
+ \
+ hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \
+ HRTIMER_MODE_REL); \
+ hrtimer_init_sleeper(&__t, current); \
+ if ((timeout).tv64 != KTIME_MAX) \
+ hrtimer_start_range_ns(&__t.timer, timeout, \
+ current->timer_slack_ns, \
+ HRTIMER_MODE_REL); \
+ \
+ for (;;) { \
+ prepare_to_wait(&wq, &__wait, state); \
+ if (condition) \
+ break; \
+ if (state == TASK_INTERRUPTIBLE && \
+ signal_pending(current)) { \
+ __ret = -ERESTARTSYS; \
+ break; \
+ } \
+ if (!__t.task) { \
+ __ret = -ETIME; \
+ break; \
+ } \
+ schedule(); \
+ } \
+ \
+ hrtimer_cancel(&__t.timer); \
+ destroy_hrtimer_on_stack(&__t.timer); \
+ finish_wait(&wq, &__wait); \
+ __ret; \
+})
+
+/**
+ * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, as a ktime_t
+ *
+ * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if @condition became true, or -ETIME if the timeout
+ * elapsed.
+ */
+#define wait_event_hrtimeout(wq, condition, timeout) \
+({ \
+ int __ret = 0; \
+ if (!(condition)) \
+ __ret = __wait_event_hrtimeout(wq, condition, timeout, \
+ TASK_UNINTERRUPTIBLE); \
+ __ret; \
+})
+
+/**
+ * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ * @timeout: timeout, as a ktime_t
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * wake_up() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function returns 0 if @condition became true, -ERESTARTSYS if it was
+ * interrupted by a signal, or -ETIME if the timeout elapsed.
+ */
+#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \
+({ \
+ long __ret = 0; \
+ if (!(condition)) \
+ __ret = __wait_event_hrtimeout(wq, condition, timeout, \
+ TASK_INTERRUPTIBLE); \
+ __ret; \
+})
+
#define __wait_event_interruptible_exclusive(wq, condition, ret) \
do { \
DEFINE_WAIT(__wait); \
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 9a9367c0c076..579a5007c696 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -5,6 +5,7 @@
#define WRITEBACK_H
#include <linux/sched.h>
+#include <linux/workqueue.h>
#include <linux/fs.h>
DECLARE_PER_CPU(int, dirty_throttle_leaks);
diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h
index fce8e6b66d55..f5b702341be2 100644
--- a/include/net/ip_vs.h
+++ b/include/net/ip_vs.h
@@ -874,7 +874,7 @@ struct ip_vs_app {
struct ipvs_master_sync_state {
struct list_head sync_queue;
struct ip_vs_sync_buff *sync_buff;
- int sync_queue_len;
+ unsigned long sync_queue_len;
unsigned int sync_queue_delay;
struct task_struct *master_thread;
struct delayed_work master_wakeup_work;
@@ -966,7 +966,7 @@ struct netns_ipvs {
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_sync_ports;
- int sysctl_sync_qlen_max;
+ unsigned long sysctl_sync_qlen_max;
int sysctl_sync_sock_size;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
@@ -1053,7 +1053,7 @@ static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
return ACCESS_ONCE(ipvs->sysctl_sync_ports);
}
-static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_qlen_max;
}
@@ -1106,7 +1106,7 @@ static inline int sysctl_sync_ports(struct netns_ipvs *ipvs)
return 1;
}
-static inline int sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
+static inline unsigned long sysctl_sync_qlen_max(struct netns_ipvs *ipvs)
{
return IPVS_SYNC_QLEN_MAX;
}
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
new file mode 100644
index 000000000000..0421f49a20f7
--- /dev/null
+++ b/include/trace/events/filemap.h
@@ -0,0 +1,58 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM filemap
+
+#if !defined(_TRACE_FILEMAP_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_FILEMAP_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include <linux/mm.h>
+#include <linux/memcontrol.h>
+#include <linux/device.h>
+#include <linux/kdev_t.h>
+
+DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
+
+ TP_PROTO(struct page *page),
+
+ TP_ARGS(page),
+
+ TP_STRUCT__entry(
+ __field(struct page *, page)
+ __field(unsigned long, i_ino)
+ __field(unsigned long, index)
+ __field(dev_t, s_dev)
+ ),
+
+ TP_fast_assign(
+ __entry->page = page;
+ __entry->i_ino = page->mapping->host->i_ino;
+ __entry->index = page->index;
+ if (page->mapping->host->i_sb)
+ __entry->s_dev = page->mapping->host->i_sb->s_dev;
+ else
+ __entry->s_dev = page->mapping->host->i_rdev;
+ ),
+
+ TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
+ MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
+ __entry->i_ino,
+ __entry->page,
+ page_to_pfn(__entry->page),
+ __entry->index << PAGE_SHIFT)
+);
+
+DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_delete_from_page_cache,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page)
+ );
+
+DEFINE_EVENT(mm_filemap_op_page_cache, mm_filemap_add_to_page_cache,
+ TP_PROTO(struct page *page),
+ TP_ARGS(page)
+ );
+
+#endif /* _TRACE_FILEMAP_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/include/trace/events/printk.h b/include/trace/events/printk.h
index 94ec79cc011a..c008bc99f9fa 100644
--- a/include/trace/events/printk.h
+++ b/include/trace/events/printk.h
@@ -6,31 +6,18 @@
#include <linux/tracepoint.h>
-TRACE_EVENT_CONDITION(console,
- TP_PROTO(const char *log_buf, unsigned start, unsigned end,
- unsigned log_buf_len),
+TRACE_EVENT(console,
+ TP_PROTO(const char *text, size_t len),
- TP_ARGS(log_buf, start, end, log_buf_len),
-
- TP_CONDITION(start != end),
+ TP_ARGS(text, len),
TP_STRUCT__entry(
- __dynamic_array(char, msg, end - start + 1)
+ __dynamic_array(char, msg, len + 1)
),
TP_fast_assign(
- if ((start & (log_buf_len - 1)) > (end & (log_buf_len - 1))) {
- memcpy(__get_dynamic_array(msg),
- log_buf + (start & (log_buf_len - 1)),
- log_buf_len - (start & (log_buf_len - 1)));
- memcpy((char *)__get_dynamic_array(msg) +
- log_buf_len - (start & (log_buf_len - 1)),
- log_buf, end & (log_buf_len - 1));
- } else
- memcpy(__get_dynamic_array(msg),
- log_buf + (start & (log_buf_len - 1)),
- end - start);
- ((char *)__get_dynamic_array(msg))[end - start] = 0;
+ memcpy(__get_dynamic_array(msg), text, len);
+ ((char *)__get_dynamic_array(msg))[len] = 0;
),
TP_printk("%s", __get_str(msg))
diff --git a/include/uapi/linux/eventpoll.h b/include/uapi/linux/eventpoll.h
index 2c267bcbb85c..8c99ce7202c5 100644
--- a/include/uapi/linux/eventpoll.h
+++ b/include/uapi/linux/eventpoll.h
@@ -25,6 +25,7 @@
#define EPOLL_CTL_ADD 1
#define EPOLL_CTL_DEL 2
#define EPOLL_CTL_MOD 3
+#define EPOLL_CTL_DISABLE 4
/*
* Request the handling of system wakeup events so as to prevent system suspends
diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h
index c7fc1e6517c3..a4ed56cf0eac 100644
--- a/include/uapi/linux/fs.h
+++ b/include/uapi/linux/fs.h
@@ -88,7 +88,6 @@ struct inodes_stat_t {
#define MS_STRICTATIME (1<<24) /* Always perform atime updates */
/* These sb flags are internal to the kernel */
-#define MS_SNAP_STABLE (1<<27) /* Snapshot pages during writeback, if needed */
#define MS_NOSEC (1<<28)
#define MS_BORN (1<<29)
#define MS_ACTIVE (1<<30)
diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h
index 022ab186a812..52ebcc89f306 100644
--- a/include/uapi/linux/ptrace.h
+++ b/include/uapi/linux/ptrace.h
@@ -5,6 +5,7 @@
/* has the defines to get at the registers. */
+#include <linux/types.h>
#define PTRACE_TRACEME 0
#define PTRACE_PEEKTEXT 1
@@ -52,6 +53,17 @@
#define PTRACE_INTERRUPT 0x4207
#define PTRACE_LISTEN 0x4208
+#define PTRACE_PEEKSIGINFO 0x4209
+
+struct ptrace_peeksiginfo_args {
+ __u64 off; /* from which siginfo to start */
+ __u32 flags;
+ __s32 nr; /* how may siginfos to take */
+};
+
+/* Read signals from a shared (process wide) queue */
+#define PTRACE_PEEKSIGINFO_SHARED (1 << 0)
+
/* Wait extended result codes for the above trace options. */
#define PTRACE_EVENT_FORK 1
#define PTRACE_EVENT_VFORK 2
diff --git a/include/xen/tmem.h b/include/xen/tmem.h
index 591550a22ac7..3930a90045ff 100644
--- a/include/xen/tmem.h
+++ b/include/xen/tmem.h
@@ -3,7 +3,15 @@
#include <linux/types.h>
+#ifdef CONFIG_XEN_TMEM_MODULE
+#define tmem_enabled true
+#else
/* defined in drivers/xen/tmem.c */
extern bool tmem_enabled;
+#endif
+
+#ifdef CONFIG_XEN_SELFBALLOONING
+extern int xen_selfballoon_init(bool, bool);
+#endif
#endif /* _XEN_TMEM_H */
diff --git a/init/Kconfig b/init/Kconfig
index 279d5f784553..e67bb293e341 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -98,10 +98,13 @@ config HAVE_KERNEL_XZ
config HAVE_KERNEL_LZO
bool
+config HAVE_KERNEL_LZ4
+ bool
+
choice
prompt "Kernel compression mode"
default KERNEL_GZIP
- depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO
+ depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_XZ || HAVE_KERNEL_LZO || HAVE_KERNEL_LZ4
help
The linux kernel is a kind of self-extracting executable.
Several compression algorithms are available, which differ
@@ -168,6 +171,18 @@ config KERNEL_LZO
size is about 10% bigger than gzip; however its speed
(both compression and decompression) is the fastest.
+config KERNEL_LZ4
+ bool "LZ4"
+ depends on HAVE_KERNEL_LZ4
+ help
+ LZ4 is an LZ77-type compressor with a fixed, byte-oriented encoding.
+ A preliminary version of LZ4 de/compression tool is available at
+ <https://code.google.com/p/lz4/>.
+
+ Its compression ratio is worse than LZO. The size of the kernel
+ is about 8% bigger than LZO. But the decompression speed is
+ faster than LZO.
+
endchoice
config DEFAULT_HOSTNAME
@@ -929,6 +944,23 @@ config MEMCG_KMEM
the kmem extension can use it to guarantee that no group of processes
will ever exhaust kernel resources alone.
+config MEMCG_DEBUG_ASYNC_DESTROY
+ bool "Memory Resource Controller Debug asynchronous object destruction"
+ depends on MEMCG_KMEM || MEMCG_SWAP
+ default n
+ help
+ When a memcg is destroyed, the memory consumed by it may not be
+ immediately freed. This is because when some extensions are used, such
+ as swap or kernel memory, objects can outlive the group and hold a
+ reference to it.
+
+ If this is the case, the dangling_memcgs file will show information
+ about what are the memcgs still alive, and which references are still
+ preventing it to be freed. There is nothing wrong with that, but it is
+ very useful when debugging, to know where this memory is being held.
+ This is a developer-oriented debugging facility only, and no
+ guarantees of interface stability will be given.
+
config CGROUP_HUGETLB
bool "HugeTLB Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS && HUGETLB_PAGE
diff --git a/init/do_mounts_initrd.c b/init/do_mounts_initrd.c
index a32ec1ce882b..3e0878e8a80d 100644
--- a/init/do_mounts_initrd.c
+++ b/init/do_mounts_initrd.c
@@ -50,6 +50,7 @@ static int init_linuxrc(struct subprocess_info *info, struct cred *new)
static void __init handle_initrd(void)
{
+ struct subprocess_info *info;
static char *argv[] = { "linuxrc", NULL, };
extern char *envp_init[];
int error;
@@ -70,8 +71,11 @@ static void __init handle_initrd(void)
*/
current->flags |= PF_FREEZER_SKIP;
- call_usermodehelper_fns("/linuxrc", argv, envp_init, UMH_WAIT_PROC,
- init_linuxrc, NULL, NULL);
+ info = call_usermodehelper_setup("/linuxrc", argv, envp_init,
+ GFP_KERNEL, init_linuxrc, NULL, NULL);
+ if (!info)
+ return;
+ call_usermodehelper_exec(info, UMH_WAIT_PROC);
current->flags &= ~PF_FREEZER_SKIP;
diff --git a/init/main.c b/init/main.c
index b3e061428545..68f2ab9f3de5 100644
--- a/init/main.c
+++ b/init/main.c
@@ -538,11 +538,8 @@ asmlinkage void __init start_kernel(void)
* fragile until we cpu_idle() for the first time.
*/
preempt_disable();
- if (!irqs_disabled()) {
- printk(KERN_WARNING "start_kernel(): bug: interrupts were "
- "enabled *very* early, fixing it\n");
+ if (WARN(!irqs_disabled(), "Interrupts were enabled *very* early, fixing it\n"))
local_irq_disable();
- }
idr_init_cache();
perf_event_init();
rcu_init();
@@ -558,9 +555,7 @@ asmlinkage void __init start_kernel(void)
time_init();
profile_init();
call_function_init();
- if (!irqs_disabled())
- printk(KERN_CRIT "start_kernel(): bug: interrupts were "
- "enabled early\n");
+ WARN(!irqs_disabled(), "Interrupts were enabled early\n");
early_boot_irqs_disabled = false;
local_irq_enable();
@@ -702,9 +697,7 @@ int __init_or_module do_one_initcall(initcall_t fn)
strlcat(msgbuf, "disabled interrupts ", sizeof(msgbuf));
local_irq_enable();
}
- if (msgbuf[0]) {
- printk("initcall %pF returned with %s\n", fn, msgbuf);
- }
+ WARN(msgbuf[0], "initcall %pF returned with %s\n", fn, msgbuf);
return ret;
}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index e5c4f609f22c..3953fda2e8bd 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -840,7 +840,8 @@ out_putfd:
fd = error;
}
mutex_unlock(&root->d_inode->i_mutex);
- mnt_drop_write(mnt);
+ if (!ro)
+ mnt_drop_write(mnt);
out_putname:
putname(name);
return fd;
diff --git a/ipc/msg.c b/ipc/msg.c
index 31cd1bf6af27..309583c308cb 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -66,6 +66,7 @@ struct msg_sender {
#define SEARCH_EQUAL 2
#define SEARCH_NOTEQUAL 3
#define SEARCH_LESSEQUAL 4
+#define SEARCH_NUMBER 5
#define msg_ids(ns) ((ns)->ids[IPC_MSG_IDS])
@@ -583,6 +584,7 @@ static int testmsg(struct msg_msg *msg, long type, int mode)
switch(mode)
{
case SEARCH_ANY:
+ case SEARCH_NUMBER:
return 1;
case SEARCH_LESSEQUAL:
if (msg->m_type <=type)
@@ -738,6 +740,8 @@ SYSCALL_DEFINE4(msgsnd, int, msqid, struct msgbuf __user *, msgp, size_t, msgsz,
static inline int convert_mode(long *msgtyp, int msgflg)
{
+ if (msgflg & MSG_COPY)
+ return SEARCH_NUMBER;
/*
* find message of correct type.
* msgtyp = 0 => get first.
@@ -774,14 +778,10 @@ static long do_msg_fill(void __user *dest, struct msg_msg *msg, size_t bufsz)
* This function creates new kernel message structure, large enough to store
* bufsz message bytes.
*/
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
struct msg_msg *copy;
- *copy_number = *msgtyp;
- *msgtyp = 0;
/*
* Create dummy message to copy real message to.
*/
@@ -797,9 +797,7 @@ static inline void free_copy(struct msg_msg *copy)
free_msg(copy);
}
#else
-static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz,
- int msgflg, long *msgtyp,
- unsigned long *copy_number)
+static inline struct msg_msg *prepare_copy(void __user *buf, size_t bufsz)
{
return ERR_PTR(-ENOSYS);
}
@@ -809,6 +807,30 @@ static inline void free_copy(struct msg_msg *copy)
}
#endif
+static struct msg_msg *find_msg(struct msg_queue *msq, long *msgtyp, int mode)
+{
+ struct msg_msg *msg;
+ long count = 0;
+
+ list_for_each_entry(msg, &msq->q_messages, m_list) {
+ if (testmsg(msg, *msgtyp, mode) &&
+ !security_msg_queue_msgrcv(msq, msg, current,
+ *msgtyp, mode)) {
+ if (mode == SEARCH_LESSEQUAL && msg->m_type != 1) {
+ *msgtyp = msg->m_type - 1;
+ } else if (mode == SEARCH_NUMBER) {
+ if (*msgtyp == count)
+ return msg;
+ } else
+ return msg;
+ count++;
+ }
+ }
+
+ return ERR_PTR(-EAGAIN);
+}
+
+
long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int msgflg,
long (*msg_handler)(void __user *, struct msg_msg *, size_t))
@@ -818,18 +840,17 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
int mode;
struct ipc_namespace *ns;
struct msg_msg *copy = NULL;
- unsigned long copy_number = 0;
ns = current->nsproxy->ipc_ns;
if (msqid < 0 || (long) bufsz < 0)
return -EINVAL;
if (msgflg & MSG_COPY) {
- copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax),
- msgflg, &msgtyp, &copy_number);
+ copy = prepare_copy(buf, min_t(size_t, bufsz, ns->msg_ctlmax));
if (IS_ERR(copy))
return PTR_ERR(copy);
}
+
mode = convert_mode(&msgtyp, msgflg);
msq = msg_lock_check(ns, msqid);
@@ -840,44 +861,13 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
for (;;) {
struct msg_receiver msr_d;
- struct list_head *tmp;
- long msg_counter = 0;
msg = ERR_PTR(-EACCES);
if (ipcperms(ns, &msq->q_perm, S_IRUGO))
goto out_unlock;
- msg = ERR_PTR(-EAGAIN);
- tmp = msq->q_messages.next;
- while (tmp != &msq->q_messages) {
- struct msg_msg *walk_msg;
-
- walk_msg = list_entry(tmp, struct msg_msg, m_list);
- if (testmsg(walk_msg, msgtyp, mode) &&
- !security_msg_queue_msgrcv(msq, walk_msg, current,
- msgtyp, mode)) {
-
- msg = walk_msg;
- if (mode == SEARCH_LESSEQUAL &&
- walk_msg->m_type != 1) {
- msgtyp = walk_msg->m_type - 1;
- } else if (msgflg & MSG_COPY) {
- if (copy_number == msg_counter) {
- /*
- * Found requested message.
- * Copy it.
- */
- msg = copy_msg(msg, copy);
- if (IS_ERR(msg))
- goto out_unlock;
- break;
- }
- } else
- break;
- msg_counter++;
- }
- tmp = tmp->next;
- }
+ msg = find_msg(msq, &msgtyp, mode);
+
if (!IS_ERR(msg)) {
/*
* Found a suitable message.
@@ -891,8 +881,10 @@ long do_msgrcv(int msqid, void __user *buf, size_t bufsz, long msgtyp,
* If we are copying, then do not unlink message and do
* not update queue parameters.
*/
- if (msgflg & MSG_COPY)
+ if (msgflg & MSG_COPY) {
+ msg = copy_msg(msg, copy);
goto out_unlock;
+ }
list_del(&msg->m_list);
msq->q_qnum--;
msq->q_rtime = get_seconds();
diff --git a/ipc/msgutil.c b/ipc/msgutil.c
index 5df8e4bf1db0..d43439e6eb47 100644
--- a/ipc/msgutil.c
+++ b/ipc/msgutil.c
@@ -17,7 +17,7 @@
#include <linux/ipc_namespace.h>
#include <linux/utsname.h>
#include <linux/proc_fs.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
#include "util.h"
@@ -37,59 +37,70 @@ struct ipc_namespace init_ipc_ns = {
atomic_t nr_ipc_ns = ATOMIC_INIT(1);
struct msg_msgseg {
- struct msg_msgseg* next;
+ struct msg_msgseg *next;
/* the next part of the message follows immediately */
};
-#define DATALEN_MSG (PAGE_SIZE-sizeof(struct msg_msg))
-#define DATALEN_SEG (PAGE_SIZE-sizeof(struct msg_msgseg))
+#define DATALEN_MSG (int)(PAGE_SIZE-sizeof(struct msg_msg))
+#define DATALEN_SEG (int)(PAGE_SIZE-sizeof(struct msg_msgseg))
-struct msg_msg *load_msg(const void __user *src, int len)
+
+static struct msg_msg *alloc_msg(int len)
{
struct msg_msg *msg;
struct msg_msgseg **pseg;
- int err;
int alen;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
msg = kmalloc(sizeof(*msg) + alen, GFP_KERNEL);
if (msg == NULL)
- return ERR_PTR(-ENOMEM);
+ return NULL;
msg->next = NULL;
msg->security = NULL;
- if (copy_from_user(msg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
-
len -= alen;
- src = ((char __user *)src) + alen;
pseg = &msg->next;
while (len > 0) {
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- seg = kmalloc(sizeof(*seg) + alen,
- GFP_KERNEL);
- if (seg == NULL) {
- err = -ENOMEM;
+ alen = min(len, DATALEN_SEG);
+ seg = kmalloc(sizeof(*seg) + alen, GFP_KERNEL);
+ if (seg == NULL)
goto out_err;
- }
*pseg = seg;
seg->next = NULL;
- if (copy_from_user(seg + 1, src, alen)) {
- err = -EFAULT;
- goto out_err;
- }
pseg = &seg->next;
len -= alen;
- src = ((char __user *)src) + alen;
+ }
+
+ return msg;
+
+out_err:
+ free_msg(msg);
+ return NULL;
+}
+
+struct msg_msg *load_msg(const void __user *src, int len)
+{
+ struct msg_msg *msg;
+ struct msg_msgseg *seg;
+ int err = -EFAULT;
+ int alen;
+
+ msg = alloc_msg(len);
+ if (msg == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ alen = min(len, DATALEN_MSG);
+ if (copy_from_user(msg + 1, src, alen))
+ goto out_err;
+
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ src = (char __user *)src + alen;
+ alen = min(len, DATALEN_SEG);
+ if (copy_from_user(seg + 1, src, alen))
+ goto out_err;
}
err = security_msg_msg_alloc(msg);
@@ -113,23 +124,16 @@ struct msg_msg *copy_msg(struct msg_msg *src, struct msg_msg *dst)
if (src->m_ts > dst->m_ts)
return ERR_PTR(-EINVAL);
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
-
+ alen = min(len, DATALEN_MSG);
memcpy(dst + 1, src + 1, alen);
- len -= alen;
- dst_pseg = dst->next;
- src_pseg = src->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
- memcpy(dst_pseg + 1, src_pseg + 1, alen);
- dst_pseg = dst_pseg->next;
+ for (dst_pseg = dst->next, src_pseg = src->next;
+ src_pseg != NULL;
+ dst_pseg = dst_pseg->next, src_pseg = src_pseg->next) {
+
len -= alen;
- src_pseg = src_pseg->next;
+ alen = min(len, DATALEN_SEG);
+ memcpy(dst_pseg + 1, src_pseg + 1, alen);
}
dst->m_type = src->m_type;
@@ -148,24 +152,16 @@ int store_msg(void __user *dest, struct msg_msg *msg, int len)
int alen;
struct msg_msgseg *seg;
- alen = len;
- if (alen > DATALEN_MSG)
- alen = DATALEN_MSG;
+ alen = min(len, DATALEN_MSG);
if (copy_to_user(dest, msg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = msg->next;
- while (len > 0) {
- alen = len;
- if (alen > DATALEN_SEG)
- alen = DATALEN_SEG;
+ for (seg = msg->next; seg != NULL; seg = seg->next) {
+ len -= alen;
+ dest = (char __user *)dest + alen;
+ alen = min(len, DATALEN_SEG);
if (copy_to_user(dest, seg + 1, alen))
return -1;
- len -= alen;
- dest = ((char __user *)dest) + alen;
- seg = seg->next;
}
return 0;
}
diff --git a/ipc/sem.c b/ipc/sem.c
index 5b167d00efa6..a9234c7b4cac 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -61,8 +61,8 @@
* - A woken up task may not even touch the semaphore array anymore, it may
* have been destroyed already by a semctl(RMID).
* - The synchronizations between wake-ups due to a timeout/signal and a
- * wake-up due to a completed semaphore operation is achieved by using an
- * intermediate state (IN_WAKEUP).
+ * wake-up due to a completed semaphore operation is achieved by using a
+ * special wakeup scheme (queuewakeup_wait and support functions)
* - UNDO values are stored in an array (one per process and per
* semaphore array, lazily allocated). For backwards compatibility, multiple
* modes for the UNDO variables are supported (per process, per thread)
@@ -90,6 +90,135 @@
#include <asm/uaccess.h>
#include "util.h"
+
+#ifdef CONFIG_PREEMPT_RT_BASE
+ #define SYSVSEM_COMPLETION 1
+#else
+ #define SYSVSEM_CUSTOM 1
+#endif
+
+#ifdef SYSVSEM_COMPLETION
+ /* Using a completion causes some overhead, but avoids a busy loop
+ * that increases the worst case latency.
+ */
+ struct queue_done {
+ struct completion done;
+ };
+
+ static void queuewakeup_prepare(void)
+ {
+ /* no preparation necessary */
+ }
+
+ static void queuewakeup_completed(void)
+ {
+ /* empty */
+ }
+
+ static void queuewakeup_block(struct queue_done *qd)
+ {
+ /* empty */
+ }
+
+ static void queuewakeup_handsoff(struct queue_done *qd)
+ {
+ complete_all(&qd->done);
+ }
+
+ static void queuewakeup_init(struct queue_done *qd)
+ {
+ init_completion(&qd->done);
+ }
+
+ static void queuewakeup_wait(struct queue_done *qd)
+ {
+ wait_for_completion(&qd->done);
+ }
+
+#elif defined(SYSVSEM_SPINLOCK)
+ /* Note: Spinlocks do not work because:
+ * - lockdep complains [could be fixed]
+ * - only 255 concurrent spin_lock() calls are permitted, then the
+ * preempt-counter overflows
+ */
+#error SYSVSEM_SPINLOCK is a prove of concept, does not work.
+ struct queue_done {
+ spinlock_t done;
+ };
+
+ static void queuewakeup_prepare(void)
+ {
+ /* empty */
+ }
+
+ static void queuewakeup_completed(void)
+ {
+ /* empty */
+ }
+
+ static void queuewakeup_block(struct queue_done *qd)
+ {
+ BUG_ON(spin_is_locked(&qd->done));
+ spin_lock(&qd->done);
+ }
+
+ static void queuewakeup_handsoff(struct queue_done *qd)
+ {
+ spin_unlock(&qd->done);
+ }
+
+ static void queuewakeup_init(struct queue_done *qd)
+ {
+ spin_lock_init(&qd->done);
+ }
+
+ static void queuewakeup_wait(struct queue_done *qd)
+ {
+ spin_unlock_wait(&qd->done);
+ }
+#else
+ struct queue_done {
+ atomic_t done;
+ };
+
+ static void queuewakeup_prepare(void)
+ {
+ preempt_disable();
+ }
+
+ static void queuewakeup_completed(void)
+ {
+ preempt_enable();
+ }
+
+ static void queuewakeup_block(struct queue_done *qd)
+ {
+ BUG_ON(atomic_read(&qd->done) != 1);
+ atomic_set(&qd->done, 2);
+ }
+
+ static void queuewakeup_handsoff(struct queue_done *qd)
+ {
+ BUG_ON(atomic_read(&qd->done) != 2);
+ smp_mb();
+ atomic_set(&qd->done, 1);
+ }
+
+ static void queuewakeup_init(struct queue_done *qd)
+ {
+ atomic_set(&qd->done, 1);
+ }
+
+ static void queuewakeup_wait(struct queue_done *qd)
+ {
+ while (atomic_read(&qd->done) != 1)
+ cpu_relax();
+
+ smp_mb();
+ }
+#endif
+
+
/* One semaphore structure for each semaphore in the system. */
struct sem {
int semval; /* current value */
@@ -108,6 +237,7 @@ struct sem_queue {
struct sembuf *sops; /* array of pending operations */
int nsops; /* number of operations */
int alter; /* does *sops alter the array? */
+ struct queue_done done; /* completion synchronization */
};
/* Each task has a list of undo requests. They are executed automatically
@@ -245,23 +375,27 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
* - queue.status is initialized to -EINTR before blocking.
* - wakeup is performed by
* * unlinking the queue entry from sma->sem_pending
- * * setting queue.status to IN_WAKEUP
- * This is the notification for the blocked thread that a
- * result value is imminent.
+ * * setting queue.status to the actual result code
+ * This is the notification for the blocked thread that someone
+ * (usually: update_queue()) completed the semtimedop() operation.
* * call wake_up_process
- * * set queue.status to the final value.
+ * * queuewakeup_handsoff(&q->done);
* - the previously blocked thread checks queue.status:
- * * if it's IN_WAKEUP, then it must wait until the value changes
- * * if it's not -EINTR, then the operation was completed by
- * update_queue. semtimedop can return queue.status without
- * performing any operation on the sem array.
- * * otherwise it must acquire the spinlock and check what's up.
+ * * if it's not -EINTR, then someone completed the operation.
+ * First, queuewakeup_wait() must be called. Afterwards,
+ * semtimedop must return queue.status without performing any
+ * operation on the sem array.
+ * - otherwise it must acquire the spinlock and repeat the test
+ * - If it is still -EINTR, then no update_queue() completed the
+ * operation, thus semtimedop() can proceed normally.
*
- * The two-stage algorithm is necessary to protect against the following
+ * queuewakeup_wait() is necessary to protect against the following
* races:
* - if queue.status is set after wake_up_process, then the woken up idle
* thread could race forward and try (and fail) to acquire sma->lock
- * before update_queue had a chance to set queue.status
+ * before update_queue had a chance to set queue.status.
+ * More importantly, it would mean that wake_up_process must be done
+ * while holding sma->lock, i.e. this would reduce the scalability.
* - if queue.status is written before wake_up_process and if the
* blocked process is woken up by a signal between writing
* queue.status and the wake_up_process, then the woken up
@@ -271,7 +405,6 @@ static inline void sem_rmid(struct ipc_namespace *ns, struct sem_array *s)
* (yes, this happened on s390 with sysv msg).
*
*/
-#define IN_WAKEUP 1
/**
* newary - Create a new semaphore set
@@ -461,15 +594,11 @@ undo:
static void wake_up_sem_queue_prepare(struct list_head *pt,
struct sem_queue *q, int error)
{
- if (list_empty(pt)) {
- /*
- * Hold preempt off so that we don't get preempted and have the
- * wakee busy-wait until we're scheduled back on.
- */
- preempt_disable();
- }
- q->status = IN_WAKEUP;
- q->pid = error;
+ if (list_empty(pt))
+ queuewakeup_prepare();
+
+ queuewakeup_block(&q->done);
+ q->status = error;
list_add_tail(&q->simple_list, pt);
}
@@ -480,8 +609,8 @@ static void wake_up_sem_queue_prepare(struct list_head *pt,
*
* Do the actual wake-up.
* The function is called without any locks held, thus the semaphore array
- * could be destroyed already and the tasks can disappear as soon as the
- * status is set to the actual return code.
+ * could be destroyed already and the tasks can disappear as soon as
+ * queuewakeup_handsoff() is called.
*/
static void wake_up_sem_queue_do(struct list_head *pt)
{
@@ -491,12 +620,11 @@ static void wake_up_sem_queue_do(struct list_head *pt)
did_something = !list_empty(pt);
list_for_each_entry_safe(q, t, pt, simple_list) {
wake_up_process(q->sleeper);
- /* q can disappear immediately after writing q->status. */
- smp_wmb();
- q->status = q->pid;
+ /* q can disappear immediately after completing q->done */
+ queuewakeup_handsoff(&q->done);
}
if (did_something)
- preempt_enable();
+ queuewakeup_completed();
}
static void unlink_queue(struct sem_array *sma, struct sem_queue *q)
@@ -1331,33 +1459,6 @@ out:
return un;
}
-
-/**
- * get_queue_result - Retrieve the result code from sem_queue
- * @q: Pointer to queue structure
- *
- * Retrieve the return code from the pending queue. If IN_WAKEUP is found in
- * q->status, then we must loop until the value is replaced with the final
- * value: This may happen if a task is woken up by an unrelated event (e.g.
- * signal) and in parallel the task is woken up by another task because it got
- * the requested semaphores.
- *
- * The function can be called with or without holding the semaphore spinlock.
- */
-static int get_queue_result(struct sem_queue *q)
-{
- int error;
-
- error = q->status;
- while (unlikely(error == IN_WAKEUP)) {
- cpu_relax();
- error = q->status;
- }
-
- return error;
-}
-
-
SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
unsigned, nsops, const struct timespec __user *, timeout)
{
@@ -1503,6 +1604,7 @@ SYSCALL_DEFINE4(semtimedop, int, semid, struct sembuf __user *, tsops,
queue.status = -EINTR;
queue.sleeper = current;
+ queuewakeup_init(&queue.done);
sleep_again:
current->state = TASK_INTERRUPTIBLE;
@@ -1513,17 +1615,14 @@ sleep_again:
else
schedule();
- error = get_queue_result(&queue);
+ error = queue.status;
if (error != -EINTR) {
/* fast path: update_queue already obtained all requested
- * resources.
- * Perform a smp_mb(): User space could assume that semop()
- * is a memory barrier: Without the mb(), the cpu could
- * speculatively read in user space stale data that was
- * overwritten by the previous owner of the semaphore.
+ * resources. Just ensure that update_queue completed
+ * it's access to &queue.
*/
- smp_mb();
+ queuewakeup_wait(&queue.done);
goto out_free;
}
@@ -1533,23 +1632,16 @@ sleep_again:
/*
* Wait until it's guaranteed that no wakeup_sem_queue_do() is ongoing.
*/
- error = get_queue_result(&queue);
-
- /*
- * Array removed? If yes, leave without sem_unlock().
- */
- if (IS_ERR(sma)) {
- goto out_free;
- }
-
-
- /*
- * If queue.status != -EINTR we are woken up by another process.
- * Leave without unlink_queue(), but with sem_unlock().
- */
-
+ error = queue.status;
if (error != -EINTR) {
- goto out_unlock_free;
+ /* If there is a return code, then we can leave immediately. */
+ if (!IS_ERR(sma)) {
+ /* sem_lock() succeeded - then unlock */
+ sem_unlock(sma);
+ }
+ /* Except that we must wait for the hands-off */
+ queuewakeup_wait(&queue.done);
+ goto out_free;
}
/*
diff --git a/kernel/acct.c b/kernel/acct.c
index b9bd7f098ee5..a370ccb43840 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -201,7 +201,8 @@ static int acct_on(struct filename *pathname)
struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
- file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
+ file = file_open_name(pathname,
+ O_WRONLY|O_APPEND|O_LARGEFILE|O_NONBLOCK, 0);
if (IS_ERR(file))
return PTR_ERR(file);
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 17f2099a7fe0..06aeb421d6b9 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -5266,55 +5266,6 @@ struct cgroup_subsys_state *css_lookup(struct cgroup_subsys *ss, int id)
}
EXPORT_SYMBOL_GPL(css_lookup);
-/**
- * css_get_next - lookup next cgroup under specified hierarchy.
- * @ss: pointer to subsystem
- * @id: current position of iteration.
- * @root: pointer to css. search tree under this.
- * @foundid: position of found object.
- *
- * Search next css under the specified hierarchy of rootid. Calling under
- * rcu_read_lock() is necessary. Returns NULL if it reaches the end.
- */
-struct cgroup_subsys_state *
-css_get_next(struct cgroup_subsys *ss, int id,
- struct cgroup_subsys_state *root, int *foundid)
-{
- struct cgroup_subsys_state *ret = NULL;
- struct css_id *tmp;
- int tmpid;
- int rootid = css_id(root);
- int depth = css_depth(root);
-
- if (!rootid)
- return NULL;
-
- BUG_ON(!ss->use_id);
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- /* fill start point for scan */
- tmpid = id;
- while (1) {
- /*
- * scan next entry from bitmap(tree), tmpid is updated after
- * idr_get_next().
- */
- tmp = idr_get_next(&ss->idr, &tmpid);
- if (!tmp)
- break;
- if (tmp->depth >= depth && tmp->stack[depth] == rootid) {
- ret = rcu_dereference(tmp->css);
- if (ret) {
- *foundid = tmpid;
- break;
- }
- }
- /* continue to scan from next id */
- tmpid = tmpid + 1;
- }
- return ret;
-}
-
/*
* get corresponding css from file open on cgroupfs directory
*/
diff --git a/kernel/fork.c b/kernel/fork.c
index 7d40687b1434..48c89a053959 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -70,6 +70,7 @@
#include <linux/khugepaged.h>
#include <linux/signalfd.h>
#include <linux/uprobes.h>
+#include <linux/aio.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
@@ -364,8 +365,6 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
mm->locked_vm = 0;
mm->mmap = NULL;
mm->mmap_cache = NULL;
- mm->free_area_cache = oldmm->mmap_base;
- mm->cached_hole_size = ~0UL;
mm->map_count = 0;
cpumask_clear(mm_cpumask(mm));
mm->mm_rb = RB_ROOT;
@@ -539,8 +538,6 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
mm->nr_ptes = 0;
memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
spin_lock_init(&mm->page_table_lock);
- mm->free_area_cache = TASK_UNMAPPED_BASE;
- mm->cached_hole_size = ~0UL;
mm_init_aio(mm);
mm_init_owner(mm, p);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index bddd3d7a74b6..38f5fab4d5eb 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -786,7 +786,7 @@ static int kimage_load_normal_segment(struct kimage *image,
struct kexec_segment *segment)
{
unsigned long maddr;
- unsigned long ubytes, mbytes;
+ size_t ubytes, mbytes;
int result;
unsigned char __user *buf;
@@ -819,13 +819,9 @@ static int kimage_load_normal_segment(struct kimage *image,
/* Start with a clear page */
clear_page(ptr);
ptr += maddr & ~PAGE_MASK;
- mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
- if (mchunk > mbytes)
- mchunk = mbytes;
-
- uchunk = mchunk;
- if (uchunk > ubytes)
- uchunk = ubytes;
+ mchunk = min_t(size_t, mbytes,
+ PAGE_SIZE - (maddr & ~PAGE_MASK));
+ uchunk = min(ubytes, mchunk);
result = copy_from_user(ptr, buf, uchunk);
kunmap(page);
@@ -850,7 +846,7 @@ static int kimage_load_crash_segment(struct kimage *image,
* We do things a page at a time for the sake of kmap.
*/
unsigned long maddr;
- unsigned long ubytes, mbytes;
+ size_t ubytes, mbytes;
int result;
unsigned char __user *buf;
@@ -871,13 +867,10 @@ static int kimage_load_crash_segment(struct kimage *image,
}
ptr = kmap(page);
ptr += maddr & ~PAGE_MASK;
- mchunk = PAGE_SIZE - (maddr & ~PAGE_MASK);
- if (mchunk > mbytes)
- mchunk = mbytes;
-
- uchunk = mchunk;
- if (uchunk > ubytes) {
- uchunk = ubytes;
+ mchunk = min_t(size_t, mbytes,
+ PAGE_SIZE - (maddr & ~PAGE_MASK));
+ uchunk = min(ubytes, mchunk);
+ if (mchunk > uchunk) {
/* Zero the trailing part of the page */
memset(ptr + uchunk, 0, mchunk - uchunk);
}
@@ -1118,12 +1111,8 @@ void __weak crash_free_reserved_phys_range(unsigned long begin,
{
unsigned long addr;
- for (addr = begin; addr < end; addr += PAGE_SIZE) {
- ClearPageReserved(pfn_to_page(addr >> PAGE_SHIFT));
- init_page_count(pfn_to_page(addr >> PAGE_SHIFT));
- free_page((unsigned long)__va(addr));
- totalram_pages++;
- }
+ for (addr = begin; addr < end; addr += PAGE_SIZE)
+ free_reserved_page(pfn_to_page(addr >> PAGE_SHIFT));
}
int crash_shrink_memory(unsigned long new_size)
@@ -1452,14 +1441,13 @@ void vmcoreinfo_append_str(const char *fmt, ...)
{
va_list args;
char buf[0x50];
- int r;
+ size_t r;
va_start(args, fmt);
r = vsnprintf(buf, sizeof(buf), fmt, args);
va_end(args);
- if (r + vmcoreinfo_size > vmcoreinfo_max_size)
- r = vmcoreinfo_max_size - vmcoreinfo_size;
+ r = min(r, vmcoreinfo_max_size - vmcoreinfo_size);
memcpy(&vmcoreinfo_data[vmcoreinfo_size], buf, r);
@@ -1489,7 +1477,7 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_SYMBOL(swapper_pg_dir);
#endif
VMCOREINFO_SYMBOL(_stext);
- VMCOREINFO_SYMBOL(vmlist);
+ VMCOREINFO_SYMBOL(vmap_area_list);
#ifndef CONFIG_NEED_MULTIPLE_NODES
VMCOREINFO_SYMBOL(mem_map);
@@ -1527,7 +1515,8 @@ static int __init crash_save_vmcoreinfo_init(void)
VMCOREINFO_OFFSET(free_area, free_list);
VMCOREINFO_OFFSET(list_head, next);
VMCOREINFO_OFFSET(list_head, prev);
- VMCOREINFO_OFFSET(vm_struct, addr);
+ VMCOREINFO_OFFSET(vmap_area, va_start);
+ VMCOREINFO_OFFSET(vmap_area, list);
VMCOREINFO_LENGTH(zone.free_area, MAX_ORDER);
log_buf_kexec_setup();
VMCOREINFO_LENGTH(free_area.free_list, MIGRATE_TYPES);
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 56dd34976d7b..1296e72e4161 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -77,6 +77,7 @@ static void free_modprobe_argv(struct subprocess_info *info)
static int call_modprobe(char *module_name, int wait)
{
+ struct subprocess_info *info;
static char *envp[] = {
"HOME=/",
"TERM=linux",
@@ -98,8 +99,15 @@ static int call_modprobe(char *module_name, int wait)
argv[3] = module_name; /* check free_modprobe_argv() */
argv[4] = NULL;
- return call_usermodehelper_fns(modprobe_path, argv, envp,
- wait | UMH_KILLABLE, NULL, free_modprobe_argv, NULL);
+ info = call_usermodehelper_setup(modprobe_path, argv, envp, GFP_KERNEL,
+ NULL, free_modprobe_argv, NULL);
+ if (!info)
+ goto free_module_name;
+
+ return call_usermodehelper_exec(info, wait | UMH_KILLABLE);
+
+free_module_name:
+ kfree(module_name);
free_argv:
kfree(argv);
out:
@@ -502,14 +510,28 @@ static void helper_unlock(void)
* @argv: arg vector for process
* @envp: environment for process
* @gfp_mask: gfp mask for memory allocation
+ * @cleanup: a cleanup function
+ * @init: an init function
+ * @data: arbitrary context sensitive data
*
* Returns either %NULL on allocation failure, or a subprocess_info
* structure. This should be passed to call_usermodehelper_exec to
* exec the process and free the structure.
+ *
+ * The init function is used to customize the helper process prior to
+ * exec. A non-zero return code causes the process to error out, exit,
+ * and return the failure to the calling process
+ *
+ * The cleanup function is just before ethe subprocess_info is about to
+ * be freed. This can be used for freeing the argv and envp. The
+ * Function must be runnable in either a process context or the
+ * context in which call_usermodehelper_exec is called.
*/
-static
struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
- char **envp, gfp_t gfp_mask)
+ char **envp, gfp_t gfp_mask,
+ int (*init)(struct subprocess_info *info, struct cred *new),
+ void (*cleanup)(struct subprocess_info *info),
+ void *data)
{
struct subprocess_info *sub_info;
sub_info = kzalloc(sizeof(struct subprocess_info), gfp_mask);
@@ -520,50 +542,27 @@ struct subprocess_info *call_usermodehelper_setup(char *path, char **argv,
sub_info->path = path;
sub_info->argv = argv;
sub_info->envp = envp;
+
+ sub_info->cleanup = cleanup;
+ sub_info->init = init;
+ sub_info->data = data;
out:
return sub_info;
}
-
-/**
- * call_usermodehelper_setfns - set a cleanup/init function
- * @info: a subprocess_info returned by call_usermodehelper_setup
- * @cleanup: a cleanup function
- * @init: an init function
- * @data: arbitrary context sensitive data
- *
- * The init function is used to customize the helper process prior to
- * exec. A non-zero return code causes the process to error out, exit,
- * and return the failure to the calling process
- *
- * The cleanup function is just before ethe subprocess_info is about to
- * be freed. This can be used for freeing the argv and envp. The
- * Function must be runnable in either a process context or the
- * context in which call_usermodehelper_exec is called.
- */
-static
-void call_usermodehelper_setfns(struct subprocess_info *info,
- int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *info),
- void *data)
-{
- info->cleanup = cleanup;
- info->init = init;
- info->data = data;
-}
+EXPORT_SYMBOL(call_usermodehelper_setup);
/**
* call_usermodehelper_exec - start a usermode application
* @sub_info: information about the subprocessa
* @wait: wait for the application to finish and return status.
- * when -1 don't wait at all, but you get no useful error back when
- * the program couldn't be exec'ed. This makes it safe to call
+ * when UMH_NO_WAIT don't wait at all, but you get no useful error back
+ * when the program couldn't be exec'ed. This makes it safe to call
* from interrupt context.
*
* Runs a user-space application. The application is started
* asynchronously if wait is not set, and runs as a child of keventd.
* (ie. it runs with full root capabilities).
*/
-static
int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
{
DECLARE_COMPLETION_ONSTACK(done);
@@ -615,31 +614,34 @@ unlock:
helper_unlock();
return retval;
}
+EXPORT_SYMBOL(call_usermodehelper_exec);
-/*
- * call_usermodehelper_fns() will not run the caller-provided cleanup function
- * if a memory allocation failure is experienced. So the caller might need to
- * check the call_usermodehelper_fns() return value: if it is -ENOMEM, perform
- * the necessaary cleanup within the caller.
+/**
+ * call_usermodehelper() - prepare and start a usermode application
+ * @path: path to usermode executable
+ * @argv: arg vector for process
+ * @envp: environment for process
+ * @wait: wait for the application to finish and return status.
+ * when UMH_NO_WAIT don't wait at all, but you get no useful error back
+ * when the program couldn't be exec'ed. This makes it safe to call
+ * from interrupt context.
+ *
+ * This function is the equivalent to use call_usermodehelper_setup() and
+ * call_usermodehelper_exec().
*/
-int call_usermodehelper_fns(
- char *path, char **argv, char **envp, int wait,
- int (*init)(struct subprocess_info *info, struct cred *new),
- void (*cleanup)(struct subprocess_info *), void *data)
+int call_usermodehelper(char *path, char **argv, char **envp, int wait)
{
struct subprocess_info *info;
gfp_t gfp_mask = (wait == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
- info = call_usermodehelper_setup(path, argv, envp, gfp_mask);
-
+ info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
+ NULL, NULL, NULL);
if (info == NULL)
return -ENOMEM;
- call_usermodehelper_setfns(info, init, cleanup, data);
-
return call_usermodehelper_exec(info, wait);
}
-EXPORT_SYMBOL(call_usermodehelper_fns);
+EXPORT_SYMBOL(call_usermodehelper);
static int proc_cap_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
diff --git a/kernel/kthread.c b/kernel/kthread.c
index a2fbbb782bad..b9db231032f4 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -52,8 +52,21 @@ enum KTHREAD_BITS {
KTHREAD_IS_PARKED,
};
-#define to_kthread(tsk) \
- container_of((tsk)->vfork_done, struct kthread, exited)
+#define __to_kthread(vfork) \
+ container_of(vfork, struct kthread, exited)
+
+static inline struct kthread *to_kthread(struct task_struct *k)
+{
+ return __to_kthread(k->vfork_done);
+}
+
+static struct kthread *to_live_kthread(struct task_struct *k)
+{
+ struct completion *vfork = ACCESS_ONCE(k->vfork_done);
+ if (likely(vfork))
+ return __to_kthread(vfork);
+ return NULL;
+}
/**
* kthread_should_stop - should this kthread return now?
@@ -311,19 +324,6 @@ struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data),
return p;
}
-static struct kthread *task_get_live_kthread(struct task_struct *k)
-{
- struct kthread *kthread;
-
- get_task_struct(k);
- kthread = to_kthread(k);
- /* It might have exited */
- barrier();
- if (k->vfork_done != NULL)
- return kthread;
- return NULL;
-}
-
/**
* kthread_unpark - unpark a thread created by kthread_create().
* @k: thread created by kthread_create().
@@ -334,7 +334,7 @@ static struct kthread *task_get_live_kthread(struct task_struct *k)
*/
void kthread_unpark(struct task_struct *k)
{
- struct kthread *kthread = task_get_live_kthread(k);
+ struct kthread *kthread = to_live_kthread(k);
if (kthread) {
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
@@ -350,7 +350,6 @@ void kthread_unpark(struct task_struct *k)
wake_up_process(k);
}
}
- put_task_struct(k);
}
/**
@@ -367,7 +366,7 @@ void kthread_unpark(struct task_struct *k)
*/
int kthread_park(struct task_struct *k)
{
- struct kthread *kthread = task_get_live_kthread(k);
+ struct kthread *kthread = to_live_kthread(k);
int ret = -ENOSYS;
if (kthread) {
@@ -380,7 +379,6 @@ int kthread_park(struct task_struct *k)
}
ret = 0;
}
- put_task_struct(k);
return ret;
}
@@ -401,10 +399,13 @@ int kthread_park(struct task_struct *k)
*/
int kthread_stop(struct task_struct *k)
{
- struct kthread *kthread = task_get_live_kthread(k);
+ struct kthread *kthread;
int ret;
trace_sched_kthread_stop(k);
+
+ get_task_struct(k);
+ kthread = to_live_kthread(k);
if (kthread) {
set_bit(KTHREAD_SHOULD_STOP, &kthread->flags);
clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags);
@@ -412,10 +413,9 @@ int kthread_stop(struct task_struct *k)
wait_for_completion(&kthread->exited);
}
ret = k->exit_code;
-
put_task_struct(k);
- trace_sched_kthread_stop_ret(ret);
+ trace_sched_kthread_stop_ret(ret);
return ret;
}
EXPORT_SYMBOL(kthread_stop);
diff --git a/kernel/lglock.c b/kernel/lglock.c
index 6535a667a5a7..86ae2aebf004 100644
--- a/kernel/lglock.c
+++ b/kernel/lglock.c
@@ -21,7 +21,7 @@ void lg_local_lock(struct lglock *lg)
arch_spinlock_t *lock;
preempt_disable();
- rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
arch_spin_lock(lock);
}
@@ -31,7 +31,7 @@ void lg_local_unlock(struct lglock *lg)
{
arch_spinlock_t *lock;
- rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = this_cpu_ptr(lg->lock);
arch_spin_unlock(lock);
preempt_enable();
@@ -43,7 +43,7 @@ void lg_local_lock_cpu(struct lglock *lg, int cpu)
arch_spinlock_t *lock;
preempt_disable();
- rwlock_acquire_read(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ lock_acquire_shared(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
arch_spin_lock(lock);
}
@@ -53,7 +53,7 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu)
{
arch_spinlock_t *lock;
- rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ lock_release(&lg->lock_dep_map, 1, _RET_IP_);
lock = per_cpu_ptr(lg->lock, cpu);
arch_spin_unlock(lock);
preempt_enable();
@@ -65,7 +65,7 @@ void lg_global_lock(struct lglock *lg)
int i;
preempt_disable();
- rwlock_acquire(&lg->lock_dep_map, 0, 0, _RET_IP_);
+ lock_acquire_exclusive(&lg->lock_dep_map, 0, 0, NULL, _RET_IP_);
for_each_possible_cpu(i) {
arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
@@ -78,7 +78,7 @@ void lg_global_unlock(struct lglock *lg)
{
int i;
- rwlock_release(&lg->lock_dep_map, 1, _RET_IP_);
+ lock_release(&lg->lock_dep_map, 1, _RET_IP_);
for_each_possible_cpu(i) {
arch_spinlock_t *lock;
lock = per_cpu_ptr(lg->lock, i);
diff --git a/kernel/pid.c b/kernel/pid.c
index 047dc6264638..6283d6412aff 100644
--- a/kernel/pid.c
+++ b/kernel/pid.c
@@ -51,9 +51,6 @@ int pid_max = PID_MAX_DEFAULT;
int pid_max_min = RESERVED_PIDS + 1;
int pid_max_max = PID_MAX_LIMIT;
-#define BITS_PER_PAGE (PAGE_SIZE*8)
-#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1)
-
static inline int mk_pid(struct pid_namespace *pid_ns,
struct pidmap *map, int off)
{
@@ -183,15 +180,19 @@ static int alloc_pidmap(struct pid_namespace *pid_ns)
break;
}
if (likely(atomic_read(&map->nr_free))) {
- do {
+ for ( ; ; ) {
if (!test_and_set_bit(offset, map->page)) {
atomic_dec(&map->nr_free);
set_last_pid(pid_ns, last, pid);
return pid;
}
offset = find_next_offset(map, offset);
+ if (offset >= BITS_PER_PAGE)
+ break;
pid = mk_pid(pid_ns, map, offset);
- } while (offset < BITS_PER_PAGE && pid < pid_max);
+ if (pid >= pid_max)
+ break;
+ }
}
if (map < &pid_ns->pidmap[(pid_max-1)/BITS_PER_PAGE]) {
++map;
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index c1c3dc1c6023..f158e271fe44 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -19,8 +19,6 @@
#include <linux/reboot.h>
#include <linux/export.h>
-#define BITS_PER_PAGE (PAGE_SIZE*8)
-
struct pid_cache {
int nr_ids;
char name[16];
diff --git a/kernel/posix-cpu-timers.c b/kernel/posix-cpu-timers.c
index 8fd709c9bb58..edf94b66d45b 100644
--- a/kernel/posix-cpu-timers.c
+++ b/kernel/posix-cpu-timers.c
@@ -49,59 +49,28 @@ static int check_clock(const clockid_t which_clock)
return error;
}
-static inline union cpu_time_count
+static inline unsigned long long
timespec_to_sample(const clockid_t which_clock, const struct timespec *tp)
{
- union cpu_time_count ret;
- ret.sched = 0; /* high half always zero when .cpu used */
+ unsigned long long ret;
+
+ ret = 0; /* high half always zero when .cpu used */
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- ret.sched = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
+ ret = (unsigned long long)tp->tv_sec * NSEC_PER_SEC + tp->tv_nsec;
} else {
- ret.cpu = timespec_to_cputime(tp);
+ ret = cputime_to_expires(timespec_to_cputime(tp));
}
return ret;
}
static void sample_to_timespec(const clockid_t which_clock,
- union cpu_time_count cpu,
+ unsigned long long expires,
struct timespec *tp)
{
if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED)
- *tp = ns_to_timespec(cpu.sched);
+ *tp = ns_to_timespec(expires);
else
- cputime_to_timespec(cpu.cpu, tp);
-}
-
-static inline int cpu_time_before(const clockid_t which_clock,
- union cpu_time_count now,
- union cpu_time_count then)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- return now.sched < then.sched;
- } else {
- return now.cpu < then.cpu;
- }
-}
-static inline void cpu_time_add(const clockid_t which_clock,
- union cpu_time_count *acc,
- union cpu_time_count val)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- acc->sched += val.sched;
- } else {
- acc->cpu += val.cpu;
- }
-}
-static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
- union cpu_time_count a,
- union cpu_time_count b)
-{
- if (CPUCLOCK_WHICH(which_clock) == CPUCLOCK_SCHED) {
- a.sched -= b.sched;
- } else {
- a.cpu -= b.cpu;
- }
- return a;
+ cputime_to_timespec((__force cputime_t)expires, tp);
}
/*
@@ -109,65 +78,49 @@ static inline union cpu_time_count cpu_time_sub(const clockid_t which_clock,
* given the current clock sample.
*/
static void bump_cpu_timer(struct k_itimer *timer,
- union cpu_time_count now)
+ unsigned long long now)
{
int i;
+ unsigned long long delta, incr;
- if (timer->it.cpu.incr.sched == 0)
+ if (timer->it.cpu.incr == 0)
return;
- if (CPUCLOCK_WHICH(timer->it_clock) == CPUCLOCK_SCHED) {
- unsigned long long delta, incr;
+ if (now < timer->it.cpu.expires)
+ return;
- if (now.sched < timer->it.cpu.expires.sched)
- return;
- incr = timer->it.cpu.incr.sched;
- delta = now.sched + incr - timer->it.cpu.expires.sched;
- /* Don't use (incr*2 < delta), incr*2 might overflow. */
- for (i = 0; incr < delta - incr; i++)
- incr = incr << 1;
- for (; i >= 0; incr >>= 1, i--) {
- if (delta < incr)
- continue;
- timer->it.cpu.expires.sched += incr;
- timer->it_overrun += 1 << i;
- delta -= incr;
- }
- } else {
- cputime_t delta, incr;
+ incr = timer->it.cpu.incr;
+ delta = now + incr - timer->it.cpu.expires;
- if (now.cpu < timer->it.cpu.expires.cpu)
- return;
- incr = timer->it.cpu.incr.cpu;
- delta = now.cpu + incr - timer->it.cpu.expires.cpu;
- /* Don't use (incr*2 < delta), incr*2 might overflow. */
- for (i = 0; incr < delta - incr; i++)
- incr += incr;
- for (; i >= 0; incr = incr >> 1, i--) {
- if (delta < incr)
- continue;
- timer->it.cpu.expires.cpu += incr;
- timer->it_overrun += 1 << i;
- delta -= incr;
- }
+ /* Don't use (incr*2 < delta), incr*2 might overflow. */
+ for (i = 0; incr < delta - incr; i++)
+ incr = incr << 1;
+
+ for (; i >= 0; incr >>= 1, i--) {
+ if (delta < incr)
+ continue;
+
+ timer->it.cpu.expires += incr;
+ timer->it_overrun += 1 << i;
+ delta -= incr;
}
}
-static inline cputime_t prof_ticks(struct task_struct *p)
+static inline unsigned long long prof_ticks(struct task_struct *p)
{
cputime_t utime, stime;
task_cputime(p, &utime, &stime);
- return utime + stime;
+ return cputime_to_expires(utime + stime);
}
-static inline cputime_t virt_ticks(struct task_struct *p)
+static inline unsigned long long virt_ticks(struct task_struct *p)
{
cputime_t utime;
task_cputime(p, &utime, NULL);
- return utime;
+ return cputime_to_expires(utime);
}
static int
@@ -208,19 +161,19 @@ posix_cpu_clock_set(const clockid_t which_clock, const struct timespec *tp)
* Sample a per-thread clock for the given task.
*/
static int cpu_clock_sample(const clockid_t which_clock, struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
switch (CPUCLOCK_WHICH(which_clock)) {
default:
return -EINVAL;
case CPUCLOCK_PROF:
- cpu->cpu = prof_ticks(p);
+ *sample = prof_ticks(p);
break;
case CPUCLOCK_VIRT:
- cpu->cpu = virt_ticks(p);
+ *sample = virt_ticks(p);
break;
case CPUCLOCK_SCHED:
- cpu->sched = task_sched_runtime(p);
+ *sample = task_sched_runtime(p);
break;
}
return 0;
@@ -267,7 +220,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times)
*/
static int cpu_clock_sample_group(const clockid_t which_clock,
struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
struct task_cputime cputime;
@@ -276,15 +229,15 @@ static int cpu_clock_sample_group(const clockid_t which_clock,
return -EINVAL;
case CPUCLOCK_PROF:
thread_group_cputime(p, &cputime);
- cpu->cpu = cputime.utime + cputime.stime;
+ *sample = cputime_to_expires(cputime.utime + cputime.stime);
break;
case CPUCLOCK_VIRT:
thread_group_cputime(p, &cputime);
- cpu->cpu = cputime.utime;
+ *sample = cputime_to_expires(cputime.utime);
break;
case CPUCLOCK_SCHED:
thread_group_cputime(p, &cputime);
- cpu->sched = cputime.sum_exec_runtime;
+ *sample = cputime.sum_exec_runtime;
break;
}
return 0;
@@ -295,7 +248,7 @@ static int posix_cpu_clock_get(const clockid_t which_clock, struct timespec *tp)
{
const pid_t pid = CPUCLOCK_PID(which_clock);
int error = -EINVAL;
- union cpu_time_count rtn;
+ unsigned long long rtn;
if (pid == 0) {
/*
@@ -429,6 +382,21 @@ static int posix_cpu_timer_del(struct k_itimer *timer)
return ret;
}
+static void cleanup_timers_list(struct list_head *head,
+ unsigned long long curr)
+{
+ struct cpu_timer_list *timer, *next;
+
+ list_for_each_entry_safe(timer, next, head, entry) {
+ list_del_init(&timer->entry);
+ if (timer->expires < curr) {
+ timer->expires = 0;
+ } else {
+ timer->expires -= curr;
+ }
+ }
+}
+
/*
* Clean out CPU timers still ticking when a thread exited. The task
* pointer is cleared, and the expiry time is replaced with the residual
@@ -439,37 +407,12 @@ static void cleanup_timers(struct list_head *head,
cputime_t utime, cputime_t stime,
unsigned long long sum_exec_runtime)
{
- struct cpu_timer_list *timer, *next;
- cputime_t ptime = utime + stime;
-
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (timer->expires.cpu < ptime) {
- timer->expires.cpu = 0;
- } else {
- timer->expires.cpu -= ptime;
- }
- }
- ++head;
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (timer->expires.cpu < utime) {
- timer->expires.cpu = 0;
- } else {
- timer->expires.cpu -= utime;
- }
- }
+ cputime_t ptime = utime + stime;
- ++head;
- list_for_each_entry_safe(timer, next, head, entry) {
- list_del_init(&timer->entry);
- if (timer->expires.sched < sum_exec_runtime) {
- timer->expires.sched = 0;
- } else {
- timer->expires.sched -= sum_exec_runtime;
- }
- }
+ cleanup_timers_list(head, cputime_to_expires(ptime));
+ cleanup_timers_list(++head, cputime_to_expires(utime));
+ cleanup_timers_list(++head, sum_exec_runtime);
}
/*
@@ -499,7 +442,7 @@ void posix_cpu_timers_exit_group(struct task_struct *tsk)
tsk->se.sum_exec_runtime + sig->sum_sched_runtime);
}
-static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
+static void clear_dead_task(struct k_itimer *timer, unsigned long long now)
{
/*
* That's all for this thread or process.
@@ -507,9 +450,7 @@ static void clear_dead_task(struct k_itimer *timer, union cpu_time_count now)
*/
put_task_struct(timer->it.cpu.task);
timer->it.cpu.task = NULL;
- timer->it.cpu.expires = cpu_time_sub(timer->it_clock,
- timer->it.cpu.expires,
- now);
+ timer->it.cpu.expires -= now;
}
static inline int expires_gt(cputime_t expires, cputime_t new_exp)
@@ -541,14 +482,14 @@ static void arm_timer(struct k_itimer *timer)
listpos = head;
list_for_each_entry(next, head, entry) {
- if (cpu_time_before(timer->it_clock, nt->expires, next->expires))
+ if (nt->expires < next->expires)
break;
listpos = &next->entry;
}
list_add(&nt->entry, listpos);
if (listpos == head) {
- union cpu_time_count *exp = &nt->expires;
+ unsigned long long exp = nt->expires;
/*
* We are the new earliest-expiring POSIX 1.b timer, hence
@@ -559,17 +500,17 @@ static void arm_timer(struct k_itimer *timer)
switch (CPUCLOCK_WHICH(timer->it_clock)) {
case CPUCLOCK_PROF:
- if (expires_gt(cputime_expires->prof_exp, exp->cpu))
- cputime_expires->prof_exp = exp->cpu;
+ if (expires_gt(cputime_expires->prof_exp, expires_to_cputime(exp)))
+ cputime_expires->prof_exp = expires_to_cputime(exp);
break;
case CPUCLOCK_VIRT:
- if (expires_gt(cputime_expires->virt_exp, exp->cpu))
- cputime_expires->virt_exp = exp->cpu;
+ if (expires_gt(cputime_expires->virt_exp, expires_to_cputime(exp)))
+ cputime_expires->virt_exp = expires_to_cputime(exp);
break;
case CPUCLOCK_SCHED:
if (cputime_expires->sched_exp == 0 ||
- cputime_expires->sched_exp > exp->sched)
- cputime_expires->sched_exp = exp->sched;
+ cputime_expires->sched_exp > exp)
+ cputime_expires->sched_exp = exp;
break;
}
}
@@ -584,20 +525,20 @@ static void cpu_timer_fire(struct k_itimer *timer)
/*
* User don't want any signal.
*/
- timer->it.cpu.expires.sched = 0;
+ timer->it.cpu.expires = 0;
} else if (unlikely(timer->sigq == NULL)) {
/*
* This a special case for clock_nanosleep,
* not a normal timer from sys_timer_create.
*/
wake_up_process(timer->it_process);
- timer->it.cpu.expires.sched = 0;
- } else if (timer->it.cpu.incr.sched == 0) {
+ timer->it.cpu.expires = 0;
+ } else if (timer->it.cpu.incr == 0) {
/*
* One-shot timer. Clear it as soon as it's fired.
*/
posix_timer_event(timer, 0);
- timer->it.cpu.expires.sched = 0;
+ timer->it.cpu.expires = 0;
} else if (posix_timer_event(timer, ++timer->it_requeue_pending)) {
/*
* The signal did not get queued because the signal
@@ -615,7 +556,7 @@ static void cpu_timer_fire(struct k_itimer *timer)
*/
static int cpu_timer_sample_group(const clockid_t which_clock,
struct task_struct *p,
- union cpu_time_count *cpu)
+ unsigned long long *sample)
{
struct task_cputime cputime;
@@ -624,13 +565,13 @@ static int cpu_timer_sample_group(const clockid_t which_clock,
default:
return -EINVAL;
case CPUCLOCK_PROF:
- cpu->cpu = cputime.utime + cputime.stime;
+ *sample = cputime_to_expires(cputime.utime + cputime.stime);
break;
case CPUCLOCK_VIRT:
- cpu->cpu = cputime.utime;
+ *sample = cputime_to_expires(cputime.utime);
break;
case CPUCLOCK_SCHED:
- cpu->sched = cputime.sum_exec_runtime + task_delta_exec(p);
+ *sample = cputime.sum_exec_runtime + task_delta_exec(p);
break;
}
return 0;
@@ -646,7 +587,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
struct itimerspec *new, struct itimerspec *old)
{
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count old_expires, new_expires, old_incr, val;
+ unsigned long long old_expires, new_expires, old_incr, val;
int ret;
if (unlikely(p == NULL)) {
@@ -701,7 +642,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
}
if (old) {
- if (old_expires.sched == 0) {
+ if (old_expires == 0) {
old->it_value.tv_sec = 0;
old->it_value.tv_nsec = 0;
} else {
@@ -716,11 +657,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* new setting.
*/
bump_cpu_timer(timer, val);
- if (cpu_time_before(timer->it_clock, val,
- timer->it.cpu.expires)) {
- old_expires = cpu_time_sub(
- timer->it_clock,
- timer->it.cpu.expires, val);
+ if (val < timer->it.cpu.expires) {
+ old_expires = timer->it.cpu.expires - val;
sample_to_timespec(timer->it_clock,
old_expires,
&old->it_value);
@@ -743,8 +681,8 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
goto out;
}
- if (new_expires.sched != 0 && !(flags & TIMER_ABSTIME)) {
- cpu_time_add(timer->it_clock, &new_expires, val);
+ if (new_expires != 0 && !(flags & TIMER_ABSTIME)) {
+ new_expires += val;
}
/*
@@ -753,8 +691,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
* arm the timer (we'll just fake it for timer_gettime).
*/
timer->it.cpu.expires = new_expires;
- if (new_expires.sched != 0 &&
- cpu_time_before(timer->it_clock, val, new_expires)) {
+ if (new_expires != 0 && val < new_expires) {
arm_timer(timer);
}
@@ -778,8 +715,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
timer->it_overrun_last = 0;
timer->it_overrun = -1;
- if (new_expires.sched != 0 &&
- !cpu_time_before(timer->it_clock, val, new_expires)) {
+ if (new_expires != 0 && !(val < new_expires)) {
/*
* The designated time already passed, so we notify
* immediately, even if the thread never runs to
@@ -799,7 +735,7 @@ static int posix_cpu_timer_set(struct k_itimer *timer, int flags,
static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
{
- union cpu_time_count now;
+ unsigned long long now;
struct task_struct *p = timer->it.cpu.task;
int clear_dead;
@@ -809,7 +745,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
sample_to_timespec(timer->it_clock,
timer->it.cpu.incr, &itp->it_interval);
- if (timer->it.cpu.expires.sched == 0) { /* Timer not armed at all. */
+ if (timer->it.cpu.expires == 0) { /* Timer not armed at all. */
itp->it_value.tv_sec = itp->it_value.tv_nsec = 0;
return;
}
@@ -841,7 +777,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
*/
put_task_struct(p);
timer->it.cpu.task = NULL;
- timer->it.cpu.expires.sched = 0;
+ timer->it.cpu.expires = 0;
read_unlock(&tasklist_lock);
goto dead;
} else {
@@ -862,10 +798,9 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
goto dead;
}
- if (cpu_time_before(timer->it_clock, now, timer->it.cpu.expires)) {
+ if (now < timer->it.cpu.expires) {
sample_to_timespec(timer->it_clock,
- cpu_time_sub(timer->it_clock,
- timer->it.cpu.expires, now),
+ timer->it.cpu.expires - now,
&itp->it_value);
} else {
/*
@@ -877,6 +812,28 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
}
}
+static unsigned long long
+check_timers_list(struct list_head *timers,
+ struct list_head *firing,
+ unsigned long long curr)
+{
+ int maxfire = 20;
+
+ while (!list_empty(timers)) {
+ struct cpu_timer_list *t;
+
+ t = list_first_entry(timers, struct cpu_timer_list, entry);
+
+ if (!--maxfire || curr < t->expires)
+ return t->expires;
+
+ t->firing = 1;
+ list_move_tail(&t->entry, firing);
+ }
+
+ return 0;
+}
+
/*
* Check for any per-thread CPU timers that have fired and move them off
* the tsk->cpu_timers[N] list onto the firing list. Here we update the
@@ -885,54 +842,20 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp)
static void check_thread_timers(struct task_struct *tsk,
struct list_head *firing)
{
- int maxfire;
struct list_head *timers = tsk->cpu_timers;
struct signal_struct *const sig = tsk->signal;
+ struct task_cputime *tsk_expires = &tsk->cputime_expires;
+ unsigned long long expires;
unsigned long soft;
- maxfire = 20;
- tsk->cputime_expires.prof_exp = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || prof_ticks(tsk) < t->expires.cpu) {
- tsk->cputime_expires.prof_exp = t->expires.cpu;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ expires = check_timers_list(timers, firing, prof_ticks(tsk));
+ tsk_expires->prof_exp = expires_to_cputime(expires);
- ++timers;
- maxfire = 20;
- tsk->cputime_expires.virt_exp = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || virt_ticks(tsk) < t->expires.cpu) {
- tsk->cputime_expires.virt_exp = t->expires.cpu;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ expires = check_timers_list(++timers, firing, virt_ticks(tsk));
+ tsk_expires->virt_exp = expires_to_cputime(expires);
- ++timers;
- maxfire = 20;
- tsk->cputime_expires.sched_exp = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *t = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || tsk->se.sum_exec_runtime < t->expires.sched) {
- tsk->cputime_expires.sched_exp = t->expires.sched;
- break;
- }
- t->firing = 1;
- list_move_tail(&t->entry, firing);
- }
+ tsk_expires->sched_exp = check_timers_list(++timers, firing,
+ tsk->se.sum_exec_runtime);
/*
* Check for the special case thread timers.
@@ -980,7 +903,8 @@ static void stop_process_timers(struct signal_struct *sig)
static u32 onecputick;
static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it,
- cputime_t *expires, cputime_t cur_time, int signo)
+ unsigned long long *expires,
+ unsigned long long cur_time, int signo)
{
if (!it->expires)
return;
@@ -1031,9 +955,8 @@ static inline int task_cputime_zero(const struct task_cputime *cputime)
static void check_process_timers(struct task_struct *tsk,
struct list_head *firing)
{
- int maxfire;
struct signal_struct *const sig = tsk->signal;
- cputime_t utime, ptime, virt_expires, prof_expires;
+ unsigned long long utime, ptime, virt_expires, prof_expires;
unsigned long long sum_sched_runtime, sched_expires;
struct list_head *timers = sig->cpu_timers;
struct task_cputime cputime;
@@ -1043,52 +966,13 @@ static void check_process_timers(struct task_struct *tsk,
* Collect the current process totals.
*/
thread_group_cputimer(tsk, &cputime);
- utime = cputime.utime;
- ptime = utime + cputime.stime;
+ utime = cputime_to_expires(cputime.utime);
+ ptime = utime + cputime_to_expires(cputime.stime);
sum_sched_runtime = cputime.sum_exec_runtime;
- maxfire = 20;
- prof_expires = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || ptime < tl->expires.cpu) {
- prof_expires = tl->expires.cpu;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
- ++timers;
- maxfire = 20;
- virt_expires = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || utime < tl->expires.cpu) {
- virt_expires = tl->expires.cpu;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
-
- ++timers;
- maxfire = 20;
- sched_expires = 0;
- while (!list_empty(timers)) {
- struct cpu_timer_list *tl = list_first_entry(timers,
- struct cpu_timer_list,
- entry);
- if (!--maxfire || sum_sched_runtime < tl->expires.sched) {
- sched_expires = tl->expires.sched;
- break;
- }
- tl->firing = 1;
- list_move_tail(&tl->entry, firing);
- }
+ prof_expires = check_timers_list(timers, firing, ptime);
+ virt_expires = check_timers_list(++timers, firing, utime);
+ sched_expires = check_timers_list(++timers, firing, sum_sched_runtime);
/*
* Check for the special case process timers.
@@ -1127,8 +1011,8 @@ static void check_process_timers(struct task_struct *tsk,
}
}
- sig->cputime_expires.prof_exp = prof_expires;
- sig->cputime_expires.virt_exp = virt_expires;
+ sig->cputime_expires.prof_exp = expires_to_cputime(prof_expires);
+ sig->cputime_expires.virt_exp = expires_to_cputime(virt_expires);
sig->cputime_expires.sched_exp = sched_expires;
if (task_cputime_zero(&sig->cputime_expires))
stop_process_timers(sig);
@@ -1141,7 +1025,7 @@ static void check_process_timers(struct task_struct *tsk,
void posix_cpu_timer_schedule(struct k_itimer *timer)
{
struct task_struct *p = timer->it.cpu.task;
- union cpu_time_count now;
+ unsigned long long now;
if (unlikely(p == NULL))
/*
@@ -1170,7 +1054,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer)
*/
put_task_struct(p);
timer->it.cpu.task = p = NULL;
- timer->it.cpu.expires.sched = 0;
+ timer->it.cpu.expires = 0;
goto out_unlock;
} else if (unlikely(p->exit_state) && thread_group_empty(p)) {
/*
@@ -1345,7 +1229,7 @@ void run_posix_cpu_timers(struct task_struct *tsk)
void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
cputime_t *newval, cputime_t *oldval)
{
- union cpu_time_count now;
+ unsigned long long now;
BUG_ON(clock_idx == CPUCLOCK_SCHED);
cpu_timer_sample_group(clock_idx, tsk, &now);
@@ -1357,17 +1241,17 @@ void set_process_cpu_timer(struct task_struct *tsk, unsigned int clock_idx,
* it to be absolute.
*/
if (*oldval) {
- if (*oldval <= now.cpu) {
+ if (*oldval <= now) {
/* Just about to fire. */
*oldval = cputime_one_jiffy;
} else {
- *oldval -= now.cpu;
+ *oldval -= now;
}
}
if (!*newval)
return;
- *newval += now.cpu;
+ *newval += now;
}
/*
@@ -1415,7 +1299,7 @@ static int do_cpu_nanosleep(const clockid_t which_clock, int flags,
}
while (!signal_pending(current)) {
- if (timer.it.cpu.expires.sched == 0) {
+ if (timer.it.cpu.expires == 0) {
/*
* Our timer fired and was reset, below
* deletion can not fail.
diff --git a/kernel/printk.c b/kernel/printk.c
index 4d36fe337066..26cbed12dede 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -32,6 +32,7 @@
#include <linux/security.h>
#include <linux/bootmem.h>
#include <linux/memblock.h>
+#include <linux/aio.h>
#include <linux/syscalls.h>
#include <linux/kexec.h>
#include <linux/kdb.h>
@@ -49,13 +50,6 @@
#define CREATE_TRACE_POINTS
#include <trace/events/printk.h>
-/*
- * Architectures can override it:
- */
-void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
-{
-}
-
/* printk's without a loglevel use this.. */
#define DEFAULT_MESSAGE_LOGLEVEL CONFIG_DEFAULT_MESSAGE_LOGLEVEL
@@ -63,8 +57,6 @@ void asmlinkage __attribute__((weak)) early_printk(const char *fmt, ...)
#define MINIMUM_CONSOLE_LOGLEVEL 1 /* Minimum loglevel we let people use */
#define DEFAULT_CONSOLE_LOGLEVEL 7 /* anything MORE serious than KERN_DEBUG */
-DECLARE_WAIT_QUEUE_HEAD(log_wait);
-
int console_printk[4] = {
DEFAULT_CONSOLE_LOGLEVEL, /* console_loglevel */
DEFAULT_MESSAGE_LOGLEVEL, /* default_message_loglevel */
@@ -224,6 +216,7 @@ struct log {
static DEFINE_RAW_SPINLOCK(logbuf_lock);
#ifdef CONFIG_PRINTK
+DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* the next printk record to read by syslog(READ) or /proc/kmsg */
static u64 syslog_seq;
static u32 syslog_idx;
@@ -621,6 +614,9 @@ static int devkmsg_open(struct inode *inode, struct file *file)
struct devkmsg_user *user;
int err;
+ if (dmesg_restrict && !capable(CAP_SYSLOG))
+ return -EACCES;
+
/* write-only does not need any file context */
if ((file->f_flags & O_ACCMODE) == O_WRONLY)
return 0;
@@ -1266,7 +1262,7 @@ static void call_console_drivers(int level, const char *text, size_t len)
{
struct console *con;
- trace_console(text, 0, len, len);
+ trace_console(text, len);
if (!console_drivers)
return;
@@ -1725,6 +1721,29 @@ static size_t cont_print_text(char *text, size_t size) { return 0; }
#endif /* CONFIG_PRINTK */
+#ifdef CONFIG_EARLY_PRINTK
+struct console *early_console;
+
+void early_vprintk(const char *fmt, va_list ap)
+{
+ if (early_console) {
+ char buf[512];
+ int n = vscnprintf(buf, sizeof(buf), fmt, ap);
+
+ early_console->write(early_console, buf, n);
+ }
+}
+
+asmlinkage void early_printk(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ early_vprintk(fmt, ap);
+ va_end(ap);
+}
+#endif
+
static int __add_preferred_console(char *name, int idx, char *options,
char *brl_options)
{
@@ -1958,45 +1977,6 @@ int is_console_locked(void)
return console_locked;
}
-/*
- * Delayed printk version, for scheduler-internal messages:
- */
-#define PRINTK_BUF_SIZE 512
-
-#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_SCHED 0x02
-
-static DEFINE_PER_CPU(int, printk_pending);
-static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
-
-static void wake_up_klogd_work_func(struct irq_work *irq_work)
-{
- int pending = __this_cpu_xchg(printk_pending, 0);
-
- if (pending & PRINTK_PENDING_SCHED) {
- char *buf = __get_cpu_var(printk_sched_buf);
- printk(KERN_WARNING "[sched_delayed] %s", buf);
- }
-
- if (pending & PRINTK_PENDING_WAKEUP)
- wake_up_interruptible(&log_wait);
-}
-
-static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
- .func = wake_up_klogd_work_func,
- .flags = IRQ_WORK_LAZY,
-};
-
-void wake_up_klogd(void)
-{
- preempt_disable();
- if (waitqueue_active(&log_wait)) {
- this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
- irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
- }
- preempt_enable();
-}
-
static void console_cont_flush(char *text, size_t size)
{
unsigned long flags;
@@ -2459,6 +2439,44 @@ static int __init printk_late_init(void)
late_initcall(printk_late_init);
#if defined CONFIG_PRINTK
+/*
+ * Delayed printk version, for scheduler-internal messages:
+ */
+#define PRINTK_BUF_SIZE 512
+
+#define PRINTK_PENDING_WAKEUP 0x01
+#define PRINTK_PENDING_SCHED 0x02
+
+static DEFINE_PER_CPU(int, printk_pending);
+static DEFINE_PER_CPU(char [PRINTK_BUF_SIZE], printk_sched_buf);
+
+static void wake_up_klogd_work_func(struct irq_work *irq_work)
+{
+ int pending = __this_cpu_xchg(printk_pending, 0);
+
+ if (pending & PRINTK_PENDING_SCHED) {
+ char *buf = __get_cpu_var(printk_sched_buf);
+ printk(KERN_WARNING "[sched_delayed] %s", buf);
+ }
+
+ if (pending & PRINTK_PENDING_WAKEUP)
+ wake_up_interruptible(&log_wait);
+}
+
+static DEFINE_PER_CPU(struct irq_work, wake_up_klogd_work) = {
+ .func = wake_up_klogd_work_func,
+ .flags = IRQ_WORK_LAZY,
+};
+
+void wake_up_klogd(void)
+{
+ preempt_disable();
+ if (waitqueue_active(&log_wait)) {
+ this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP);
+ irq_work_queue(&__get_cpu_var(wake_up_klogd_work));
+ }
+ preempt_enable();
+}
int printk_sched(const char *fmt, ...)
{
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index acbd28424d81..aed981a3f69c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -17,6 +17,7 @@
#include <linux/ptrace.h>
#include <linux/security.h>
#include <linux/signal.h>
+#include <linux/uio.h>
#include <linux/audit.h>
#include <linux/pid_namespace.h>
#include <linux/syscalls.h>
@@ -24,6 +25,7 @@
#include <linux/regset.h>
#include <linux/hw_breakpoint.h>
#include <linux/cn_proc.h>
+#include <linux/compat.h>
static int ptrace_trapping_sleep_fn(void *flags)
@@ -618,6 +620,81 @@ static int ptrace_setsiginfo(struct task_struct *child, const siginfo_t *info)
return error;
}
+static int ptrace_peek_siginfo(struct task_struct *child,
+ unsigned long addr,
+ unsigned long data)
+{
+ struct ptrace_peeksiginfo_args arg;
+ struct sigpending *pending;
+ struct sigqueue *q;
+ int ret, i;
+
+ ret = copy_from_user(&arg, (void __user *) addr,
+ sizeof(struct ptrace_peeksiginfo_args));
+ if (ret)
+ return -EFAULT;
+
+ if (arg.flags & ~PTRACE_PEEKSIGINFO_SHARED)
+ return -EINVAL; /* unknown flags */
+
+ if (arg.nr < 0)
+ return -EINVAL;
+
+ if (arg.flags & PTRACE_PEEKSIGINFO_SHARED)
+ pending = &child->signal->shared_pending;
+ else
+ pending = &child->pending;
+
+ for (i = 0; i < arg.nr; ) {
+ siginfo_t info;
+ s32 off = arg.off + i;
+
+ spin_lock_irq(&child->sighand->siglock);
+ list_for_each_entry(q, &pending->list, list) {
+ if (!off--) {
+ copy_siginfo(&info, &q->info);
+ break;
+ }
+ }
+ spin_unlock_irq(&child->sighand->siglock);
+
+ if (off >= 0) /* beyond the end of the list */
+ break;
+
+#ifdef CONFIG_COMPAT
+ if (unlikely(is_compat_task())) {
+ compat_siginfo_t __user *uinfo = compat_ptr(data);
+
+ ret = copy_siginfo_to_user32(uinfo, &info);
+ ret |= __put_user(info.si_code, &uinfo->si_code);
+ } else
+#endif
+ {
+ siginfo_t __user *uinfo = (siginfo_t __user *) data;
+
+ ret = copy_siginfo_to_user(uinfo, &info);
+ ret |= __put_user(info.si_code, &uinfo->si_code);
+ }
+
+ if (ret) {
+ ret = -EFAULT;
+ break;
+ }
+
+ data += sizeof(siginfo_t);
+ i++;
+
+ if (signal_pending(current))
+ break;
+
+ cond_resched();
+ }
+
+ if (i > 0)
+ return i;
+
+ return ret;
+}
#ifdef PTRACE_SINGLESTEP
#define is_singlestep(request) ((request) == PTRACE_SINGLESTEP)
@@ -748,6 +825,10 @@ int ptrace_request(struct task_struct *child, long request,
ret = put_user(child->ptrace_message, datalp);
break;
+ case PTRACE_PEEKSIGINFO:
+ ret = ptrace_peek_siginfo(child, addr, data);
+ break;
+
case PTRACE_GETSIGINFO:
ret = ptrace_getsiginfo(child, &siginfo);
if (!ret)
diff --git a/kernel/range.c b/kernel/range.c
index 9b8ae2d6ed68..071b0ab455cb 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -97,7 +97,8 @@ void subtract_range(struct range *range, int az, u64 start, u64 end)
range[i].end = range[j].end;
range[i].start = end;
} else {
- printk(KERN_ERR "run of slot in ranges\n");
+ pr_err("%s: run out of slot in ranges\n",
+ __func__);
}
range[j].end = start;
continue;
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2d5f94c1c7fb..d8534308fd05 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -1441,7 +1441,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
rnp->grphi, rnp->qsmask);
raw_spin_unlock_irq(&rnp->lock);
#ifdef CONFIG_PROVE_RCU_DELAY
- if ((random32() % (rcu_num_nodes * 8)) == 0 &&
+ if ((prandom_u32() % (rcu_num_nodes * 8)) == 0 &&
system_state == SYSTEM_RUNNING)
schedule_timeout_uninterruptible(2);
#endif /* #ifdef CONFIG_PROVE_RCU_DELAY */
diff --git a/kernel/relay.c b/kernel/relay.c
index 01ab081ac53a..4c2959b6c34d 100644
--- a/kernel/relay.c
+++ b/kernel/relay.c
@@ -550,6 +550,9 @@ static int __cpuinit relay_hotcpu_callback(struct notifier_block *nb,
return NOTIFY_OK;
}
+/* Needs a _much_ better name... */
+#define FIX_SIZE(x) ((((x) - 1) & PAGE_MASK) + PAGE_SIZE)
+
/**
* relay_open - create a new relay channel
* @base_filename: base name of files to create, %NULL for buffering only
@@ -1099,8 +1102,7 @@ static size_t relay_file_read_end_pos(struct rchan_buf *buf,
static int subbuf_read_actor(size_t read_start,
struct rchan_buf *buf,
size_t avail,
- read_descriptor_t *desc,
- read_actor_t actor)
+ read_descriptor_t *desc)
{
void *from;
int ret = 0;
@@ -1121,15 +1123,13 @@ static int subbuf_read_actor(size_t read_start,
typedef int (*subbuf_actor_t) (size_t read_start,
struct rchan_buf *buf,
size_t avail,
- read_descriptor_t *desc,
- read_actor_t actor);
+ read_descriptor_t *desc);
/*
* relay_file_read_subbufs - read count bytes, bridging subbuf boundaries
*/
static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
subbuf_actor_t subbuf_actor,
- read_actor_t actor,
read_descriptor_t *desc)
{
struct rchan_buf *buf = filp->private_data;
@@ -1150,7 +1150,7 @@ static ssize_t relay_file_read_subbufs(struct file *filp, loff_t *ppos,
break;
avail = min(desc->count, avail);
- ret = subbuf_actor(read_start, buf, avail, desc, actor);
+ ret = subbuf_actor(read_start, buf, avail, desc);
if (desc->error < 0)
break;
@@ -1174,8 +1174,7 @@ static ssize_t relay_file_read(struct file *filp,
desc.count = count;
desc.arg.buf = buffer;
desc.error = 0;
- return relay_file_read_subbufs(filp, ppos, subbuf_read_actor,
- NULL, &desc);
+ return relay_file_read_subbufs(filp, ppos, subbuf_read_actor, &desc);
}
static void relay_consume_bytes(struct rchan_buf *rbuf, int bytes_consumed)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3bd15a43eebc..9b805790ec55 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1030,6 +1030,7 @@ extern void update_group_power(struct sched_domain *sd, int cpu);
extern void trigger_load_balance(struct rq *rq, int cpu);
extern void idle_balance(int this_cpu, struct rq *this_rq);
+extern void update_group_power(struct sched_domain *sd, int cpu);
#else /* CONFIG_SMP */
diff --git a/kernel/semaphore.c b/kernel/semaphore.c
index 4567fc020fe3..6815171a4fff 100644
--- a/kernel/semaphore.c
+++ b/kernel/semaphore.c
@@ -193,7 +193,7 @@ EXPORT_SYMBOL(up);
struct semaphore_waiter {
struct list_head list;
struct task_struct *task;
- int up;
+ bool up;
};
/*
@@ -209,12 +209,12 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
list_add_tail(&waiter.list, &sem->wait_list);
waiter.task = task;
- waiter.up = 0;
+ waiter.up = false;
for (;;) {
if (signal_pending_state(state, task))
goto interrupted;
- if (timeout <= 0)
+ if (unlikely(timeout <= 0))
goto timed_out;
__set_task_state(task, state);
raw_spin_unlock_irq(&sem->lock);
@@ -258,6 +258,6 @@ static noinline void __sched __up(struct semaphore *sem)
struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
struct semaphore_waiter, list);
list_del(&waiter->list);
- waiter->up = 1;
+ waiter->up = true;
wake_up_process(waiter->task);
}
diff --git a/kernel/signal.c b/kernel/signal.c
index 497330ec2ae9..bb7ca79a97b2 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -855,12 +855,14 @@ static void ptrace_trap_notify(struct task_struct *t)
* Returns true if the signal should be actually delivered, otherwise
* it should be dropped.
*/
-static int prepare_signal(int sig, struct task_struct *p, bool force)
+static bool prepare_signal(int sig, struct task_struct *p, bool force)
{
struct signal_struct *signal = p->signal;
struct task_struct *t;
- if (unlikely(signal->flags & SIGNAL_GROUP_EXIT)) {
+ if (signal->flags & (SIGNAL_GROUP_EXIT | SIGNAL_GROUP_COREDUMP)) {
+ if (signal->flags & SIGNAL_GROUP_COREDUMP)
+ return sig == SIGKILL;
/*
* The process is in the middle of dying, nothing to do.
*/
diff --git a/kernel/smp.c b/kernel/smp.c
index 8e451f3ff51b..31670c8d8f89 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -12,6 +12,7 @@
#include <linux/gfp.h>
#include <linux/smp.h>
#include <linux/cpu.h>
+#include <linux/hardirq.h>
#include "smpboot.h"
@@ -100,16 +101,16 @@ void __init call_function_init(void)
* previous function call. For multi-cpu calls its even more interesting
* as we'll have to ensure no other cpu is observing our csd.
*/
-static void csd_lock_wait(struct call_single_data *data)
+static void csd_lock_wait(struct call_single_data *csd)
{
- while (data->flags & CSD_FLAG_LOCK)
+ while (csd->flags & CSD_FLAG_LOCK)
cpu_relax();
}
-static void csd_lock(struct call_single_data *data)
+static void csd_lock(struct call_single_data *csd)
{
- csd_lock_wait(data);
- data->flags = CSD_FLAG_LOCK;
+ csd_lock_wait(csd);
+ csd->flags = CSD_FLAG_LOCK;
/*
* prevent CPU from reordering the above assignment
@@ -119,16 +120,16 @@ static void csd_lock(struct call_single_data *data)
smp_mb();
}
-static void csd_unlock(struct call_single_data *data)
+static void csd_unlock(struct call_single_data *csd)
{
- WARN_ON(!(data->flags & CSD_FLAG_LOCK));
+ WARN_ON(!(csd->flags & CSD_FLAG_LOCK));
/*
* ensure we're all done before releasing data:
*/
smp_mb();
- data->flags &= ~CSD_FLAG_LOCK;
+ csd->flags &= ~CSD_FLAG_LOCK;
}
/*
@@ -137,7 +138,7 @@ static void csd_unlock(struct call_single_data *data)
* ->func, ->info, and ->flags set.
*/
static
-void generic_exec_single(int cpu, struct call_single_data *data, int wait)
+void generic_exec_single(int cpu, struct call_single_data *csd, int wait)
{
struct call_single_queue *dst = &per_cpu(call_single_queue, cpu);
unsigned long flags;
@@ -145,7 +146,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
raw_spin_lock_irqsave(&dst->lock, flags);
ipi = list_empty(&dst->list);
- list_add_tail(&data->list, &dst->list);
+ list_add_tail(&csd->list, &dst->list);
raw_spin_unlock_irqrestore(&dst->lock, flags);
/*
@@ -163,7 +164,7 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
arch_send_call_function_single_ipi(cpu);
if (wait)
- csd_lock_wait(data);
+ csd_lock_wait(csd);
}
/*
@@ -173,7 +174,6 @@ void generic_exec_single(int cpu, struct call_single_data *data, int wait)
void generic_smp_call_function_single_interrupt(void)
{
struct call_single_queue *q = &__get_cpu_var(call_single_queue);
- unsigned int data_flags;
LIST_HEAD(list);
/*
@@ -186,25 +186,26 @@ void generic_smp_call_function_single_interrupt(void)
raw_spin_unlock(&q->lock);
while (!list_empty(&list)) {
- struct call_single_data *data;
+ struct call_single_data *csd;
+ unsigned int csd_flags;
- data = list_entry(list.next, struct call_single_data, list);
- list_del(&data->list);
+ csd = list_entry(list.next, struct call_single_data, list);
+ list_del(&csd->list);
/*
- * 'data' can be invalid after this call if flags == 0
+ * 'csd' can be invalid after this call if flags == 0
* (when called through generic_exec_single()),
* so save them away before making the call:
*/
- data_flags = data->flags;
+ csd_flags = csd->flags;
- data->func(data->info);
+ csd->func(csd->info);
/*
* Unlocked CSDs are valid through generic_exec_single():
*/
- if (data_flags & CSD_FLAG_LOCK)
- csd_unlock(data);
+ if (csd_flags & CSD_FLAG_LOCK)
+ csd_unlock(csd);
}
}
@@ -240,8 +241,9 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
* send smp call function interrupt to this cpu and as such deadlocks
* can't happen.
*/
- WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
- && !oops_in_progress);
+ WARN_ON_ONCE(cpu_online(this_cpu)
+ && (irqs_disabled() || in_serving_irq())
+ && !oops_in_progress);
if (cpu == this_cpu) {
local_irq_save(flags);
@@ -249,16 +251,16 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
local_irq_restore(flags);
} else {
if ((unsigned)cpu < nr_cpu_ids && cpu_online(cpu)) {
- struct call_single_data *data = &d;
+ struct call_single_data *csd = &d;
if (!wait)
- data = &__get_cpu_var(csd_data);
+ csd = &__get_cpu_var(csd_data);
- csd_lock(data);
+ csd_lock(csd);
- data->func = func;
- data->info = info;
- generic_exec_single(cpu, data, wait);
+ csd->func = func;
+ csd->info = info;
+ generic_exec_single(cpu, csd, wait);
} else {
err = -ENXIO; /* CPU not online */
}
@@ -325,7 +327,7 @@ EXPORT_SYMBOL_GPL(smp_call_function_any);
* pre-allocated data structure. Useful for embedding @data inside
* other structures, for instance.
*/
-void __smp_call_function_single(int cpu, struct call_single_data *data,
+void __smp_call_function_single(int cpu, struct call_single_data *csd,
int wait)
{
unsigned int this_cpu;
@@ -343,11 +345,11 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
if (cpu == this_cpu) {
local_irq_save(flags);
- data->func(data->info);
+ csd->func(csd->info);
local_irq_restore(flags);
} else {
- csd_lock(data);
- generic_exec_single(cpu, data, wait);
+ csd_lock(csd);
+ generic_exec_single(cpu, csd, wait);
}
put_cpu();
}
@@ -369,7 +371,7 @@ void __smp_call_function_single(int cpu, struct call_single_data *data,
void smp_call_function_many(const struct cpumask *mask,
smp_call_func_t func, void *info, bool wait)
{
- struct call_function_data *data;
+ struct call_function_data *cfd;
int cpu, next_cpu, this_cpu = smp_processor_id();
/*
@@ -378,8 +380,9 @@ void smp_call_function_many(const struct cpumask *mask,
* send smp call function interrupt to this cpu and as such deadlocks
* can't happen.
*/
- WARN_ON_ONCE(cpu_online(this_cpu) && irqs_disabled()
- && !oops_in_progress && !early_boot_irqs_disabled);
+ WARN_ON_ONCE(cpu_online(this_cpu)
+ && (irqs_disabled() || in_serving_irq())
+ && !oops_in_progress && !early_boot_irqs_disabled);
/* Try to fastpath. So, what's a CPU they want? Ignoring this one. */
cpu = cpumask_first_and(mask, cpu_online_mask);
@@ -401,24 +404,24 @@ void smp_call_function_many(const struct cpumask *mask,
return;
}
- data = &__get_cpu_var(cfd_data);
+ cfd = &__get_cpu_var(cfd_data);
- cpumask_and(data->cpumask, mask, cpu_online_mask);
- cpumask_clear_cpu(this_cpu, data->cpumask);
+ cpumask_and(cfd->cpumask, mask, cpu_online_mask);
+ cpumask_clear_cpu(this_cpu, cfd->cpumask);
/* Some callers race with other cpus changing the passed mask */
- if (unlikely(!cpumask_weight(data->cpumask)))
+ if (unlikely(!cpumask_weight(cfd->cpumask)))
return;
/*
- * After we put an entry into the list, data->cpumask
- * may be cleared again when another CPU sends another IPI for
- * a SMP function call, so data->cpumask will be zero.
+ * After we put an entry into the list, cfd->cpumask may be cleared
+ * again when another CPU sends another IPI for a SMP function call, so
+ * cfd->cpumask will be zero.
*/
- cpumask_copy(data->cpumask_ipi, data->cpumask);
+ cpumask_copy(cfd->cpumask_ipi, cfd->cpumask);
- for_each_cpu(cpu, data->cpumask) {
- struct call_single_data *csd = per_cpu_ptr(data->csd, cpu);
+ for_each_cpu(cpu, cfd->cpumask) {
+ struct call_single_data *csd = per_cpu_ptr(cfd->csd, cpu);
struct call_single_queue *dst =
&per_cpu(call_single_queue, cpu);
unsigned long flags;
@@ -433,12 +436,13 @@ void smp_call_function_many(const struct cpumask *mask,
}
/* Send a message to all CPUs in the map */
- arch_send_call_function_ipi_mask(data->cpumask_ipi);
+ arch_send_call_function_ipi_mask(cfd->cpumask_ipi);
if (wait) {
- for_each_cpu(cpu, data->cpumask) {
- struct call_single_data *csd =
- per_cpu_ptr(data->csd, cpu);
+ for_each_cpu(cpu, cfd->cpumask) {
+ struct call_single_data *csd;
+
+ csd = per_cpu_ptr(cfd->csd, cpu);
csd_lock_wait(csd);
}
}
diff --git a/kernel/sys.c b/kernel/sys.c
index fd2b5259ad7a..ef29bebebad4 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -1804,7 +1804,6 @@ SYSCALL_DEFINE1(umask, int, mask)
return mask;
}
-#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
{
struct fd exe;
@@ -1998,17 +1997,12 @@ out:
return error;
}
+#ifdef CONFIG_CHECKPOINT_RESTORE
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
return put_user(me->clear_child_tid, tid_addr);
}
-
-#else /* CONFIG_CHECKPOINT_RESTORE */
-static int prctl_set_mm(int opt, unsigned long addr,
- unsigned long arg4, unsigned long arg5)
-{
- return -EINVAL;
-}
+#else
static int prctl_get_tid_address(struct task_struct *me, int __user **tid_addr)
{
return -EINVAL;
@@ -2199,9 +2193,8 @@ SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";
-static int __orderly_poweroff(void)
+static int __orderly_poweroff(bool force)
{
- int argc;
char **argv;
static char *envp[] = {
"HOME=/",
@@ -2210,20 +2203,40 @@ static int __orderly_poweroff(void)
};
int ret;
- argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
- if (argv == NULL) {
+ argv = argv_split(GFP_KERNEL, poweroff_cmd, NULL);
+ if (argv) {
+ ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_EXEC);
+ argv_free(argv);
+ } else {
printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
- __func__, poweroff_cmd);
- return -ENOMEM;
+ __func__, poweroff_cmd);
+ ret = -ENOMEM;
}
- ret = call_usermodehelper_fns(argv[0], argv, envp, UMH_WAIT_EXEC,
- NULL, NULL, NULL);
- argv_free(argv);
+ if (ret && force) {
+ printk(KERN_WARNING "Failed to start orderly shutdown: "
+ "forcing the issue\n");
+ /*
+ * I guess this should try to kick off some daemon to sync and
+ * poweroff asap. Or not even bother syncing if we're doing an
+ * emergency shutdown?
+ */
+ emergency_sync();
+ kernel_power_off();
+ }
return ret;
}
+static bool poweroff_force;
+
+static void poweroff_work_func(struct work_struct *work)
+{
+ __orderly_poweroff(poweroff_force);
+}
+
+static DECLARE_WORK(poweroff_work, poweroff_work_func);
+
/**
* orderly_poweroff - Trigger an orderly system poweroff
* @force: force poweroff if command execution fails
@@ -2233,21 +2246,9 @@ static int __orderly_poweroff(void)
*/
int orderly_poweroff(bool force)
{
- int ret = __orderly_poweroff();
-
- if (ret && force) {
- printk(KERN_WARNING "Failed to start orderly shutdown: "
- "forcing the issue\n");
-
- /*
- * I guess this should try to kick off some daemon to sync and
- * poweroff asap. Or not even bother syncing if we're doing an
- * emergency shutdown?
- */
- emergency_sync();
- kernel_power_off();
- }
-
- return ret;
+ if (force) /* do not override the pending "true" */
+ poweroff_force = true;
+ schedule_work(&poweroff_work);
+ return 0;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index afc1dc60f3f8..3dadde52253c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -106,7 +106,6 @@ extern unsigned int core_pipe_limit;
#endif
extern int pid_max;
extern int pid_max_min, pid_max_max;
-extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
extern int latencytop_enabled;
diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
index f8b11a283171..12d6ebbfdd83 100644
--- a/kernel/test_kprobes.c
+++ b/kernel/test_kprobes.c
@@ -365,7 +365,7 @@ int init_test_probes(void)
target2 = kprobe_target2;
do {
- rand1 = random32();
+ rand1 = prandom_u32();
} while (rand1 <= div_factor);
printk(KERN_INFO "Kprobe smoke test started\n");
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index af5a7e9f164b..7ccf16f0bcbc 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -133,7 +133,6 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
int i;
- SEQ_printf(m, "\n");
SEQ_printf(m, "cpu: %d\n", cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
SEQ_printf(m, " clock %d:\n", i);
@@ -187,6 +186,7 @@ static void print_cpu(struct seq_file *m, int cpu, u64 now)
#undef P
#undef P_ns
+ SEQ_printf(m, "\n");
}
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -195,7 +195,6 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
{
struct clock_event_device *dev = td->evtdev;
- SEQ_printf(m, "\n");
SEQ_printf(m, "Tick Device: mode: %d\n", td->mode);
if (cpu < 0)
SEQ_printf(m, "Broadcast device\n");
@@ -230,12 +229,11 @@ print_tickdevice(struct seq_file *m, struct tick_device *td, int cpu)
print_name_offset(m, dev->event_handler);
SEQ_printf(m, "\n");
SEQ_printf(m, " retries: %lu\n", dev->retries);
+ SEQ_printf(m, "\n");
}
-static void timer_list_show_tickdevices(struct seq_file *m)
+static void timer_list_show_tickdevices_header(struct seq_file *m)
{
- int cpu;
-
#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
print_tickdevice(m, tick_get_broadcast_device(), -1);
SEQ_printf(m, "tick_broadcast_mask: %08lx\n",
@@ -246,12 +244,7 @@ static void timer_list_show_tickdevices(struct seq_file *m)
#endif
SEQ_printf(m, "\n");
#endif
- for_each_online_cpu(cpu)
- print_tickdevice(m, tick_get_device(cpu), cpu);
- SEQ_printf(m, "\n");
}
-#else
-static void timer_list_show_tickdevices(struct seq_file *m) { }
#endif
static int timer_list_show(struct seq_file *m, void *v)
@@ -259,34 +252,113 @@ static int timer_list_show(struct seq_file *m, void *v)
u64 now = ktime_to_ns(ktime_get());
int cpu;
- SEQ_printf(m, "Timer List Version: v0.7\n");
- SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n", HRTIMER_MAX_CLOCK_BASES);
- SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
-
- for_each_online_cpu(cpu)
+ if (v == (void *)1) {
+ SEQ_printf(m, "Timer List Version: v0.7\n");
+ SEQ_printf(m, "HRTIMER_MAX_CLOCK_BASES: %d\n",
+ HRTIMER_MAX_CLOCK_BASES);
+ SEQ_printf(m, "now at %Ld nsecs\n", (unsigned long long)now);
+ SEQ_printf(m, "\n");
+ } else if (v < (void *)(unsigned long)(nr_cpu_ids + 2)) {
+ cpu = (unsigned long)(v - 2);
print_cpu(m, cpu, now);
+ }
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ else if (v == (void *)(unsigned long)nr_cpu_ids + 2) {
+ timer_list_show_tickdevices_header(m);
+ } else {
+ cpu = (unsigned long)(v - 3 - nr_cpu_ids);
+ print_tickdevice(m, tick_get_device(cpu), cpu);
+ }
+#endif
+ return 0;
+}
- SEQ_printf(m, "\n");
- timer_list_show_tickdevices(m);
+/*
+ * This iterator really needs some explanation since it is offset and has
+ * two passes, one of which is controlled by a config option.
+ * In hotpluggable systems some cpus, including cpu 0 and the last cpu, may
+ * be missing so we have to use cpumask_* to iterate over the cpus.
+ * For the first pass:
+ * It returns 1 for the header position.
+ * For cpu 0 it returns 2 and the final possible cpu would be nr_cpu_ids + 1.
+ * On the second pass:
+ * It returns nr_cpu_ids + 1 for the second header position.
+ * For cpu 0 it returns nr_cpu_ids + 2
+ * The final possible cpu would be nr_cpu_ids + nr_cpu_ids + 2.
+ * It is also important to remember that cpumask_next returns >= nr_cpu_ids if
+ * no further cpus set.
+ */
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+ unsigned long n = *offset;
- return 0;
+ if (n == 0)
+ return (void *) 1;
+
+ if (n < nr_cpu_ids + 1) {
+ n = cpumask_next(n - 2, cpu_online_mask);
+ if (n >= nr_cpu_ids)
+ n = nr_cpu_ids;
+ *offset = n + 1;
+ return (void *)(unsigned long)(n + 2);
+ }
+
+#ifdef CONFIG_GENERIC_CLOCKEVENTS
+ if (n == nr_cpu_ids + 1)
+ return (void *)(unsigned long)(nr_cpu_ids + 2);
+
+ if (n < nr_cpu_ids * 2 + 2) {
+ n -= (nr_cpu_ids + 2);
+ n = cpumask_next(n - 1, cpu_online_mask);
+ if (n >= nr_cpu_ids)
+ return NULL;
+ *offset = n + 2 + nr_cpu_ids;
+ return (void *)(unsigned long)(n + 3 + nr_cpu_ids);
+ }
+#endif
+
+ return NULL;
+}
+
+static void *timer_list_next(struct seq_file *file, void *data, loff_t *offset)
+{
+ (*offset)++;
+ return timer_list_start(file, offset);
}
+static void timer_list_stop(struct seq_file *file, void *data)
+{
+}
+
+static const struct seq_operations timer_list_sops = {
+ .start = timer_list_start,
+ .next = timer_list_next,
+ .stop = timer_list_stop,
+ .show = timer_list_show,
+};
+
void sysrq_timer_list_show(void)
{
timer_list_show(NULL, NULL);
}
+static int timer_list_release(struct inode *inode, struct file *filep)
+{
+ seq_release(inode, filep);
+
+ return 0;
+}
+
static int timer_list_open(struct inode *inode, struct file *filp)
{
- return single_open(filp, timer_list_show, NULL);
+ return seq_open(filp, &timer_list_sops);
}
static const struct file_operations timer_list_fops = {
.open = timer_list_open,
.read = seq_read,
.llseek = seq_lseek,
- .release = single_release,
+ .release = timer_list_release,
};
static int __init init_timer_list_procfs(void)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 4a944676358e..ea741c32d596 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -239,10 +239,12 @@ static void watchdog_overflow_callback(struct perf_event *event,
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- if (hardlockup_panic)
+ if (hardlockup_panic) {
+ trigger_all_cpu_backtrace();
panic("Watchdog detected hard LOCKUP on cpu %d", this_cpu);
- else
+ } else {
WARN(1, "Watchdog detected hard LOCKUP on cpu %d", this_cpu);
+ }
__this_cpu_write(hard_watchdog_warn, true);
return;
@@ -323,8 +325,10 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
else
dump_stack();
- if (softlockup_panic)
+ if (softlockup_panic) {
+ trigger_all_cpu_backtrace();
panic("softlockup: hung tasks");
+ }
__this_cpu_write(soft_watchdog_warn, true);
} else
__this_cpu_write(soft_watchdog_warn, false);
@@ -517,6 +521,11 @@ int proc_dowatchdog(struct ctl_table *table, int write,
return ret;
set_sample_period();
+ /*
+ * Watchdog threads shouldn't be enabled if they are
+ * disabled. The 'watchdog_disabled' variable check in
+ * watchdog_*_all_cpus() function takes care of this.
+ */
if (watchdog_enabled && watchdog_thresh)
watchdog_enable_all_cpus();
else
diff --git a/lib/Kconfig b/lib/Kconfig
index 3958dc4389f9..7bfdbbcb6abd 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -189,6 +189,15 @@ config LZO_COMPRESS
config LZO_DECOMPRESS
tristate
+config LZ4_COMPRESS
+ tristate
+
+config LZ4HC_COMPRESS
+ tristate
+
+config LZ4_DECOMPRESS
+ tristate
+
source "lib/xz/Kconfig"
#
@@ -213,6 +222,10 @@ config DECOMPRESS_LZO
select LZO_DECOMPRESS
tristate
+config DECOMPRESS_LZ4
+ select LZ4_DECOMPRESS
+ tristate
+
#
# Generic allocator support is selected if needed
#
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 28be08c09bab..ae805189e8d6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1292,6 +1292,24 @@ config LATENCYTOP
Enable this option if you want to use the LatencyTOP tool
to find out which userspace is blocking on what kernel operations.
+config ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ bool
+
+config DEBUG_STRICT_USER_COPY_CHECKS
+ bool "Strict user copy size checks"
+ depends on ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS
+ depends on DEBUG_KERNEL && !TRACE_BRANCH_PROFILING
+ help
+ Enabling this option turns a certain set of sanity checks for user
+ copy operations into compile time failures.
+
+ The copy_from_user() etc checks are there to help test if there
+ are sufficient security checks on the length argument of
+ the copy operation, by having gcc prove that the argument is
+ within bounds.
+
+ If unsure, say N.
+
source mm/Kconfig.debug
source kernel/trace/Kconfig
diff --git a/lib/Makefile b/lib/Makefile
index d7946ff75b2e..10facc6ed156 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -13,8 +13,9 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
is_single_threaded.o plist.o decompress.o kobject_uevent.o \
- earlycpio.o
+ earlycpio.o percpu-refcount.o
+obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
lib-$(CONFIG_MMU) += ioremap.o
lib-$(CONFIG_SMP) += cpumask.o
@@ -72,6 +73,9 @@ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/
obj-$(CONFIG_BCH) += bch.o
obj-$(CONFIG_LZO_COMPRESS) += lzo/
obj-$(CONFIG_LZO_DECOMPRESS) += lzo/
+obj-$(CONFIG_LZ4_COMPRESS) += lz4/
+obj-$(CONFIG_LZ4HC_COMPRESS) += lz4/
+obj-$(CONFIG_LZ4_DECOMPRESS) += lz4/
obj-$(CONFIG_XZ_DEC) += xz/
obj-$(CONFIG_RAID6_PQ) += raid6/
@@ -80,6 +84,7 @@ lib-$(CONFIG_DECOMPRESS_BZIP2) += decompress_bunzip2.o
lib-$(CONFIG_DECOMPRESS_LZMA) += decompress_unlzma.o
lib-$(CONFIG_DECOMPRESS_XZ) += decompress_unxz.o
lib-$(CONFIG_DECOMPRESS_LZO) += decompress_unlzo.o
+lib-$(CONFIG_DECOMPRESS_LZ4) += decompress_unlz4.o
obj-$(CONFIG_TEXTSEARCH) += textsearch.o
obj-$(CONFIG_TEXTSEARCH_KMP) += ts_kmp.o
diff --git a/lib/argv_split.c b/lib/argv_split.c
index 1e9a6cbc3689..e927ed0e18a8 100644
--- a/lib/argv_split.c
+++ b/lib/argv_split.c
@@ -8,23 +8,17 @@
#include <linux/slab.h>
#include <linux/export.h>
-static const char *skip_arg(const char *cp)
-{
- while (*cp && !isspace(*cp))
- cp++;
-
- return cp;
-}
-
static int count_argc(const char *str)
{
int count = 0;
+ bool was_space;
- while (*str) {
- str = skip_spaces(str);
- if (*str) {
+ for (was_space = true; *str; str++) {
+ if (isspace(*str)) {
+ was_space = true;
+ } else if (was_space) {
+ was_space = false;
count++;
- str = skip_arg(str);
}
}
@@ -39,10 +33,8 @@ static int count_argc(const char *str)
*/
void argv_free(char **argv)
{
- char **p;
- for (p = argv; *p; p++)
- kfree(*p);
-
+ argv--;
+ kfree(argv[0]);
kfree(argv);
}
EXPORT_SYMBOL(argv_free);
@@ -59,43 +51,44 @@ EXPORT_SYMBOL(argv_free);
* considered to be a single argument separator. The returned array
* is always NULL-terminated. Returns NULL on memory allocation
* failure.
+ *
+ * The source string at `str' may be undergoing concurrent alteration via
+ * userspace sysctl activity (at least). The argv_split() implementation
+ * attempts to handle this gracefully by taking a local copy to work on.
*/
char **argv_split(gfp_t gfp, const char *str, int *argcp)
{
- int argc = count_argc(str);
- char **argv = kzalloc(sizeof(*argv) * (argc+1), gfp);
- char **argvp;
-
- if (argv == NULL)
- goto out;
-
- if (argcp)
- *argcp = argc;
-
- argvp = argv;
-
- while (*str) {
- str = skip_spaces(str);
-
- if (*str) {
- const char *p = str;
- char *t;
-
- str = skip_arg(str);
+ char *argv_str;
+ bool was_space;
+ char **argv, **argv_ret;
+ int argc;
+
+ argv_str = kstrndup(str, KMALLOC_MAX_SIZE - 1, gfp);
+ if (!argv_str)
+ return NULL;
+
+ argc = count_argc(argv_str);
+ argv = kmalloc(sizeof(*argv) * (argc + 2), gfp);
+ if (!argv) {
+ kfree(argv_str);
+ return NULL;
+ }
- t = kstrndup(p, str-p, gfp);
- if (t == NULL)
- goto fail;
- *argvp++ = t;
+ *argv = argv_str;
+ argv_ret = ++argv;
+ for (was_space = true; *argv_str; argv_str++) {
+ if (isspace(*argv_str)) {
+ was_space = true;
+ *argv_str = 0;
+ } else if (was_space) {
+ was_space = false;
+ *argv++ = argv_str;
}
}
- *argvp = NULL;
-
- out:
- return argv;
+ *argv = NULL;
- fail:
- argv_free(argv);
- return NULL;
+ if (argcp)
+ *argcp = argc;
+ return argv_ret;
}
EXPORT_SYMBOL(argv_split);
diff --git a/lib/bust_spinlocks.c b/lib/bust_spinlocks.c
index 9681d54b95d1..f8e0e5367398 100644
--- a/lib/bust_spinlocks.c
+++ b/lib/bust_spinlocks.c
@@ -8,6 +8,7 @@
*/
#include <linux/kernel.h>
+#include <linux/printk.h>
#include <linux/spinlock.h>
#include <linux/tty.h>
#include <linux/wait.h>
@@ -28,5 +29,3 @@ void __attribute__((weak)) bust_spinlocks(int yes)
wake_up_klogd();
}
}
-
-
diff --git a/lib/decompress.c b/lib/decompress.c
index 31a804277282..c70810ea8590 100644
--- a/lib/decompress.c
+++ b/lib/decompress.c
@@ -11,6 +11,7 @@
#include <linux/decompress/unxz.h>
#include <linux/decompress/inflate.h>
#include <linux/decompress/unlzo.h>
+#include <linux/decompress/unlz4.h>
#include <linux/types.h>
#include <linux/string.h>
@@ -31,6 +32,9 @@
#ifndef CONFIG_DECOMPRESS_LZO
# define unlzo NULL
#endif
+#ifndef CONFIG_DECOMPRESS_LZ4
+# define unlz4 NULL
+#endif
struct compress_format {
unsigned char magic[2];
@@ -45,6 +49,7 @@ static const struct compress_format compressed_formats[] __initdata = {
{ {0x5d, 0x00}, "lzma", unlzma },
{ {0xfd, 0x37}, "xz", unxz },
{ {0x89, 0x4c}, "lzo", unlzo },
+ { {0x02, 0x21}, "lz4", unlz4 },
{ {0, 0}, NULL, NULL }
};
diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c
new file mode 100644
index 000000000000..3e67cfad16ad
--- /dev/null
+++ b/lib/decompress_unlz4.c
@@ -0,0 +1,187 @@
+/*
+ * Wrapper for decompressing LZ4-compressed kernel, initramfs, and initrd
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#ifdef STATIC
+#define PREBOOT
+#include "lz4/lz4_decompress.c"
+#else
+#include <linux/decompress/unlz4.h>
+#endif
+#include <linux/types.h>
+#include <linux/lz4.h>
+#include <linux/decompress/mm.h>
+#include <linux/compiler.h>
+
+#include <asm/unaligned.h>
+
+/*
+ * Note: Uncompressed chunk size is used in the compressor side
+ * (userspace side for compression).
+ * It is hardcoded because there is not proper way to extract it
+ * from the binary stream which is generated by the preliminary
+ * version of LZ4 tool so far.
+ */
+#define LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE (8 << 20)
+#define ARCHIVE_MAGICNUMBER 0x184C2102
+
+STATIC inline int INIT unlz4(u8 *input, int in_len,
+ int (*fill) (void *, unsigned int),
+ int (*flush) (void *, unsigned int),
+ u8 *output, int *posp,
+ void (*error) (char *x))
+{
+ int ret = -1;
+ size_t chunksize = 0;
+ size_t uncomp_chunksize = LZ4_DEFAULT_UNCOMPRESSED_CHUNK_SIZE;
+ u8 *inp;
+ u8 *inp_start;
+ u8 *outp;
+ int size = in_len;
+#ifdef PREBOOT
+ size_t out_len = get_unaligned_le32(input + in_len);
+#endif
+ size_t dest_len;
+
+
+ if (output) {
+ outp = output;
+ } else if (!flush) {
+ error("NULL output pointer and no flush function provided");
+ goto exit_0;
+ } else {
+ outp = large_malloc(uncomp_chunksize);
+ if (!outp) {
+ error("Could not allocate output buffer");
+ goto exit_0;
+ }
+ }
+
+ if (input && fill) {
+ error("Both input pointer and fill function provided,");
+ goto exit_1;
+ } else if (input) {
+ inp = input;
+ } else if (!fill) {
+ error("NULL input pointer and missing fill function");
+ goto exit_1;
+ } else {
+ inp = large_malloc(lz4_compressbound(uncomp_chunksize));
+ if (!inp) {
+ error("Could not allocate input buffer");
+ goto exit_1;
+ }
+ }
+ inp_start = inp;
+
+ if (posp)
+ *posp = 0;
+
+ if (fill)
+ fill(inp, 4);
+
+ chunksize = get_unaligned_le32(inp);
+ if (chunksize == ARCHIVE_MAGICNUMBER) {
+ inp += 4;
+ size -= 4;
+ } else {
+ error("invalid header");
+ goto exit_2;
+ }
+
+ if (posp)
+ *posp += 4;
+
+ for (;;) {
+
+ if (fill)
+ fill(inp, 4);
+
+ chunksize = get_unaligned_le32(inp);
+ if (chunksize == ARCHIVE_MAGICNUMBER) {
+ inp += 4;
+ size -= 4;
+ if (posp)
+ *posp += 4;
+ continue;
+ }
+ inp += 4;
+ size -= 4;
+
+ if (posp)
+ *posp += 4;
+
+ if (fill) {
+ if (chunksize > lz4_compressbound(uncomp_chunksize)) {
+ error("chunk length is longer than allocated");
+ goto exit_2;
+ }
+ fill(inp, chunksize);
+ }
+#ifdef PREBOOT
+ if (out_len >= uncomp_chunksize) {
+ dest_len = uncomp_chunksize;
+ out_len -= dest_len;
+ } else
+ dest_len = out_len;
+ ret = lz4_decompress(inp, &chunksize, outp, dest_len);
+#else
+ dest_len = uncomp_chunksize;
+ ret = lz4_decompress_unknownoutputsize(inp, chunksize, outp,
+ &dest_len);
+#endif
+ if (ret < 0) {
+ error("Decoding failed");
+ goto exit_2;
+ }
+
+ if (flush && flush(outp, dest_len) != dest_len)
+ goto exit_2;
+ if (output)
+ outp += dest_len;
+ if (posp)
+ *posp += chunksize;
+
+ size -= chunksize;
+
+ if (size == 0)
+ break;
+ else if (size < 0) {
+ error("data corrupted");
+ goto exit_2;
+ }
+
+ inp += chunksize;
+ if (fill)
+ inp = inp_start;
+ }
+
+ ret = 0;
+exit_2:
+ if (!input)
+ large_free(inp_start);
+exit_1:
+ if (!output)
+ large_free(outp);
+exit_0:
+ return ret;
+}
+
+#ifdef PREBOOT
+STATIC int INIT decompress(unsigned char *buf, int in_len,
+ int(*fill)(void*, unsigned int),
+ int(*flush)(void*, unsigned int),
+ unsigned char *output,
+ int *posp,
+ void(*error)(char *x)
+ )
+{
+ return unlz4(buf, in_len - 4, fill, flush, output, posp, error);
+}
+#endif
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 5e396accd3d0..d87a17a819d0 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -862,17 +862,21 @@ static void check_unmap(struct dma_debug_entry *ref)
entry = bucket_find_exact(bucket, ref);
if (!entry) {
+ /* must drop lock before calling dma_mapping_error */
+ put_hash_bucket(bucket, &flags);
+
if (dma_mapping_error(ref->dev, ref->dev_addr)) {
err_printk(ref->dev, NULL,
- "DMA-API: device driver tries "
- "to free an invalid DMA memory address\n");
- return;
+ "DMA-API: device driver tries to free an "
+ "invalid DMA memory address\n");
+ } else {
+ err_printk(ref->dev, NULL,
+ "DMA-API: device driver tries to free DMA "
+ "memory it has not allocated [device "
+ "address=0x%016llx] [size=%llu bytes]\n",
+ ref->dev_addr, ref->size);
}
- err_printk(ref->dev, NULL, "DMA-API: device driver tries "
- "to free DMA memory it has not allocated "
- "[device address=0x%016llx] [size=%llu bytes]\n",
- ref->dev_addr, ref->size);
- goto out;
+ return;
}
if (ref->size != entry->size) {
@@ -936,7 +940,6 @@ static void check_unmap(struct dma_debug_entry *ref)
hash_bucket_del(entry);
dma_entry_free(entry);
-out:
put_hash_bucket(bucket, &flags);
}
@@ -1082,13 +1085,27 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
ref.dev = dev;
ref.dev_addr = dma_addr;
bucket = get_hash_bucket(&ref, &flags);
- entry = bucket_find_exact(bucket, &ref);
- if (!entry)
- goto out;
+ list_for_each_entry(entry, &bucket->list, list) {
+ if (!exact_match(&ref, entry))
+ continue;
+
+ /*
+ * The same physical address can be mapped multiple
+ * times. Without a hardware IOMMU this results in the
+ * same device addresses being put into the dma-debug
+ * hash multiple times too. This can result in false
+ * positives being reported. Therefore we implement a
+ * best-fit algorithm here which updates the first entry
+ * from the hash which fits the reference value and is
+ * not currently listed as being checked.
+ */
+ if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
+ entry->map_err_type = MAP_ERR_CHECKED;
+ break;
+ }
+ }
- entry->map_err_type = MAP_ERR_CHECKED;
-out:
put_hash_bucket(bucket, &flags);
}
EXPORT_SYMBOL(debug_dma_mapping_error);
diff --git a/lib/fault-inject.c b/lib/fault-inject.c
index f7210ad6cffd..c5c7a762b850 100644
--- a/lib/fault-inject.c
+++ b/lib/fault-inject.c
@@ -122,7 +122,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size)
return false;
}
- if (attr->probability <= random32() % 100)
+ if (attr->probability <= prandom_u32() % 100)
return false;
if (!fail_stacktrace(attr))
diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
index fc2eeb7cb2ea..1ef4cc344977 100644
--- a/lib/int_sqrt.c
+++ b/lib/int_sqrt.c
@@ -1,3 +1,9 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr.bueso@hp.com>
+ *
+ * Based on the shift-and-subtract algorithm for computing integer
+ * square root from Guy L. Steele.
+ */
#include <linux/kernel.h>
#include <linux/export.h>
@@ -10,23 +16,23 @@
*/
unsigned long int_sqrt(unsigned long x)
{
- unsigned long op, res, one;
+ unsigned long b, m, y = 0;
- op = x;
- res = 0;
+ if (x <= 1)
+ return x;
- one = 1UL << (BITS_PER_LONG - 2);
- while (one > op)
- one >>= 2;
+ m = 1UL << (BITS_PER_LONG - 2);
+ while (m != 0) {
+ b = y + m;
+ y >>= 1;
- while (one != 0) {
- if (op >= res + one) {
- op = op - (res + one);
- res = res + 2 * one;
+ if (x >= b) {
+ x -= b;
+ y += m;
}
- res /= 2;
- one /= 4;
+ m >>= 2;
}
- return res;
+
+ return y;
}
EXPORT_SYMBOL(int_sqrt);
diff --git a/lib/list_sort.c b/lib/list_sort.c
index d7325c6b103f..1183fa70a44d 100644
--- a/lib/list_sort.c
+++ b/lib/list_sort.c
@@ -229,7 +229,7 @@ static int __init list_sort_test(void)
goto exit;
}
/* force some equivalencies */
- el->value = random32() % (TEST_LIST_LEN/3);
+ el->value = prandom_u32() % (TEST_LIST_LEN / 3);
el->serial = i;
el->poison1 = TEST_POISON1;
el->poison2 = TEST_POISON2;
diff --git a/lib/lz4/Makefile b/lib/lz4/Makefile
new file mode 100644
index 000000000000..8085d04e9309
--- /dev/null
+++ b/lib/lz4/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_LZ4_COMPRESS) += lz4_compress.o
+obj-$(CONFIG_LZ4HC_COMPRESS) += lz4hc_compress.o
+obj-$(CONFIG_LZ4_DECOMPRESS) += lz4_decompress.o
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
new file mode 100644
index 000000000000..fd94058bd7f9
--- /dev/null
+++ b/lib/lz4/lz4_compress.c
@@ -0,0 +1,443 @@
+/*
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2012, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ * - LZ4 source repository : http://code.google.com/p/lz4/
+ *
+ * Changed for kernel use by:
+ * Chanho Min <chanho.min@lge.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/lz4.h>
+#include <asm/unaligned.h>
+#include "lz4defs.h"
+
+/*
+ * LZ4_compressCtx :
+ * -----------------
+ * Compress 'isize' bytes from 'source' into an output buffer 'dest' of
+ * maximum size 'maxOutputSize'. * If it cannot achieve it, compression
+ * will stop, and result of the function will be zero.
+ * return : the number of bytes written in buffer 'dest', or 0 if the
+ * compression fails
+ */
+static inline int lz4_compressctx(void *ctx,
+ const char *source,
+ char *dest,
+ int isize,
+ int maxoutputsize)
+{
+ HTYPE *hashtable = (HTYPE *)ctx;
+ const u8 *ip = (u8 *)source;
+#if LZ4_ARCH64
+ const BYTE * const base = ip;
+#else
+ const int base = 0;
+#endif
+ const u8 *anchor = ip;
+ const u8 *const iend = ip + isize;
+ const u8 *const mflimit = iend - MFLIMIT;
+ #define MATCHLIMIT (iend - LASTLITERALS)
+
+ u8 *op = (u8 *) dest;
+ u8 *const oend = op + maxoutputsize;
+ int length;
+ const int skipstrength = SKIPSTRENGTH;
+ u32 forwardh;
+ int lastrun;
+
+ /* Init */
+ if (isize < MINLENGTH)
+ goto _last_literals;
+
+ memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+
+ /* First Byte */
+ hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
+ ip++;
+ forwardh = LZ4_HASH_VALUE(ip);
+
+ /* Main Loop */
+ for (;;) {
+ int findmatchattempts = (1U << skipstrength) + 3;
+ const u8 *forwardip = ip;
+ const u8 *ref;
+ u8 *token;
+
+ /* Find a match */
+ do {
+ u32 h = forwardh;
+ int step = findmatchattempts++ >> skipstrength;
+ ip = forwardip;
+ forwardip = ip + step;
+
+ if (unlikely(forwardip > mflimit))
+ goto _last_literals;
+
+ forwardh = LZ4_HASH_VALUE(forwardip);
+ ref = base + hashtable[h];
+ hashtable[h] = ip - base;
+ } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));
+
+ /* Catch up */
+ while ((ip > anchor) && (ref > (u8 *)source) &&
+ unlikely(ip[-1] == ref[-1])) {
+ ip--;
+ ref--;
+ }
+
+ /* Encode Literal length */
+ length = (int)(ip - anchor);
+ token = op++;
+ /* check output limit */
+ if (unlikely(op + length + (2 + 1 + LASTLITERALS) +
+ (length >> 8) > oend))
+ return 0;
+
+ if (length >= (int)RUN_MASK) {
+ int len;
+ *token = (RUN_MASK << ML_BITS);
+ len = length - RUN_MASK;
+ for (; len > 254 ; len -= 255)
+ *op++ = 255;
+ *op++ = (u8)len;
+ } else
+ *token = (length << ML_BITS);
+
+ /* Copy Literals */
+ LZ4_BLINDCOPY(anchor, op, length);
+_next_match:
+ /* Encode Offset */
+ LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+
+ /* Start Counting */
+ ip += MINMATCH;
+ /* MinMatch verified */
+ ref += MINMATCH;
+ anchor = ip;
+ while (likely(ip < MATCHLIMIT - (STEPSIZE - 1))) {
+ #if LZ4_ARCH64
+ u64 diff = A64(ref) ^ A64(ip);
+ #else
+ u32 diff = A32(ref) ^ A32(ip);
+ #endif
+ if (!diff) {
+ ip += STEPSIZE;
+ ref += STEPSIZE;
+ continue;
+ }
+ ip += LZ4_NBCOMMONBYTES(diff);
+ goto _endcount;
+ }
+ #if LZ4_ARCH64
+ if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
+ ip += 4;
+ ref += 4;
+ }
+ #endif
+ if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
+ ip += 2;
+ ref += 2;
+ }
+ if ((ip < MATCHLIMIT) && (*ref == *ip))
+ ip++;
+_endcount:
+ /* Encode MatchLength */
+ length = (int)(ip - anchor);
+ /* Check output limit */
+ if (unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend))
+ return 0;
+ if (length >= (int)ML_MASK) {
+ *token += ML_MASK;
+ length -= ML_MASK;
+ for (; length > 509 ; length -= 510) {
+ *op++ = 255;
+ *op++ = 255;
+ }
+ if (length > 254) {
+ length -= 255;
+ *op++ = 255;
+ }
+ *op++ = (u8)length;
+ } else
+ *token += length;
+
+ /* Test end of chunk */
+ if (ip > mflimit) {
+ anchor = ip;
+ break;
+ }
+
+ /* Fill table */
+ hashtable[LZ4_HASH_VALUE(ip-2)] = ip - 2 - base;
+
+ /* Test next position */
+ ref = base + hashtable[LZ4_HASH_VALUE(ip)];
+ hashtable[LZ4_HASH_VALUE(ip)] = ip - base;
+ if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
+ token = op++;
+ *token = 0;
+ goto _next_match;
+ }
+
+ /* Prepare next loop */
+ anchor = ip++;
+ forwardh = LZ4_HASH_VALUE(ip);
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ lastrun = (int)(iend - anchor);
+ if (((char *)op - dest) + lastrun + 1
+ + ((lastrun + 255 - RUN_MASK) / 255) > (u32)maxoutputsize)
+ return 0;
+
+ if (lastrun >= (int)RUN_MASK) {
+ *op++ = (RUN_MASK << ML_BITS);
+ lastrun -= RUN_MASK;
+ for (; lastrun > 254 ; lastrun -= 255)
+ *op++ = 255;
+ *op++ = (u8)lastrun;
+ } else
+ *op++ = (lastrun << ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend - anchor;
+
+ /* End */
+ return (int)(((char *)op) - dest);
+}
+
+static inline int lz4_compress64kctx(void *ctx,
+ const char *source,
+ char *dest,
+ int isize,
+ int maxoutputsize)
+{
+ u16 *hashtable = (u16 *)ctx;
+ const u8 *ip = (u8 *) source;
+ const u8 *anchor = ip;
+ const u8 *const base = ip;
+ const u8 *const iend = ip + isize;
+ const u8 *const mflimit = iend - MFLIMIT;
+ #define MATCHLIMIT (iend - LASTLITERALS)
+
+ u8 *op = (u8 *) dest;
+ u8 *const oend = op + maxoutputsize;
+ int len, length;
+ const int skipstrength = SKIPSTRENGTH;
+ u32 forwardh;
+ int lastrun;
+
+ /* Init */
+ if (isize < MINLENGTH)
+ goto _last_literals;
+
+ memset((void *)hashtable, 0, LZ4_MEM_COMPRESS);
+
+ /* First Byte */
+ ip++;
+ forwardh = LZ4_HASH64K_VALUE(ip);
+
+ /* Main Loop */
+ for (;;) {
+ int findmatchattempts = (1U << skipstrength) + 3;
+ const u8 *forwardip = ip;
+ const u8 *ref;
+ u8 *token;
+
+ /* Find a match */
+ do {
+ u32 h = forwardh;
+ int step = findmatchattempts++ >> skipstrength;
+ ip = forwardip;
+ forwardip = ip + step;
+
+ if (forwardip > mflimit)
+ goto _last_literals;
+
+ forwardh = LZ4_HASH64K_VALUE(forwardip);
+ ref = base + hashtable[h];
+ hashtable[h] = (u16)(ip - base);
+ } while (A32(ref) != A32(ip));
+
+ /* Catch up */
+ while ((ip > anchor) && (ref > (u8 *)source)
+ && (ip[-1] == ref[-1])) {
+ ip--;
+ ref--;
+ }
+
+ /* Encode Literal length */
+ length = (int)(ip - anchor);
+ token = op++;
+ /* Check output limit */
+ if (unlikely(op + length + (2 + 1 + LASTLITERALS)
+ + (length >> 8) > oend))
+ return 0;
+ if (length >= (int)RUN_MASK) {
+ *token = (RUN_MASK << ML_BITS);
+ len = length - RUN_MASK;
+ for (; len > 254 ; len -= 255)
+ *op++ = 255;
+ *op++ = (u8)len;
+ } else
+ *token = (length << ML_BITS);
+
+ /* Copy Literals */
+ LZ4_BLINDCOPY(anchor, op, length);
+
+_next_match:
+ /* Encode Offset */
+ LZ4_WRITE_LITTLEENDIAN_16(op, (u16)(ip - ref));
+
+ /* Start Counting */
+ ip += MINMATCH;
+ /* MinMatch verified */
+ ref += MINMATCH;
+ anchor = ip;
+
+ while (ip < MATCHLIMIT - (STEPSIZE - 1)) {
+ #if LZ4_ARCH64
+ u64 diff = A64(ref) ^ A64(ip);
+ #else
+ u32 diff = A32(ref) ^ A32(ip);
+ #endif
+
+ if (!diff) {
+ ip += STEPSIZE;
+ ref += STEPSIZE;
+ continue;
+ }
+ ip += LZ4_NBCOMMONBYTES(diff);
+ goto _endcount;
+ }
+ #if LZ4_ARCH64
+ if ((ip < (MATCHLIMIT - 3)) && (A32(ref) == A32(ip))) {
+ ip += 4;
+ ref += 4;
+ }
+ #endif
+ if ((ip < (MATCHLIMIT - 1)) && (A16(ref) == A16(ip))) {
+ ip += 2;
+ ref += 2;
+ }
+ if ((ip < MATCHLIMIT) && (*ref == *ip))
+ ip++;
+_endcount:
+
+ /* Encode MatchLength */
+ len = (int)(ip - anchor);
+ /* Check output limit */
+ if (unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend))
+ return 0;
+ if (len >= (int)ML_MASK) {
+ *token += ML_MASK;
+ len -= ML_MASK;
+ for (; len > 509 ; len -= 510) {
+ *op++ = 255;
+ *op++ = 255;
+ }
+ if (len > 254) {
+ len -= 255;
+ *op++ = 255;
+ }
+ *op++ = (u8)len;
+ } else
+ *token += len;
+
+ /* Test end of chunk */
+ if (ip > mflimit) {
+ anchor = ip;
+ break;
+ }
+
+ /* Fill table */
+ hashtable[LZ4_HASH64K_VALUE(ip-2)] = (u16)(ip - 2 - base);
+
+ /* Test next position */
+ ref = base + hashtable[LZ4_HASH64K_VALUE(ip)];
+ hashtable[LZ4_HASH64K_VALUE(ip)] = (u16)(ip - base);
+ if (A32(ref) == A32(ip)) {
+ token = op++;
+ *token = 0;
+ goto _next_match;
+ }
+
+ /* Prepare next loop */
+ anchor = ip++;
+ forwardh = LZ4_HASH64K_VALUE(ip);
+ }
+
+_last_literals:
+ /* Encode Last Literals */
+ lastrun = (int)(iend - anchor);
+ if (op + lastrun + 1 + (lastrun - RUN_MASK + 255) / 255 > oend)
+ return 0;
+ if (lastrun >= (int)RUN_MASK) {
+ *op++ = (RUN_MASK << ML_BITS);
+ lastrun -= RUN_MASK;
+ for (; lastrun > 254 ; lastrun -= 255)
+ *op++ = 255;
+ *op++ = (u8)lastrun;
+ } else
+ *op++ = (lastrun << ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend - anchor;
+ /* End */
+ return (int)(((char *)op) - dest);
+}
+
+int lz4_compress(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+ int ret = -1;
+ int out_len = 0;
+
+ if (src_len < LZ4_64KLIMIT)
+ out_len = lz4_compress64kctx(wrkmem, src, dst, src_len,
+ lz4_compressbound(src_len));
+ else
+ out_len = lz4_compressctx(wrkmem, src, dst, src_len,
+ lz4_compressbound(src_len));
+
+ if (out_len < 0)
+ goto exit;
+
+ *dst_len = out_len;
+
+ return 0;
+exit:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(lz4_compress);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
new file mode 100644
index 000000000000..d3414eae73a1
--- /dev/null
+++ b/lib/lz4/lz4_decompress.c
@@ -0,0 +1,326 @@
+/*
+ * LZ4 Decompressor for Linux kernel
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * Based on LZ4 implementation by Yann Collet.
+ *
+ * LZ4 - Fast LZ compression algorithm
+ * Copyright (C) 2011-2012, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ * - LZ4 source repository : http://code.google.com/p/lz4/
+ */
+
+#ifndef STATIC
+#include <linux/module.h>
+#include <linux/kernel.h>
+#endif
+#include <linux/lz4.h>
+
+#include <asm/unaligned.h>
+
+#include "lz4defs.h"
+
+static int lz4_uncompress(const char *source, char *dest, int osize)
+{
+ const BYTE *ip = (const BYTE *) source;
+ const BYTE *ref;
+ BYTE *op = (BYTE *) dest;
+ BYTE * const oend = op + osize;
+ BYTE *cpy;
+ unsigned token;
+ size_t length;
+ size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+ size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+#endif
+
+ while (1) {
+
+ /* get runlength */
+ token = *ip++;
+ length = (token >> ML_BITS);
+ if (length == RUN_MASK) {
+ size_t len;
+
+ len = *ip++;
+ for (; len == 255; length += 255)
+ len = *ip++;
+ length += len;
+ }
+
+ /* copy literals */
+ cpy = op + length;
+ if (unlikely(cpy > oend - COPYLENGTH)) {
+ /*
+ * Error: not enough place for another match
+ * (min 4) + 5 literals
+ */
+ if (cpy != oend)
+ goto _output_error;
+
+ memcpy(op, ip, length);
+ ip += length;
+ break; /* EOF */
+ }
+ LZ4_WILDCOPY(ip, op, cpy);
+ ip -= (op - cpy);
+ op = cpy;
+
+ /* get offset */
+ LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
+ ip += 2;
+
+ /* Error: offset create reference outside destination buffer */
+ if (unlikely(ref < (BYTE *const) dest))
+ goto _output_error;
+
+ /* get matchlength */
+ length = token & ML_MASK;
+ if (length == ML_MASK) {
+ for (; *ip == 255; length += 255)
+ ip++;
+ length += *ip++;
+ }
+
+ /* copy repeated sequence */
+ if (unlikely((op - ref) < STEPSIZE)) {
+#if LZ4_ARCH64
+ size_t dec64 = dec64table[op - ref];
+#else
+ const int dec64 = 0;
+#endif
+ op[0] = ref[0];
+ op[1] = ref[1];
+ op[2] = ref[2];
+ op[3] = ref[3];
+ op += 4;
+ ref += 4;
+ ref -= dec32table[op-ref];
+ PUT4(ref, op);
+ op += STEPSIZE - 4;
+ ref -= dec64;
+ } else {
+ LZ4_COPYSTEP(ref, op);
+ }
+ cpy = op + length - (STEPSIZE - 4);
+ if (cpy > (oend - COPYLENGTH)) {
+
+ /* Error: request to write beyond destination buffer */
+ if (cpy > oend)
+ goto _output_error;
+ LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+ while (op < cpy)
+ *op++ = *ref++;
+ op = cpy;
+ /*
+ * Check EOF (should never happen, since last 5 bytes
+ * are supposed to be literals)
+ */
+ if (op == oend)
+ goto _output_error;
+ continue;
+ }
+ LZ4_SECURECOPY(ref, op, cpy);
+ op = cpy; /* correction */
+ }
+ /* end of decoding */
+ return (int) (((char *)ip) - source);
+
+ /* write overflow error detected */
+_output_error:
+ return (int) (-(((char *)ip) - source));
+}
+
+static int lz4_uncompress_unknownoutputsize(const char *source, char *dest,
+ int isize, size_t maxoutputsize)
+{
+ const BYTE *ip = (const BYTE *) source;
+ const BYTE *const iend = ip + isize;
+ const BYTE *ref;
+
+
+ BYTE *op = (BYTE *) dest;
+ BYTE * const oend = op + maxoutputsize;
+ BYTE *cpy;
+
+ size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
+#if LZ4_ARCH64
+ size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3};
+#endif
+
+ /* Main Loop */
+ while (ip < iend) {
+
+ unsigned token;
+ size_t length;
+
+ /* get runlength */
+ token = *ip++;
+ length = (token >> ML_BITS);
+ if (length == RUN_MASK) {
+ int s = 255;
+ while ((ip < iend) && (s == 255)) {
+ s = *ip++;
+ length += s;
+ }
+ }
+ /* copy literals */
+ cpy = op + length;
+ if ((cpy > oend - COPYLENGTH) ||
+ (ip + length > iend - COPYLENGTH)) {
+
+ if (cpy > oend)
+ goto _output_error;/* writes beyond buffer */
+
+ if (ip + length != iend)
+ goto _output_error;/*
+ * Error: LZ4 format requires
+ * to consume all input
+ * at this stage
+ */
+ memcpy(op, ip, length);
+ op += length;
+ break;/* Necessarily EOF, due to parsing restrictions */
+ }
+ LZ4_WILDCOPY(ip, op, cpy);
+ ip -= (op - cpy);
+ op = cpy;
+
+ /* get offset */
+ LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
+ ip += 2;
+ if (ref < (BYTE * const) dest)
+ goto _output_error;
+ /*
+ * Error : offset creates reference
+ * outside of destination buffer
+ */
+
+ /* get matchlength */
+ length = (token & ML_MASK);
+ if (length == ML_MASK) {
+ while (ip < iend) {
+ int s = *ip++;
+ length += s;
+ if (s == 255)
+ continue;
+ break;
+ }
+ }
+
+ /* copy repeated sequence */
+ if (unlikely((op - ref) < STEPSIZE)) {
+#if LZ4_ARCH64
+ size_t dec64 = dec64table[op - ref];
+#else
+ const int dec64 = 0;
+#endif
+ op[0] = ref[0];
+ op[1] = ref[1];
+ op[2] = ref[2];
+ op[3] = ref[3];
+ op += 4;
+ ref += 4;
+ ref -= dec32table[op - ref];
+ PUT4(ref, op);
+ op += STEPSIZE - 4;
+ ref -= dec64;
+ } else {
+ LZ4_COPYSTEP(ref, op);
+ }
+ cpy = op + length - (STEPSIZE-4);
+ if (cpy > oend - COPYLENGTH) {
+ if (cpy > oend)
+ goto _output_error; /* write outside of buf */
+
+ LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
+ while (op < cpy)
+ *op++ = *ref++;
+ op = cpy;
+ /*
+ * Check EOF (should never happen, since last 5 bytes
+ * are supposed to be literals)
+ */
+ if (op == oend)
+ goto _output_error;
+ continue;
+ }
+ LZ4_SECURECOPY(ref, op, cpy);
+ op = cpy; /* correction */
+ }
+ /* end of decoding */
+ return (int) (((char *) op) - dest);
+
+ /* write overflow error detected */
+_output_error:
+ return (int) (-(((char *) ip) - source));
+}
+
+int lz4_decompress(const char *src, size_t *src_len, char *dest,
+ size_t actual_dest_len)
+{
+ int ret = -1;
+ int input_len = 0;
+
+ input_len = lz4_uncompress(src, dest, actual_dest_len);
+ if (input_len < 0)
+ goto exit_0;
+ *src_len = input_len;
+
+ return 0;
+exit_0:
+ return ret;
+}
+#ifndef STATIC
+EXPORT_SYMBOL_GPL(lz4_decompress);
+#endif
+
+int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
+ char *dest, size_t *dest_len)
+{
+ int ret = -1;
+ int out_len = 0;
+
+ out_len = lz4_uncompress_unknownoutputsize(src, dest, src_len,
+ *dest_len);
+ if (out_len < 0)
+ goto exit_0;
+ *dest_len = out_len;
+
+ return 0;
+exit_0:
+ return ret;
+}
+#ifndef STATIC
+EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4 Decompressor");
+#endif
diff --git a/lib/lz4/lz4defs.h b/lib/lz4/lz4defs.h
new file mode 100644
index 000000000000..abcecdc2d0f2
--- /dev/null
+++ b/lib/lz4/lz4defs.h
@@ -0,0 +1,156 @@
+/*
+ * lz4defs.h -- architecture specific defines
+ *
+ * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Detects 64 bits mode
+ */
+#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) \
+ || defined(__ppc64__) || defined(__LP64__))
+#define LZ4_ARCH64 1
+#else
+#define LZ4_ARCH64 0
+#endif
+
+/*
+ * Architecture-specific macros
+ */
+#define BYTE u8
+typedef struct _U16_S { u16 v; } U16_S;
+typedef struct _U32_S { u32 v; } U32_S;
+typedef struct _U64_S { u64 v; } U64_S;
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) \
+ || defined(CONFIG_ARM) && __LINUX_ARM_ARCH__ >= 6 \
+ && defined(ARM_EFFICIENT_UNALIGNED_ACCESS)
+
+#define A16(x) (((U16_S *)(x))->v)
+#define A32(x) (((U32_S *)(x))->v)
+#define A64(x) (((U64_S *)(x))->v)
+
+#define PUT4(s, d) (A32(d) = A32(s))
+#define PUT8(s, d) (A64(d) = A64(s))
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
+ do { \
+ A16(p) = v; \
+ p += 2; \
+ } while (0)
+#else /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+
+#define A64(x) get_unaligned((u64 *)&(((U16_S *)(x))->v))
+#define A32(x) get_unaligned((u32 *)&(((U16_S *)(x))->v))
+#define A16(x) get_unaligned((u16 *)&(((U16_S *)(x))->v))
+
+#define PUT4(s, d) \
+ put_unaligned(get_unaligned((const u32 *) s), (u32 *) d)
+#define PUT8(s, d) \
+ put_unaligned(get_unaligned((const u64 *) s), (u64 *) d)
+
+#define LZ4_WRITE_LITTLEENDIAN_16(p, v) \
+ do { \
+ put_unaligned(v, (u16 *)(p)); \
+ p += 2; \
+ } while (0)
+#endif
+
+#define COPYLENGTH 8
+#define ML_BITS 4
+#define ML_MASK ((1U << ML_BITS) - 1)
+#define RUN_BITS (8 - ML_BITS)
+#define RUN_MASK ((1U << RUN_BITS) - 1)
+#define MEMORY_USAGE 14
+#define MINMATCH 4
+#define SKIPSTRENGTH 6
+#define LASTLITERALS 5
+#define MFLIMIT (COPYLENGTH + MINMATCH)
+#define MINLENGTH (MFLIMIT + 1)
+#define MAXD_LOG 16
+#define MAXD (1 << MAXD_LOG)
+#define MAXD_MASK (u32)(MAXD - 1)
+#define MAX_DISTANCE (MAXD - 1)
+#define HASH_LOG (MAXD_LOG - 1)
+#define HASHTABLESIZE (1 << HASH_LOG)
+#define MAX_NB_ATTEMPTS 256
+#define OPTIMAL_ML (int)((ML_MASK-1)+MINMATCH)
+#define LZ4_64KLIMIT ((1<<16) + (MFLIMIT - 1))
+#define HASHLOG64K ((MEMORY_USAGE - 2) + 1)
+#define HASH64KTABLESIZE (1U << HASHLOG64K)
+#define LZ4_HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - (MEMORY_USAGE-2)))
+#define LZ4_HASH64K_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - HASHLOG64K))
+#define HASH_VALUE(p) (((A32(p)) * 2654435761U) >> \
+ ((MINMATCH * 8) - HASH_LOG))
+
+#if LZ4_ARCH64/* 64-bit */
+#define STEPSIZE 8
+
+#define LZ4_COPYSTEP(s, d) \
+ do { \
+ PUT8(s, d); \
+ d += 8; \
+ s += 8; \
+ } while (0)
+
+#define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
+
+#define LZ4_SECURECOPY(s, d, e) \
+ do { \
+ if (d < e) { \
+ LZ4_WILDCOPY(s, d, e); \
+ } \
+ } while (0)
+#define HTYPE u32
+
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clzll(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctzll(val) >> 3)
+#endif
+
+#else /* 32-bit */
+#define STEPSIZE 4
+
+#define LZ4_COPYSTEP(s, d) \
+ do { \
+ PUT4(s, d); \
+ d += 4; \
+ s += 4; \
+ } while (0)
+
+#define LZ4_COPYPACKET(s, d) \
+ do { \
+ LZ4_COPYSTEP(s, d); \
+ LZ4_COPYSTEP(s, d); \
+ } while (0)
+
+#define LZ4_SECURECOPY LZ4_WILDCOPY
+#define HTYPE const u8*
+
+#ifdef __BIG_ENDIAN
+#define LZ4_NBCOMMONBYTES(val) (__builtin_clz(val) >> 3)
+#else
+#define LZ4_NBCOMMONBYTES(val) (__builtin_ctz(val) >> 3)
+#endif
+
+#endif
+
+#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
+ (d = s - get_unaligned_le16(p))
+
+#define LZ4_WILDCOPY(s, d, e) \
+ do { \
+ LZ4_COPYPACKET(s, d); \
+ } while (d < e)
+
+#define LZ4_BLINDCOPY(s, d, l) \
+ do { \
+ u8 *e = (d) + l; \
+ LZ4_WILDCOPY(s, d, e); \
+ d = e; \
+ } while (0)
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
new file mode 100644
index 000000000000..a9a9c2a00c5c
--- /dev/null
+++ b/lib/lz4/lz4hc_compress.c
@@ -0,0 +1,539 @@
+/*
+ * LZ4 HC - High Compression Mode of LZ4
+ * Copyright (C) 2011-2012, Yann Collet.
+ * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * You can contact the author at :
+ * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
+ * - LZ4 source repository : http://code.google.com/p/lz4/
+ *
+ * Changed for kernel use by:
+ * Chanho Min <chanho.min@lge.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/lz4.h>
+#include <asm/unaligned.h>
+#include "lz4defs.h"
+
+struct lz4hc_data {
+ const u8 *base;
+ HTYPE hashtable[HASHTABLESIZE];
+ u16 chaintable[MAXD];
+ const u8 *nexttoupdate;
+} __attribute__((__packed__));
+
+static inline int lz4hc_init(struct lz4hc_data *hc4, const u8 *base)
+{
+ memset((void *)hc4->hashtable, 0, sizeof(hc4->hashtable));
+ memset(hc4->chaintable, 0xFF, sizeof(hc4->chaintable));
+
+#if LZ4_ARCH64
+ hc4->nexttoupdate = base + 1;
+#else
+ hc4->nexttoupdate = base;
+#endif
+ hc4->base = base;
+ return 1;
+}
+
+/* Update chains up to ip (excluded) */
+static inline void lz4hc_insert(struct lz4hc_data *hc4, const u8 *ip)
+{
+ u16 *chaintable = hc4->chaintable;
+ HTYPE *hashtable = hc4->hashtable;
+#if LZ4_ARCH64
+ const BYTE * const base = hc4->base;
+#else
+ const int base = 0;
+#endif
+
+ while (hc4->nexttoupdate < ip) {
+ const u8 *p = hc4->nexttoupdate;
+ size_t delta = p - (hashtable[HASH_VALUE(p)] + base);
+ if (delta > MAX_DISTANCE)
+ delta = MAX_DISTANCE;
+ chaintable[(size_t)(p) & MAXD_MASK] = (u16)delta;
+ hashtable[HASH_VALUE(p)] = (p) - base;
+ hc4->nexttoupdate++;
+ }
+}
+
+static inline size_t lz4hc_commonlength(const u8 *p1, const u8 *p2,
+ const u8 *const matchlimit)
+{
+ const u8 *p1t = p1;
+
+ while (p1t < matchlimit - (STEPSIZE - 1)) {
+#if LZ4_ARCH64
+ u64 diff = A64(p2) ^ A64(p1t);
+#else
+ u32 diff = A32(p2) ^ A32(p1t);
+#endif
+ if (!diff) {
+ p1t += STEPSIZE;
+ p2 += STEPSIZE;
+ continue;
+ }
+ p1t += LZ4_NBCOMMONBYTES(diff);
+ return p1t - p1;
+ }
+#if LZ4_ARCH64
+ if ((p1t < (matchlimit-3)) && (A32(p2) == A32(p1t))) {
+ p1t += 4;
+ p2 += 4;
+ }
+#endif
+
+ if ((p1t < (matchlimit - 1)) && (A16(p2) == A16(p1t))) {
+ p1t += 2;
+ p2 += 2;
+ }
+ if ((p1t < matchlimit) && (*p2 == *p1t))
+ p1t++;
+ return p1t - p1;
+}
+
+static inline int lz4hc_insertandfindbestmatch(struct lz4hc_data *hc4,
+ const u8 *ip, const u8 *const matchlimit, const u8 **matchpos)
+{
+ u16 *const chaintable = hc4->chaintable;
+ HTYPE *const hashtable = hc4->hashtable;
+ const u8 *ref;
+#if LZ4_ARCH64
+ const BYTE * const base = hc4->base;
+#else
+ const int base = 0;
+#endif
+ int nbattempts = MAX_NB_ATTEMPTS;
+ size_t repl = 0, ml = 0;
+ u16 delta;
+
+ /* HC4 match finder */
+ lz4hc_insert(hc4, ip);
+ ref = hashtable[HASH_VALUE(ip)] + base;
+
+ /* potential repetition */
+ if (ref >= ip-4) {
+ /* confirmed */
+ if (A32(ref) == A32(ip)) {
+ delta = (u16)(ip-ref);
+ repl = ml = lz4hc_commonlength(ip + MINMATCH,
+ ref + MINMATCH, matchlimit) + MINMATCH;
+ *matchpos = ref;
+ }
+ ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+ }
+
+ while ((ref >= ip - MAX_DISTANCE) && nbattempts) {
+ nbattempts--;
+ if (*(ref + ml) == *(ip + ml)) {
+ if (A32(ref) == A32(ip)) {
+ size_t mlt =
+ lz4hc_commonlength(ip + MINMATCH,
+ ref + MINMATCH, matchlimit) + MINMATCH;
+ if (mlt > ml) {
+ ml = mlt;
+ *matchpos = ref;
+ }
+ }
+ }
+ ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+ }
+
+ /* Complete table */
+ if (repl) {
+ const BYTE *ptr = ip;
+ const BYTE *end;
+ end = ip + repl - (MINMATCH-1);
+ /* Pre-Load */
+ while (ptr < end - delta) {
+ chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
+ ptr++;
+ }
+ do {
+ chaintable[(size_t)(ptr) & MAXD_MASK] = delta;
+ /* Head of chain */
+ hashtable[HASH_VALUE(ptr)] = (ptr) - base;
+ ptr++;
+ } while (ptr < end);
+ hc4->nexttoupdate = end;
+ }
+
+ return (int)ml;
+}
+
+static inline int lz4hc_insertandgetwidermatch(struct lz4hc_data *hc4,
+ const u8 *ip, const u8 *startlimit, const u8 *matchlimit, int longest,
+ const u8 **matchpos, const u8 **startpos)
+{
+ u16 *const chaintable = hc4->chaintable;
+ HTYPE *const hashtable = hc4->hashtable;
+#if LZ4_ARCH64
+ const BYTE * const base = hc4->base;
+#else
+ const int base = 0;
+#endif
+ const u8 *ref;
+ int nbattempts = MAX_NB_ATTEMPTS;
+ int delta = (int)(ip - startlimit);
+
+ /* First Match */
+ lz4hc_insert(hc4, ip);
+ ref = hashtable[HASH_VALUE(ip)] + base;
+
+ while ((ref >= ip - MAX_DISTANCE) && (ref >= hc4->base)
+ && (nbattempts)) {
+ nbattempts--;
+ if (*(startlimit + longest) == *(ref - delta + longest)) {
+ if (A32(ref) == A32(ip)) {
+ const u8 *reft = ref + MINMATCH;
+ const u8 *ipt = ip + MINMATCH;
+ const u8 *startt = ip;
+
+ while (ipt < matchlimit-(STEPSIZE - 1)) {
+ #if LZ4_ARCH64
+ u64 diff = A64(reft) ^ A64(ipt);
+ #else
+ u32 diff = A32(reft) ^ A32(ipt);
+ #endif
+
+ if (!diff) {
+ ipt += STEPSIZE;
+ reft += STEPSIZE;
+ continue;
+ }
+ ipt += LZ4_NBCOMMONBYTES(diff);
+ goto _endcount;
+ }
+ #if LZ4_ARCH64
+ if ((ipt < (matchlimit - 3))
+ && (A32(reft) == A32(ipt))) {
+ ipt += 4;
+ reft += 4;
+ }
+ ipt += 2;
+ #endif
+ if ((ipt < (matchlimit - 1))
+ && (A16(reft) == A16(ipt))) {
+ reft += 2;
+ }
+ if ((ipt < matchlimit) && (*reft == *ipt))
+ ipt++;
+_endcount:
+ reft = ref;
+
+ while ((startt > startlimit)
+ && (reft > hc4->base)
+ && (startt[-1] == reft[-1])) {
+ startt--;
+ reft--;
+ }
+
+ if ((ipt - startt) > longest) {
+ longest = (int)(ipt - startt);
+ *matchpos = reft;
+ *startpos = startt;
+ }
+ }
+ }
+ ref -= (size_t)chaintable[(size_t)(ref) & MAXD_MASK];
+ }
+ return longest;
+}
+
+static inline int lz4_encodesequence(const u8 **ip, u8 **op, const u8 **anchor,
+ int ml, const u8 *ref)
+{
+ int length, len;
+ u8 *token;
+
+ /* Encode Literal length */
+ length = (int)(*ip - *anchor);
+ token = (*op)++;
+ if (length >= (int)RUN_MASK) {
+ *token = (RUN_MASK << ML_BITS);
+ len = length - RUN_MASK;
+ for (; len > 254 ; len -= 255)
+ *(*op)++ = 255;
+ *(*op)++ = (u8)len;
+ } else
+ *token = (length << ML_BITS);
+
+ /* Copy Literals */
+ LZ4_BLINDCOPY(*anchor, *op, length);
+
+ /* Encode Offset */
+ LZ4_WRITE_LITTLEENDIAN_16(*op, (u16)(*ip - ref));
+
+ /* Encode MatchLength */
+ len = (int)(ml - MINMATCH);
+ if (len >= (int)ML_MASK) {
+ *token += ML_MASK;
+ len -= ML_MASK;
+ for (; len > 509 ; len -= 510) {
+ *(*op)++ = 255;
+ *(*op)++ = 255;
+ }
+ if (len > 254) {
+ len -= 255;
+ *(*op)++ = 255;
+ }
+ *(*op)++ = (u8)len;
+ } else
+ *token += len;
+
+ /* Prepare next loop */
+ *ip += ml;
+ *anchor = *ip;
+
+ return 0;
+}
+
+int lz4_compresshcctx(struct lz4hc_data *ctx,
+ const char *source,
+ char *dest,
+ int isize)
+{
+ const u8 *ip = (const u8 *)source;
+ const u8 *anchor = ip;
+ const u8 *const iend = ip + isize;
+ const u8 *const mflimit = iend - MFLIMIT;
+ const u8 *const matchlimit = (iend - LASTLITERALS);
+
+ u8 *op = (u8 *)dest;
+
+ int ml, ml2, ml3, ml0;
+ const u8 *ref = NULL;
+ const u8 *start2 = NULL;
+ const u8 *ref2 = NULL;
+ const u8 *start3 = NULL;
+ const u8 *ref3 = NULL;
+ const u8 *start0;
+ const u8 *ref0;
+ int lastrun;
+
+ ip++;
+
+ /* Main Loop */
+ while (ip < mflimit) {
+ ml = lz4hc_insertandfindbestmatch(ctx, ip, matchlimit, (&ref));
+ if (!ml) {
+ ip++;
+ continue;
+ }
+
+ /* saved, in case we would skip too much */
+ start0 = ip;
+ ref0 = ref;
+ ml0 = ml;
+_search2:
+ if (ip+ml < mflimit)
+ ml2 = lz4hc_insertandgetwidermatch(ctx, ip + ml - 2,
+ ip + 1, matchlimit, ml, &ref2, &start2);
+ else
+ ml2 = ml;
+ /* No better match */
+ if (ml2 == ml) {
+ lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ continue;
+ }
+
+ if (start0 < ip) {
+ /* empirical */
+ if (start2 < ip + ml0) {
+ ip = start0;
+ ref = ref0;
+ ml = ml0;
+ }
+ }
+ /*
+ * Here, start0==ip
+ * First Match too small : removed
+ */
+ if ((start2 - ip) < 3) {
+ ml = ml2;
+ ip = start2;
+ ref = ref2;
+ goto _search2;
+ }
+
+_search3:
+ /*
+ * Currently we have :
+ * ml2 > ml1, and
+ * ip1+3 <= ip2 (usually < ip1+ml1)
+ */
+ if ((start2 - ip) < OPTIMAL_ML) {
+ int correction;
+ int new_ml = ml;
+ if (new_ml > OPTIMAL_ML)
+ new_ml = OPTIMAL_ML;
+ if (ip + new_ml > start2 + ml2 - MINMATCH)
+ new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
+ correction = new_ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ }
+ /*
+ * Now, we have start2 = ip+new_ml,
+ * with new_ml=min(ml, OPTIMAL_ML=18)
+ */
+ if (start2 + ml2 < mflimit)
+ ml3 = lz4hc_insertandgetwidermatch(ctx,
+ start2 + ml2 - 3, start2, matchlimit,
+ ml2, &ref3, &start3);
+ else
+ ml3 = ml2;
+
+ /* No better match : 2 sequences to encode */
+ if (ml3 == ml2) {
+ /* ip & ref are known; Now for ml */
+ if (start2 < ip+ml)
+ ml = (int)(start2 - ip);
+
+ /* Now, encode 2 sequences */
+ lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ ip = start2;
+ lz4_encodesequence(&ip, &op, &anchor, ml2, ref2);
+ continue;
+ }
+
+ /* Not enough space for match 2 : remove it */
+ if (start3 < ip + ml + 3) {
+ /*
+ * can write Seq1 immediately ==> Seq2 is removed,
+ * so Seq3 becomes Seq1
+ */
+ if (start3 >= (ip + ml)) {
+ if (start2 < ip + ml) {
+ int correction =
+ (int)(ip + ml - start2);
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ if (ml2 < MINMATCH) {
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ }
+ }
+
+ lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+ ip = start3;
+ ref = ref3;
+ ml = ml3;
+
+ start0 = start2;
+ ref0 = ref2;
+ ml0 = ml2;
+ goto _search2;
+ }
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+ goto _search3;
+ }
+
+ /*
+ * OK, now we have 3 ascending matches; let's write at least
+ * the first one ip & ref are known; Now for ml
+ */
+ if (start2 < ip + ml) {
+ if ((start2 - ip) < (int)ML_MASK) {
+ int correction;
+ if (ml > OPTIMAL_ML)
+ ml = OPTIMAL_ML;
+ if (ip + ml > start2 + ml2 - MINMATCH)
+ ml = (int)(start2 - ip) + ml2
+ - MINMATCH;
+ correction = ml - (int)(start2 - ip);
+ if (correction > 0) {
+ start2 += correction;
+ ref2 += correction;
+ ml2 -= correction;
+ }
+ } else
+ ml = (int)(start2 - ip);
+ }
+ lz4_encodesequence(&ip, &op, &anchor, ml, ref);
+
+ ip = start2;
+ ref = ref2;
+ ml = ml2;
+
+ start2 = start3;
+ ref2 = ref3;
+ ml2 = ml3;
+
+ goto _search3;
+ }
+
+ /* Encode Last Literals */
+ lastrun = (int)(iend - anchor);
+ if (lastrun >= (int)RUN_MASK) {
+ *op++ = (RUN_MASK << ML_BITS);
+ lastrun -= RUN_MASK;
+ for (; lastrun > 254 ; lastrun -= 255)
+ *op++ = 255;
+ *op++ = (u8) lastrun;
+ } else
+ *op++ = (lastrun << ML_BITS);
+ memcpy(op, anchor, iend - anchor);
+ op += iend - anchor;
+ /* End */
+ return (int) (((char *)op) - dest);
+}
+
+int lz4hc_compress(const unsigned char *src, size_t src_len,
+ unsigned char *dst, size_t *dst_len, void *wrkmem)
+{
+ int ret = -1;
+ int out_len = 0;
+
+ struct lz4hc_data *hc4 = (struct lz4hc_data *)wrkmem;
+ lz4hc_init(hc4, (const u8 *)src);
+ out_len = lz4_compresshcctx((struct lz4hc_data *)hc4, (const u8 *)src,
+ (char *)dst, (int)src_len);
+
+ if (out_len < 0)
+ goto exit;
+
+ *dst_len = out_len;
+ return 0;
+
+exit:
+ return ret;
+}
+EXPORT_SYMBOL_GPL(lz4hc_compress);
+
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("LZ4HC compressor");
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
new file mode 100644
index 000000000000..79c61580a211
--- /dev/null
+++ b/lib/percpu-refcount.c
@@ -0,0 +1,243 @@
+#define pr_fmt(fmt) "%s: " fmt "\n", __func__
+
+#include <linux/kernel.h>
+#include <linux/jiffies.h>
+#include <linux/percpu-refcount.h>
+#include <linux/rcupdate.h>
+
+/*
+ * A percpu refcount can be in 4 different modes. The state is tracked in the
+ * low two bits of percpu_ref->pcpu_count:
+ *
+ * PCPU_REF_NONE - the initial state, no percpu counters allocated.
+ *
+ * PCPU_REF_PTR - using percpu counters for the refcount.
+ *
+ * PCPU_REF_DYING - we're shutting down so get()/put() should use the embedded
+ * atomic counter, but we're not finished updating the atomic counter from the
+ * percpu counters - this means that percpu_ref_put() can't check for the ref
+ * hitting 0 yet.
+ *
+ * PCPU_REF_DEAD - we've finished the teardown sequence, percpu_ref_put() should
+ * now check for the ref hitting 0.
+ *
+ * In PCPU_REF_NONE mode, we need to count the number of times percpu_ref_get()
+ * is called; this is done with the high bits of the raw atomic counter. We also
+ * track the time, in jiffies, when the get count last wrapped - this is done
+ * with the remaining bits of percpu_ref->percpu_count.
+ *
+ * So, when percpu_ref_get() is called it increments the get count and checks if
+ * it wrapped; if it did, it checks if the last time it wrapped was less than
+ * one second ago; if so, we want to allocate percpu counters.
+ *
+ * PCPU_COUNT_BITS determines the threshold where we convert to percpu: of the
+ * raw 64 bit counter, we use PCPU_COUNT_BITS for the refcount, and the
+ * remaining (high) bits to count the number of times percpu_ref_get() has been
+ * called. It's currently (completely arbitrarily) 16384 times in one second.
+ *
+ * Percpu mode (PCPU_REF_PTR):
+ *
+ * In percpu mode all we do on get and put is increment or decrement the cpu
+ * local counter, which is a 32 bit unsigned int.
+ *
+ * Note that all the gets() could be happening on one cpu, and all the puts() on
+ * another - the individual cpu counters can wrap (potentially many times).
+ *
+ * But this is fine because we don't need to check for the ref hitting 0 in
+ * percpu mode; before we set the state to PCPU_REF_DEAD we simply sum up all
+ * the percpu counters and add them to the atomic counter. Since addition and
+ * subtraction in modular arithmatic is still associative, the result will be
+ * correct.
+ */
+
+#define PCPU_COUNT_BITS 50
+#define PCPU_COUNT_MASK ((1LL << PCPU_COUNT_BITS) - 1)
+
+#define PCPU_STATUS_BITS 2
+#define PCPU_STATUS_MASK ((1 << PCPU_STATUS_BITS) - 1)
+
+#define PCPU_REF_PTR 0
+#define PCPU_REF_NONE 1
+#define PCPU_REF_DYING 2
+#define PCPU_REF_DEAD 3
+
+#define REF_STATUS(count) (count & PCPU_STATUS_MASK)
+
+/**
+ * percpu_ref_init - initialize a dynamic percpu refcount
+ *
+ * Initializes the refcount in single atomic counter mode with a refcount of 1;
+ * analagous to atomic_set(ref, 1).
+ */
+void percpu_ref_init(struct percpu_ref *ref)
+{
+ unsigned long now = jiffies;
+
+ atomic64_set(&ref->count, 1);
+
+ now <<= PCPU_STATUS_BITS;
+ now |= PCPU_REF_NONE;
+
+ ref->pcpu_count = now;
+}
+
+static void percpu_ref_alloc(struct percpu_ref *ref, unsigned long pcpu_count)
+{
+ unsigned long new, now = jiffies;
+
+ now <<= PCPU_STATUS_BITS;
+ now |= PCPU_REF_NONE;
+
+ if (now - pcpu_count <= HZ << PCPU_STATUS_BITS) {
+ rcu_read_unlock();
+ new = (unsigned long) alloc_percpu(unsigned);
+ rcu_read_lock();
+
+ if (!new)
+ goto update_time;
+
+ BUG_ON(new & PCPU_STATUS_MASK);
+
+ if (cmpxchg(&ref->pcpu_count, pcpu_count, new) != pcpu_count)
+ free_percpu((void __percpu *) new);
+ else
+ pr_debug("created");
+ } else {
+update_time:
+ new = now;
+ cmpxchg(&ref->pcpu_count, pcpu_count, new);
+ }
+}
+
+void __percpu_ref_get(struct percpu_ref *ref, bool alloc)
+{
+ unsigned long pcpu_count;
+ uint64_t v;
+
+ pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+ if (REF_STATUS(pcpu_count) == PCPU_REF_PTR) {
+ /* for rcu - we're not using rcu_dereference() */
+ smp_read_barrier_depends();
+ __this_cpu_inc(*((unsigned __percpu *) pcpu_count));
+ } else {
+ v = atomic64_add_return(1 + (1ULL << PCPU_COUNT_BITS),
+ &ref->count);
+
+ if (!(v >> PCPU_COUNT_BITS) &&
+ REF_STATUS(pcpu_count) == PCPU_REF_NONE && alloc)
+ percpu_ref_alloc(ref, pcpu_count);
+ }
+}
+
+/**
+ * percpu_ref_put - decrement a dynamic percpu refcount
+ *
+ * Returns true if the result is 0, otherwise false; only checks for the ref
+ * hitting 0 after percpu_ref_kill() has been called. Analagous to
+ * atomic_dec_and_test().
+ */
+int percpu_ref_put(struct percpu_ref *ref)
+{
+ unsigned long pcpu_count;
+ uint64_t v;
+ int ret = 0;
+
+ rcu_read_lock();
+
+ pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+ switch (REF_STATUS(pcpu_count)) {
+ case PCPU_REF_PTR:
+ /* for rcu - we're not using rcu_dereference() */
+ smp_read_barrier_depends();
+ __this_cpu_dec(*((unsigned __percpu *) pcpu_count));
+ break;
+ case PCPU_REF_NONE:
+ case PCPU_REF_DYING:
+ atomic64_dec(&ref->count);
+ break;
+ case PCPU_REF_DEAD:
+ v = atomic64_dec_return(&ref->count);
+ v &= PCPU_COUNT_MASK;
+
+ ret = v == 0;
+ break;
+ }
+
+ rcu_read_unlock();
+
+ return ret;
+}
+
+/**
+ * percpu_ref_kill - prepare a dynamic percpu refcount for teardown
+ *
+ * Must be called before dropping the initial ref, so that percpu_ref_put()
+ * knows to check for the refcount hitting 0. If the refcount was in percpu
+ * mode, converts it back to single atomic counter mode.
+ *
+ * Returns true the first time called on @ref and false if @ref is already
+ * shutting down, so it may be used by the caller for synchronizing other parts
+ * of a two stage shutdown.
+ */
+int percpu_ref_kill(struct percpu_ref *ref)
+{
+ unsigned long old, new, status, pcpu_count;
+
+ pcpu_count = ACCESS_ONCE(ref->pcpu_count);
+
+ do {
+ status = REF_STATUS(pcpu_count);
+
+ switch (status) {
+ case PCPU_REF_PTR:
+ new = PCPU_REF_DYING;
+ break;
+ case PCPU_REF_NONE:
+ new = PCPU_REF_DEAD;
+ break;
+ case PCPU_REF_DYING:
+ case PCPU_REF_DEAD:
+ return 0;
+ }
+
+ old = pcpu_count;
+ pcpu_count = cmpxchg(&ref->pcpu_count, old, new);
+ } while (pcpu_count != old);
+
+ if (status == PCPU_REF_PTR) {
+ unsigned count = 0, cpu;
+
+ synchronize_rcu();
+
+ for_each_possible_cpu(cpu)
+ count += *per_cpu_ptr((unsigned __percpu *) pcpu_count, cpu);
+
+ pr_debug("global %lli pcpu %i",
+ atomic64_read(&ref->count) & PCPU_COUNT_MASK,
+ (int) count);
+
+ atomic64_add((int) count, &ref->count);
+ smp_wmb();
+ /* Between setting global count and setting PCPU_REF_DEAD */
+ ref->pcpu_count = PCPU_REF_DEAD;
+
+ free_percpu((unsigned __percpu *) pcpu_count);
+ }
+
+ return 1;
+}
+
+/**
+ * percpu_ref_dead - check if a dynamic percpu refcount is shutting down
+ *
+ * Returns true if percpu_ref_kill() has been called on @ref, false otherwise.
+ */
+int percpu_ref_dead(struct percpu_ref *ref)
+{
+ unsigned status = REF_STATUS(ref->pcpu_count);
+
+ return status == PCPU_REF_DYING ||
+ status == PCPU_REF_DEAD;
+}
diff --git a/lib/show_mem.c b/lib/show_mem.c
index 4407f8c9b1f7..b7c72311ad0c 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -18,6 +18,9 @@ void show_mem(unsigned int filter)
printk("Mem-Info:\n");
show_free_areas(filter);
+ if (filter & SHOW_MEM_FILTER_PAGE_COUNT)
+ return;
+
for_each_online_pgdat(pgdat) {
unsigned long i, flags;
diff --git a/arch/s390/lib/usercopy.c b/lib/usercopy.c
index 14b363fec8a2..4f5b1ddbcd25 100644
--- a/arch/s390/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -1,5 +1,6 @@
-#include <linux/module.h>
+#include <linux/export.h>
#include <linux/bug.h>
+#include <linux/uaccess.h>
void copy_from_user_overflow(void)
{
diff --git a/lib/uuid.c b/lib/uuid.c
index 52a6fe6387de..398821e4dce1 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -25,13 +25,7 @@
static void __uuid_gen_common(__u8 b[16])
{
- int i;
- u32 r;
-
- for (i = 0; i < 4; i++) {
- r = random32();
- memcpy(b + i * 4, &r, 4);
- }
+ prandom_bytes(b, 16);
/* reversion 0b10 */
b[8] = (b[8] & 0x3F) | 0x80;
}
diff --git a/mm/balloon_compaction.c b/mm/balloon_compaction.c
index 07dbc8ec46cf..2c8ce496804f 100644
--- a/mm/balloon_compaction.c
+++ b/mm/balloon_compaction.c
@@ -242,6 +242,7 @@ bool balloon_page_isolate(struct page *page)
if (__is_movable_balloon_page(page) &&
page_count(page) == 2) {
__isolate_balloon_page(page);
+ balloon_event_count(COMPACTBALLOONISOLATED);
unlock_page(page);
return true;
}
@@ -265,6 +266,7 @@ void balloon_page_putback(struct page *page)
__putback_balloon_page(page);
/* drop the extra ref count taken for page isolation */
put_page(page);
+ balloon_event_count(COMPACTBALLOONRETURNED);
} else {
WARN_ON(1);
dump_page(page);
diff --git a/mm/bounce.c b/mm/bounce.c
index 5f8901768602..a5c2ec3589cb 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -181,32 +181,13 @@ static void bounce_end_io_read_isa(struct bio *bio, int err)
#ifdef CONFIG_NEED_BOUNCE_POOL
static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
{
- struct page *page;
- struct backing_dev_info *bdi;
- struct address_space *mapping;
- struct bio_vec *from;
- int i;
-
if (bio_data_dir(bio) != WRITE)
return 0;
if (!bdi_cap_stable_pages_required(&q->backing_dev_info))
return 0;
- /*
- * Based on the first page that has a valid mapping, decide whether or
- * not we have to employ bounce buffering to guarantee stable pages.
- */
- bio_for_each_segment(from, bio, i) {
- page = from->bv_page;
- mapping = page_mapping(page);
- if (!mapping)
- continue;
- bdi = mapping->backing_dev_info;
- return mapping->host->i_sb->s_flags & MS_SNAP_STABLE;
- }
-
- return 0;
+ return test_bit(BIO_SNAP_STABLE, &bio->bi_flags);
}
#else
static int must_snapshot_stable_pages(struct request_queue *q, struct bio *bio)
diff --git a/mm/cleancache.c b/mm/cleancache.c
index d76ba74be2d0..5875f48ce279 100644
--- a/mm/cleancache.c
+++ b/mm/cleancache.c
@@ -19,20 +19,10 @@
#include <linux/cleancache.h>
/*
- * This global enablement flag may be read thousands of times per second
- * by cleancache_get/put/invalidate even on systems where cleancache_ops
- * is not claimed (e.g. cleancache is config'ed on but remains
- * disabled), so is preferred to the slower alternative: a function
- * call that checks a non-global.
- */
-int cleancache_enabled __read_mostly;
-EXPORT_SYMBOL(cleancache_enabled);
-
-/*
* cleancache_ops is set by cleancache_ops_register to contain the pointers
* to the cleancache "backend" implementation functions.
*/
-static struct cleancache_ops cleancache_ops __read_mostly;
+static struct cleancache_ops *cleancache_ops __read_mostly;
/*
* Counters available via /sys/kernel/debug/frontswap (if debugfs is
@@ -45,15 +35,101 @@ static u64 cleancache_puts;
static u64 cleancache_invalidates;
/*
- * register operations for cleancache, returning previous thus allowing
- * detection of multiple backends and possible nesting
+ * When no backend is registered all calls to init_fs and init_shared_fs
+ * are registered and fake poolids (FAKE_FS_POOLID_OFFSET or
+ * FAKE_SHARED_FS_POOLID_OFFSET, plus offset in the respective array
+ * [shared_|]fs_poolid_map) are given to the respective super block
+ * (sb->cleancache_poolid) and no tmem_pools are created. When a backend
+ * registers with cleancache the previous calls to init_fs and init_shared_fs
+ * are executed to create tmem_pools and set the respective poolids. While no
+ * backend is registered all "puts", "gets" and "flushes" are ignored or failed.
+ */
+#define MAX_INITIALIZABLE_FS 32
+#define FAKE_FS_POOLID_OFFSET 1000
+#define FAKE_SHARED_FS_POOLID_OFFSET 2000
+
+#define FS_NO_BACKEND (-1)
+#define FS_UNKNOWN (-2)
+static int fs_poolid_map[MAX_INITIALIZABLE_FS];
+static int shared_fs_poolid_map[MAX_INITIALIZABLE_FS];
+static char *uuids[MAX_INITIALIZABLE_FS];
+/*
+ * Mutex for the [shared_|]fs_poolid_map to guard against multiple threads
+ * invoking umount (and ending in __cleancache_invalidate_fs) and also multiple
+ * threads calling mount (and ending up in __cleancache_init_[shared|]fs).
+ */
+static DEFINE_MUTEX(poolid_mutex);
+/*
+ * When set to false (default) all calls to the cleancache functions, except
+ * the __cleancache_invalidate_fs and __cleancache_init_[shared|]fs are guarded
+ * by the if (!cleancache_ops) return. This means multiple threads (from
+ * different filesystems) will be checking cleancache_ops. The usage of a
+ * bool instead of a atomic_t or a bool guarded by a spinlock is OK - we are
+ * OK if the time between the backend's have been initialized (and
+ * cleancache_ops has been set to not NULL) and when the filesystems start
+ * actually calling the backends. The inverse (when unloading) is obviously
+ * not good - but this shim does not do that (yet).
+ */
+
+/*
+ * The backends and filesystems work all asynchronously. This is b/c the
+ * backends can be built as modules.
+ * The usual sequence of events is:
+ * a) mount / -> __cleancache_init_fs is called. We set the
+ * [shared_|]fs_poolid_map and uuids for.
+ *
+ * b). user does I/Os -> we call the rest of __cleancache_* functions
+ * which return immediately as cleancache_ops is false.
+ *
+ * c). modprobe zcache -> cleancache_register_ops. We init the backend
+ * and set cleancache_ops to true, and for any fs_poolid_map
+ * (which is set by __cleancache_init_fs) we initialize the poolid.
+ *
+ * d). user does I/Os -> now that cleancache_ops is true all the
+ * __cleancache_* functions can call the backend. They all check
+ * that fs_poolid_map is valid and if so invoke the backend.
+ *
+ * e). umount / -> __cleancache_invalidate_fs, the fs_poolid_map is
+ * reset (which is the second check in the __cleancache_* ops
+ * to call the backend).
+ *
+ * The sequence of event could also be c), followed by a), and d). and e). The
+ * c) would not happen anymore. There is also the chance of c), and one thread
+ * doing a) + d), and another doing e). For that case we depend on the
+ * filesystem calling __cleancache_invalidate_fs in the proper sequence (so
+ * that it handles all I/Os before it invalidates the fs (which is last part
+ * of unmounting process).
+ *
+ * Note: The acute reader will notice that there is no "rmmod zcache" case.
+ * This is b/c the functionality for that is not yet implemented and when
+ * done, will require some extra locking not yet devised.
+ */
+
+/*
+ * Register operations for cleancache, returning previous thus allowing
+ * detection of multiple backends and possible nesting.
*/
-struct cleancache_ops cleancache_register_ops(struct cleancache_ops *ops)
+struct cleancache_ops *cleancache_register_ops(struct cleancache_ops *ops)
{
- struct cleancache_ops old = cleancache_ops;
+ struct cleancache_ops *old = cleancache_ops;
+ int i;
- cleancache_ops = *ops;
- cleancache_enabled = 1;
+ mutex_lock(&poolid_mutex);
+ for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+ if (fs_poolid_map[i] == FS_NO_BACKEND)
+ fs_poolid_map[i] = ops->init_fs(PAGE_SIZE);
+ if (shared_fs_poolid_map[i] == FS_NO_BACKEND)
+ shared_fs_poolid_map[i] = ops->init_shared_fs
+ (uuids[i], PAGE_SIZE);
+ }
+ /*
+ * We MUST set cleancache_ops _after_ we have called the backends
+ * init_fs or init_shared_fs functions. Otherwise the compiler might
+ * re-order where cleancache_ops is set in this function.
+ */
+ barrier();
+ cleancache_ops = ops;
+ mutex_unlock(&poolid_mutex);
return old;
}
EXPORT_SYMBOL(cleancache_register_ops);
@@ -61,15 +137,42 @@ EXPORT_SYMBOL(cleancache_register_ops);
/* Called by a cleancache-enabled filesystem at time of mount */
void __cleancache_init_fs(struct super_block *sb)
{
- sb->cleancache_poolid = (*cleancache_ops.init_fs)(PAGE_SIZE);
+ int i;
+
+ mutex_lock(&poolid_mutex);
+ for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+ if (fs_poolid_map[i] == FS_UNKNOWN) {
+ sb->cleancache_poolid = i + FAKE_FS_POOLID_OFFSET;
+ if (cleancache_ops)
+ fs_poolid_map[i] = cleancache_ops->init_fs(PAGE_SIZE);
+ else
+ fs_poolid_map[i] = FS_NO_BACKEND;
+ break;
+ }
+ }
+ mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_fs);
/* Called by a cleancache-enabled clustered filesystem at time of mount */
void __cleancache_init_shared_fs(char *uuid, struct super_block *sb)
{
- sb->cleancache_poolid =
- (*cleancache_ops.init_shared_fs)(uuid, PAGE_SIZE);
+ int i;
+
+ mutex_lock(&poolid_mutex);
+ for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+ if (shared_fs_poolid_map[i] == FS_UNKNOWN) {
+ sb->cleancache_poolid = i + FAKE_SHARED_FS_POOLID_OFFSET;
+ uuids[i] = uuid;
+ if (cleancache_ops)
+ shared_fs_poolid_map[i] = cleancache_ops->init_shared_fs
+ (uuid, PAGE_SIZE);
+ else
+ shared_fs_poolid_map[i] = FS_NO_BACKEND;
+ break;
+ }
+ }
+ mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_init_shared_fs);
@@ -99,27 +202,53 @@ static int cleancache_get_key(struct inode *inode,
}
/*
+ * Returns a pool_id that is associated with a given fake poolid.
+ */
+static int get_poolid_from_fake(int fake_pool_id)
+{
+ if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET)
+ return shared_fs_poolid_map[fake_pool_id -
+ FAKE_SHARED_FS_POOLID_OFFSET];
+ else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET)
+ return fs_poolid_map[fake_pool_id - FAKE_FS_POOLID_OFFSET];
+ return FS_NO_BACKEND;
+}
+
+/*
* "Get" data from cleancache associated with the poolid/inode/index
* that were specified when the data was put to cleanache and, if
* successful, use it to fill the specified page with data and return 0.
* The pageframe is unchanged and returns -1 if the get fails.
* Page must be locked by caller.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
*/
int __cleancache_get_page(struct page *page)
{
int ret = -1;
int pool_id;
+ int fake_pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };
+ if (!cleancache_ops) {
+ cleancache_failed_gets++;
+ goto out;
+ }
+
VM_BUG_ON(!PageLocked(page));
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
- if (pool_id < 0)
+ fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
+ if (fake_pool_id < 0)
goto out;
+ pool_id = get_poolid_from_fake(fake_pool_id);
if (cleancache_get_key(page->mapping->host, &key) < 0)
goto out;
- ret = (*cleancache_ops.get_page)(pool_id, key, page->index, page);
+ if (pool_id >= 0)
+ ret = cleancache_ops->get_page(pool_id,
+ key, page->index, page);
if (ret == 0)
cleancache_succ_gets++;
else
@@ -134,17 +263,32 @@ EXPORT_SYMBOL(__cleancache_get_page);
* (previously-obtained per-filesystem) poolid and the page's,
* inode and page index. Page must be locked. Note that a put_page
* always "succeeds", though a subsequent get_page may succeed or fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
*/
void __cleancache_put_page(struct page *page)
{
int pool_id;
+ int fake_pool_id;
struct cleancache_filekey key = { .u.key = { 0 } };
+ if (!cleancache_ops) {
+ cleancache_puts++;
+ return;
+ }
+
VM_BUG_ON(!PageLocked(page));
- pool_id = page->mapping->host->i_sb->cleancache_poolid;
+ fake_pool_id = page->mapping->host->i_sb->cleancache_poolid;
+ if (fake_pool_id < 0)
+ return;
+
+ pool_id = get_poolid_from_fake(fake_pool_id);
+
if (pool_id >= 0 &&
- cleancache_get_key(page->mapping->host, &key) >= 0) {
- (*cleancache_ops.put_page)(pool_id, key, page->index, page);
+ cleancache_get_key(page->mapping->host, &key) >= 0) {
+ cleancache_ops->put_page(pool_id, key, page->index, page);
cleancache_puts++;
}
}
@@ -153,19 +297,31 @@ EXPORT_SYMBOL(__cleancache_put_page);
/*
* Invalidate any data from cleancache associated with the poolid and the
* page's inode and page index so that a subsequent "get" will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
*/
void __cleancache_invalidate_page(struct address_space *mapping,
struct page *page)
{
/* careful... page->mapping is NULL sometimes when this is called */
- int pool_id = mapping->host->i_sb->cleancache_poolid;
+ int pool_id;
+ int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };
- if (pool_id >= 0) {
+ if (!cleancache_ops)
+ return;
+
+ if (fake_pool_id >= 0) {
+ pool_id = get_poolid_from_fake(fake_pool_id);
+ if (pool_id < 0)
+ return;
+
VM_BUG_ON(!PageLocked(page));
if (cleancache_get_key(mapping->host, &key) >= 0) {
- (*cleancache_ops.invalidate_page)(pool_id,
- key, page->index);
+ cleancache_ops->invalidate_page(pool_id,
+ key, page->index);
cleancache_invalidates++;
}
}
@@ -176,34 +332,63 @@ EXPORT_SYMBOL(__cleancache_invalidate_page);
* Invalidate all data from cleancache associated with the poolid and the
* mappings's inode so that all subsequent gets to this poolid/inode
* will fail.
+ *
+ * The function has two checks before any action is taken - whether
+ * a backend is registered and whether the sb->cleancache_poolid
+ * is correct.
*/
void __cleancache_invalidate_inode(struct address_space *mapping)
{
- int pool_id = mapping->host->i_sb->cleancache_poolid;
+ int pool_id;
+ int fake_pool_id = mapping->host->i_sb->cleancache_poolid;
struct cleancache_filekey key = { .u.key = { 0 } };
+ if (!cleancache_ops)
+ return;
+
+ if (fake_pool_id < 0)
+ return;
+
+ pool_id = get_poolid_from_fake(fake_pool_id);
+
if (pool_id >= 0 && cleancache_get_key(mapping->host, &key) >= 0)
- (*cleancache_ops.invalidate_inode)(pool_id, key);
+ cleancache_ops->invalidate_inode(pool_id, key);
}
EXPORT_SYMBOL(__cleancache_invalidate_inode);
/*
* Called by any cleancache-enabled filesystem at time of unmount;
- * note that pool_id is surrendered and may be reutrned by a subsequent
- * cleancache_init_fs or cleancache_init_shared_fs
+ * note that pool_id is surrendered and may be returned by a subsequent
+ * cleancache_init_fs or cleancache_init_shared_fs.
*/
void __cleancache_invalidate_fs(struct super_block *sb)
{
- if (sb->cleancache_poolid >= 0) {
- int old_poolid = sb->cleancache_poolid;
- sb->cleancache_poolid = -1;
- (*cleancache_ops.invalidate_fs)(old_poolid);
+ int index;
+ int fake_pool_id = sb->cleancache_poolid;
+ int old_poolid = fake_pool_id;
+
+ mutex_lock(&poolid_mutex);
+ if (fake_pool_id >= FAKE_SHARED_FS_POOLID_OFFSET) {
+ index = fake_pool_id - FAKE_SHARED_FS_POOLID_OFFSET;
+ old_poolid = shared_fs_poolid_map[index];
+ shared_fs_poolid_map[index] = FS_UNKNOWN;
+ uuids[index] = NULL;
+ } else if (fake_pool_id >= FAKE_FS_POOLID_OFFSET) {
+ index = fake_pool_id - FAKE_FS_POOLID_OFFSET;
+ old_poolid = fs_poolid_map[index];
+ fs_poolid_map[index] = FS_UNKNOWN;
}
+ sb->cleancache_poolid = -1;
+ if (cleancache_ops)
+ cleancache_ops->invalidate_fs(old_poolid);
+ mutex_unlock(&poolid_mutex);
}
EXPORT_SYMBOL(__cleancache_invalidate_fs);
static int __init init_cleancache(void)
{
+ int i;
+
#ifdef CONFIG_DEBUG_FS
struct dentry *root = debugfs_create_dir("cleancache", NULL);
if (root == NULL)
@@ -215,6 +400,10 @@ static int __init init_cleancache(void)
debugfs_create_u64("invalidates", S_IRUGO,
root, &cleancache_invalidates);
#endif
+ for (i = 0; i < MAX_INITIALIZABLE_FS; i++) {
+ fs_poolid_map[i] = FS_UNKNOWN;
+ shared_fs_poolid_map[i] = FS_UNKNOWN;
+ }
return 0;
}
module_init(init_cleancache)
diff --git a/mm/dmapool.c b/mm/dmapool.c
index c69781e97cf9..668f26316e2e 100644
--- a/mm/dmapool.c
+++ b/mm/dmapool.c
@@ -132,6 +132,7 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
{
struct dma_pool *retval;
size_t allocation;
+ int node;
if (align == 0) {
align = 1;
@@ -156,7 +157,9 @@ struct dma_pool *dma_pool_create(const char *name, struct device *dev,
return NULL;
}
- retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, dev_to_node(dev));
+ node = WARN_ON(!dev) ? -1 : dev_to_node(dev);
+
+ retval = kmalloc_node(sizeof(*retval), GFP_KERNEL, node);
if (!retval)
return retval;
diff --git a/mm/filemap.c b/mm/filemap.c
index e1979fdca805..4ebaf95eb583 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -35,6 +35,9 @@
#include <linux/cleancache.h>
#include "internal.h"
+#define CREATE_TRACE_POINTS
+#include <trace/events/filemap.h>
+
/*
* FIXME: remove all knowledge of the buffer layer from the core VM
*/
@@ -113,6 +116,7 @@ void __delete_from_page_cache(struct page *page)
{
struct address_space *mapping = page->mapping;
+ trace_mm_filemap_delete_from_page_cache(page);
/*
* if we're uptodate, flush out into the cleancache, otherwise
* invalidate any existing cleancache entries. We can't leave
@@ -464,6 +468,7 @@ int add_to_page_cache_locked(struct page *page, struct address_space *mapping,
mapping->nrpages++;
__inc_zone_page_state(page, NR_FILE_PAGES);
spin_unlock_irq(&mapping->tree_lock);
+ trace_mm_filemap_add_to_page_cache(page);
} else {
page->mapping = NULL;
/* Leave page->index set: truncation relies upon it */
@@ -2528,7 +2533,8 @@ ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
BUG_ON(iocb->ki_pos != pos);
- sb_start_write(inode->i_sb);
+ if (!sb_start_file_write(file))
+ return -EAGAIN;
mutex_lock(&inode->i_mutex);
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
mutex_unlock(&inode->i_mutex);
diff --git a/mm/frontswap.c b/mm/frontswap.c
index 2890e67d6026..538367ef1372 100644
--- a/mm/frontswap.c
+++ b/mm/frontswap.c
@@ -24,15 +24,7 @@
* frontswap_ops is set by frontswap_register_ops to contain the pointers
* to the frontswap "backend" implementation functions.
*/
-static struct frontswap_ops frontswap_ops __read_mostly;
-
-/*
- * This global enablement flag reduces overhead on systems where frontswap_ops
- * has not been registered, so is preferred to the slower alternative: a
- * function call that checks a non-global.
- */
-bool frontswap_enabled __read_mostly;
-EXPORT_SYMBOL(frontswap_enabled);
+static struct frontswap_ops *frontswap_ops __read_mostly;
/*
* If enabled, frontswap_store will return failure even on success. As
@@ -80,16 +72,70 @@ static inline void inc_frontswap_succ_stores(void) { }
static inline void inc_frontswap_failed_stores(void) { }
static inline void inc_frontswap_invalidates(void) { }
#endif
+
+/*
+ * Due to the asynchronous nature of the backends loading potentially
+ * _after_ the swap system has been activated, we have chokepoints
+ * on all frontswap functions to not call the backend until the backend
+ * has registered.
+ *
+ * Specifically when no backend is registered (nobody called
+ * frontswap_register_ops) all calls to frontswap_init (which is done via
+ * swapon -> enable_swap_info -> frontswap_init) are registered and remembered
+ * (via the setting of need_init bitmap) but fail to create tmem_pools. When a
+ * backend registers with frontswap at some later point the previous
+ * calls to frontswap_init are executed (by iterating over the need_init
+ * bitmap) to create tmem_pools and set the respective poolids. All of that is
+ * guarded by us using atomic bit operations on the 'need_init' bitmap.
+ *
+ * This would not guards us against the user deciding to call swapoff right as
+ * we are calling the backend to initialize (so swapon is in action).
+ * Fortunatly for us, the swapon_mutex has been taked by the callee so we are
+ * OK. The other scenario where calls to frontswap_store (called via
+ * swap_writepage) is racing with frontswap_invalidate_area (called via
+ * swapoff) is again guarded by the swap subsystem.
+ *
+ * While no backend is registered all calls to frontswap_[store|load|
+ * invalidate_area|invalidate_page] are ignored or fail.
+ *
+ * The time between the backend being registered and the swap file system
+ * calling the backend (via the frontswap_* functions) is indeterminate as
+ * frontswap_ops is not atomic_t (or a value guarded by a spinlock).
+ * That is OK as we are comfortable missing some of these calls to the newly
+ * registered backend.
+ *
+ * Obviously the opposite (unloading the backend) must be done after all
+ * the frontswap_[store|load|invalidate_area|invalidate_page] start
+ * ignorning or failing the requests - at which point frontswap_ops
+ * would have to be made in some fashion atomic.
+ */
+static DECLARE_BITMAP(need_init, MAX_SWAPFILES);
+
/*
* Register operations for frontswap, returning previous thus allowing
* detection of multiple backends and possible nesting.
*/
-struct frontswap_ops frontswap_register_ops(struct frontswap_ops *ops)
+struct frontswap_ops *frontswap_register_ops(struct frontswap_ops *ops)
{
- struct frontswap_ops old = frontswap_ops;
-
- frontswap_ops = *ops;
- frontswap_enabled = true;
+ struct frontswap_ops *old = frontswap_ops;
+ int i;
+
+ for (i = 0; i < MAX_SWAPFILES; i++) {
+ if (test_and_clear_bit(i, need_init)) {
+ struct swap_info_struct *sis = swap_info[i];
+ /* __frontswap_init _should_ have set it! */
+ if (!sis->frontswap_map)
+ return ERR_PTR(-EINVAL);
+ ops->init(i);
+ }
+ }
+ /*
+ * We MUST have frontswap_ops set _after_ the frontswap_init's
+ * have been called. Otherwise __frontswap_store might fail. Hence
+ * the barrier to make sure compiler does not re-order us.
+ */
+ barrier();
+ frontswap_ops = ops;
return old;
}
EXPORT_SYMBOL(frontswap_register_ops);
@@ -115,20 +161,48 @@ EXPORT_SYMBOL(frontswap_tmem_exclusive_gets);
/*
* Called when a swap device is swapon'd.
*/
-void __frontswap_init(unsigned type)
+void __frontswap_init(unsigned type, unsigned long *map)
{
struct swap_info_struct *sis = swap_info[type];
BUG_ON(sis == NULL);
- if (sis->frontswap_map == NULL)
+
+ /*
+ * p->frontswap is a bitmap that we MUST have to figure out which page
+ * has gone in frontswap. Without it there is no point of continuing.
+ */
+ if (WARN_ON(!map))
return;
- frontswap_ops.init(type);
+ /*
+ * Irregardless of whether the frontswap backend has been loaded
+ * before this function or it will be later, we _MUST_ have the
+ * p->frontswap set to something valid to work properly.
+ */
+ frontswap_map_set(sis, map);
+ if (frontswap_ops)
+ frontswap_ops->init(type);
+ else {
+ BUG_ON(type > MAX_SWAPFILES);
+ set_bit(type, need_init);
+ }
}
EXPORT_SYMBOL(__frontswap_init);
-static inline void __frontswap_clear(struct swap_info_struct *sis, pgoff_t offset)
+bool __frontswap_test(struct swap_info_struct *sis,
+ pgoff_t offset)
+{
+ bool ret = false;
+
+ if (frontswap_ops && sis->frontswap_map)
+ ret = test_bit(offset, sis->frontswap_map);
+ return ret;
+}
+EXPORT_SYMBOL(__frontswap_test);
+
+static inline void __frontswap_clear(struct swap_info_struct *sis,
+ pgoff_t offset)
{
- frontswap_clear(sis, offset);
+ clear_bit(offset, sis->frontswap_map);
atomic_dec(&sis->frontswap_pages);
}
@@ -147,13 +221,20 @@ int __frontswap_store(struct page *page)
struct swap_info_struct *sis = swap_info[type];
pgoff_t offset = swp_offset(entry);
+ /*
+ * Return if no backend registed.
+ * Don't need to inc frontswap_failed_stores here.
+ */
+ if (!frontswap_ops)
+ return ret;
+
BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL);
- if (frontswap_test(sis, offset))
+ if (__frontswap_test(sis, offset))
dup = 1;
- ret = frontswap_ops.store(type, offset, page);
+ ret = frontswap_ops->store(type, offset, page);
if (ret == 0) {
- frontswap_set(sis, offset);
+ set_bit(offset, sis->frontswap_map);
inc_frontswap_succ_stores();
if (!dup)
atomic_inc(&sis->frontswap_pages);
@@ -188,13 +269,16 @@ int __frontswap_load(struct page *page)
BUG_ON(!PageLocked(page));
BUG_ON(sis == NULL);
- if (frontswap_test(sis, offset))
- ret = frontswap_ops.load(type, offset, page);
+ /*
+ * __frontswap_test() will check whether there is backend registered
+ */
+ if (__frontswap_test(sis, offset))
+ ret = frontswap_ops->load(type, offset, page);
if (ret == 0) {
inc_frontswap_loads();
if (frontswap_tmem_exclusive_gets_enabled) {
SetPageDirty(page);
- frontswap_clear(sis, offset);
+ __frontswap_clear(sis, offset);
}
}
return ret;
@@ -210,8 +294,11 @@ void __frontswap_invalidate_page(unsigned type, pgoff_t offset)
struct swap_info_struct *sis = swap_info[type];
BUG_ON(sis == NULL);
- if (frontswap_test(sis, offset)) {
- frontswap_ops.invalidate_page(type, offset);
+ /*
+ * __frontswap_test() will check whether there is backend registered
+ */
+ if (__frontswap_test(sis, offset)) {
+ frontswap_ops->invalidate_page(type, offset);
__frontswap_clear(sis, offset);
inc_frontswap_invalidates();
}
@@ -226,12 +313,15 @@ void __frontswap_invalidate_area(unsigned type)
{
struct swap_info_struct *sis = swap_info[type];
- BUG_ON(sis == NULL);
- if (sis->frontswap_map == NULL)
- return;
- frontswap_ops.invalidate_area(type);
- atomic_set(&sis->frontswap_pages, 0);
- memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+ if (frontswap_ops) {
+ BUG_ON(sis == NULL);
+ if (sis->frontswap_map == NULL)
+ return;
+ frontswap_ops->invalidate_area(type);
+ atomic_set(&sis->frontswap_pages, 0);
+ memset(sis->frontswap_map, 0, sis->max / sizeof(long));
+ }
+ clear_bit(type, need_init);
}
EXPORT_SYMBOL(__frontswap_invalidate_area);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c65a8a583d13..a333350dfe03 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2124,8 +2124,12 @@ int hugetlb_report_node_meminfo(int nid, char *buf)
/* Return the number pages of memory we physically have, in PAGE_SIZE units. */
unsigned long hugetlb_total_pages(void)
{
- struct hstate *h = &default_hstate;
- return h->nr_huge_pages * pages_per_huge_page(h);
+ struct hstate *h;
+ unsigned long nr_total_pages = 0;
+
+ for_each_hstate(h)
+ nr_total_pages += h->nr_huge_pages * pages_per_huge_page(h);
+ return nr_total_pages;
}
static int hugetlb_acct_memory(struct hstate *h, long delta)
@@ -2243,10 +2247,11 @@ static pte_t make_huge_pte(struct vm_area_struct *vma, struct page *page,
pte_t entry;
if (writable) {
- entry =
- pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
+ entry = huge_pte_mkwrite(huge_pte_mkdirty(mk_huge_pte(page,
+ vma->vm_page_prot)));
} else {
- entry = huge_pte_wrprotect(mk_pte(page, vma->vm_page_prot));
+ entry = huge_pte_wrprotect(mk_huge_pte(page,
+ vma->vm_page_prot));
}
entry = pte_mkyoung(entry);
entry = pte_mkhuge(entry);
@@ -2260,7 +2265,7 @@ static void set_huge_ptep_writable(struct vm_area_struct *vma,
{
pte_t entry;
- entry = pte_mkwrite(pte_mkdirty(huge_ptep_get(ptep)));
+ entry = huge_pte_mkwrite(huge_pte_mkdirty(huge_ptep_get(ptep)));
if (huge_ptep_set_access_flags(vma, address, ptep, entry, 1))
update_mmu_cache(vma, address, ptep);
}
@@ -2375,7 +2380,7 @@ again:
* HWPoisoned hugepage is already unmapped and dropped reference
*/
if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) {
- pte_clear(mm, address, ptep);
+ huge_pte_clear(mm, address, ptep);
continue;
}
@@ -2399,7 +2404,7 @@ again:
pte = huge_ptep_get_and_clear(mm, address, ptep);
tlb_remove_tlb_entry(tlb, ptep, address);
- if (pte_dirty(pte))
+ if (huge_pte_dirty(pte))
set_page_dirty(page);
page_remove_rmap(page);
@@ -2852,7 +2857,7 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
* page now as it is used to determine if a reservation has been
* consumed.
*/
- if ((flags & FAULT_FLAG_WRITE) && !pte_write(entry)) {
+ if ((flags & FAULT_FLAG_WRITE) && !huge_pte_write(entry)) {
if (vma_needs_reservation(h, vma, address) < 0) {
ret = VM_FAULT_OOM;
goto out_mutex;
@@ -2882,12 +2887,12 @@ int hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (flags & FAULT_FLAG_WRITE) {
- if (!pte_write(entry)) {
+ if (!huge_pte_write(entry)) {
ret = hugetlb_cow(mm, vma, address, ptep, entry,
pagecache_page);
goto out_page_table_lock;
}
- entry = pte_mkdirty(entry);
+ entry = huge_pte_mkdirty(entry);
}
entry = pte_mkyoung(entry);
if (huge_ptep_set_access_flags(vma, address, ptep, entry,
@@ -2958,7 +2963,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
}
if (absent ||
- ((flags & FOLL_WRITE) && !pte_write(huge_ptep_get(pte)))) {
+ ((flags & FOLL_WRITE) && !huge_pte_write(huge_ptep_get(pte)))) {
int ret;
spin_unlock(&mm->page_table_lock);
@@ -3028,7 +3033,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
}
if (!huge_pte_none(huge_ptep_get(ptep))) {
pte = huge_ptep_get_and_clear(mm, address, ptep);
- pte = pte_mkhuge(pte_modify(pte, newprot));
+ pte = pte_mkhuge(huge_pte_modify(pte, newprot));
pte = arch_make_huge_pte(pte, vma, NULL, 0);
set_huge_pte_at(mm, address, ptep, pte);
pages++;
diff --git a/mm/memblock.c b/mm/memblock.c
index b8d9147e5c08..2cce8b3e76ed 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -771,6 +771,9 @@ static phys_addr_t __init memblock_alloc_base_nid(phys_addr_t size,
{
phys_addr_t found;
+ if (WARN_ON(!align))
+ align = __alignof__(long long);
+
/* align @size to avoid excessive fragmentation on reserved array */
size = round_up(size, align);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2b552224f5cf..f60854668698 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -152,8 +152,13 @@ struct mem_cgroup_stat_cpu {
};
struct mem_cgroup_reclaim_iter {
- /* css_id of the last scanned hierarchy member */
- int position;
+ /*
+ * last scanned hierarchy member. Valid only if last_dead_count
+ * matches memcg->dead_count of the hierarchy root group.
+ */
+ struct mem_cgroup *last_visited;
+ unsigned long last_dead_count;
+
/* scan generation, increased every round-trip */
unsigned int generation;
};
@@ -310,14 +315,31 @@ struct mem_cgroup {
/* thresholds for mem+swap usage. RCU-protected */
struct mem_cgroup_thresholds memsw_thresholds;
- /* For oom notifier event fd */
- struct list_head oom_notify;
+ union {
+ /* For oom notifier event fd */
+ struct list_head oom_notify;
+ /*
+ * we can only trigger an oom event if the memcg is alive.
+ * so we will reuse this field to hook the memcg in the list
+ * of dead memcgs.
+ */
+ struct list_head dead;
+ };
- /*
- * Should we move charges of a task when a task is moved into this
- * mem_cgroup ? And what type of charges should we move ?
- */
- unsigned long move_charge_at_immigrate;
+ union {
+ /*
+ * Should we move charges of a task when a task is moved into
+ * this mem_cgroup ? And what type of charges should we move ?
+ */
+ unsigned long move_charge_at_immigrate;
+
+ /*
+ * We are no longer concerned about moving charges after memcg
+ * is dead. So we will fill this up with its name, to aid
+ * debugging.
+ */
+ char *memcg_name;
+ };
/*
* set > 0 if pages under this cgroup are moving to other cgroup.
*/
@@ -335,6 +357,7 @@ struct mem_cgroup {
struct mem_cgroup_stat_cpu nocpu_base;
spinlock_t pcp_counter_lock;
+ atomic_t dead_count;
#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
struct tcp_memcontrol tcp_mem;
#endif
@@ -369,6 +392,55 @@ static size_t memcg_size(void)
nr_node_ids * sizeof(struct mem_cgroup_per_node);
}
+#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY
+static LIST_HEAD(dangling_memcgs);
+static DEFINE_MUTEX(dangling_memcgs_mutex);
+
+static inline void memcg_dangling_free(struct mem_cgroup *memcg)
+{
+ mutex_lock(&dangling_memcgs_mutex);
+ list_del(&memcg->dead);
+ mutex_unlock(&dangling_memcgs_mutex);
+ free_pages((unsigned long)memcg->memcg_name, 0);
+}
+
+static inline void memcg_dangling_add(struct mem_cgroup *memcg)
+{
+ /*
+ * cgroup.c will do page-sized allocations most of the time,
+ * so we'll just follow the pattern. Also, __get_free_pages
+ * is a better interface than kmalloc for us here, because
+ * we'd like this memory to be always billed to the root cgroup,
+ * not to the process removing the memcg. While kmalloc would
+ * require us to wrap it into memcg_stop/resume_kmem_account,
+ * with __get_free_pages we just don't pass the memcg flag.
+ */
+ memcg->memcg_name = (char *)__get_free_pages(GFP_KERNEL, 0);
+
+ /*
+ * we will, in general, just ignore failures. No need to go crazy,
+ * being this just a debugging interface. It is nice to copy a memcg
+ * name over, but if we (unlikely) can't, just the address will do
+ */
+ if (!memcg->memcg_name)
+ goto add_list;
+
+ if (cgroup_path(memcg->css.cgroup, memcg->memcg_name, PAGE_SIZE) < 0) {
+ free_pages((unsigned long)memcg->memcg_name, 0);
+ memcg->memcg_name = NULL;
+ }
+
+add_list:
+ INIT_LIST_HEAD(&memcg->dead);
+ mutex_lock(&dangling_memcgs_mutex);
+ list_add(&memcg->dead, &dangling_memcgs);
+ mutex_unlock(&dangling_memcgs_mutex);
+}
+#else
+static inline void memcg_dangling_free(struct mem_cgroup *memcg) {}
+static inline void memcg_dangling_add(struct mem_cgroup *memcg) {}
+#endif
+
/* internal only representation about the status of kmem accounting. */
enum {
KMEM_ACCOUNTED_ACTIVE = 0, /* accounted by this cgroup itself */
@@ -1067,6 +1139,51 @@ struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm)
return memcg;
}
+/*
+ * Returns a next (in a pre-order walk) alive memcg (with elevated css
+ * ref. count) or NULL if the whole root's subtree has been visited.
+ *
+ * helper function to be used by mem_cgroup_iter
+ */
+static struct mem_cgroup *__mem_cgroup_iter_next(struct mem_cgroup *root,
+ struct mem_cgroup *last_visited)
+{
+ struct cgroup *prev_cgroup, *next_cgroup;
+
+ /*
+ * Root is not visited by cgroup iterators so it needs an
+ * explicit visit.
+ */
+ if (!last_visited)
+ return root;
+
+ prev_cgroup = (last_visited == root) ? NULL
+ : last_visited->css.cgroup;
+skip_node:
+ next_cgroup = cgroup_next_descendant_pre(
+ prev_cgroup, root->css.cgroup);
+
+ /*
+ * Even if we found a group we have to make sure it is
+ * alive. css && !memcg means that the groups should be
+ * skipped and we should continue the tree walk.
+ * last_visited css is safe to use because it is
+ * protected by css_get and the tree walk is rcu safe.
+ */
+ if (next_cgroup) {
+ struct mem_cgroup *mem = mem_cgroup_from_cont(
+ next_cgroup);
+ if (css_tryget(&mem->css))
+ return mem;
+ else {
+ prev_cgroup = next_cgroup;
+ goto skip_node;
+ }
+ }
+
+ return NULL;
+}
+
/**
* mem_cgroup_iter - iterate over memory cgroup hierarchy
* @root: hierarchy root
@@ -1089,7 +1206,8 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup_reclaim_cookie *reclaim)
{
struct mem_cgroup *memcg = NULL;
- int id = 0;
+ struct mem_cgroup *last_visited = NULL;
+ unsigned long uninitialized_var(dead_count);
if (mem_cgroup_disabled())
return NULL;
@@ -1098,20 +1216,17 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
root = root_mem_cgroup;
if (prev && !reclaim)
- id = css_id(&prev->css);
-
- if (prev && prev != root)
- css_put(&prev->css);
+ last_visited = prev;
if (!root->use_hierarchy && root != root_mem_cgroup) {
if (prev)
- return NULL;
+ goto out_css_put;
return root;
}
+ rcu_read_lock();
while (!memcg) {
struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
- struct cgroup_subsys_state *css;
if (reclaim) {
int nid = zone_to_nid(reclaim->zone);
@@ -1120,31 +1235,60 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
mz = mem_cgroup_zoneinfo(root, nid, zid);
iter = &mz->reclaim_iter[reclaim->priority];
- if (prev && reclaim->generation != iter->generation)
- return NULL;
- id = iter->position;
+ last_visited = iter->last_visited;
+ if (prev && reclaim->generation != iter->generation) {
+ iter->last_visited = NULL;
+ goto out_unlock;
+ }
+
+ /*
+ * If the dead_count mismatches, a destruction
+ * has happened or is happening concurrently.
+ * If the dead_count matches, a destruction
+ * might still happen concurrently, but since
+ * we checked under RCU, that destruction
+ * won't free the object until we release the
+ * RCU reader lock. Thus, the dead_count
+ * check verifies the pointer is still valid,
+ * css_tryget() verifies the cgroup pointed to
+ * is alive.
+ */
+ dead_count = atomic_read(&root->dead_count);
+ smp_rmb();
+ last_visited = iter->last_visited;
+ if (last_visited) {
+ if ((dead_count != iter->last_dead_count) ||
+ !css_tryget(&last_visited->css)) {
+ last_visited = NULL;
+ }
+ }
}
- rcu_read_lock();
- css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id);
- if (css) {
- if (css == &root->css || css_tryget(css))
- memcg = mem_cgroup_from_css(css);
- } else
- id = 0;
- rcu_read_unlock();
+ memcg = __mem_cgroup_iter_next(root, last_visited);
if (reclaim) {
- iter->position = id;
- if (!css)
+ if (last_visited)
+ css_put(&last_visited->css);
+
+ iter->last_visited = memcg;
+ smp_wmb();
+ iter->last_dead_count = dead_count;
+
+ if (!memcg)
iter->generation++;
else if (!prev && memcg)
reclaim->generation = iter->generation;
}
- if (prev && !css)
- return NULL;
+ if (prev && !memcg)
+ goto out_unlock;
}
+out_unlock:
+ rcu_read_unlock();
+out_css_put:
+ if (prev && prev != root)
+ css_put(&prev->css);
+
return memcg;
}
@@ -4947,9 +5091,6 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
- if (!do_swap_account && type == _MEMSWAP)
- return -EOPNOTSUPP;
-
switch (type) {
case _MEM:
if (name == RES_USAGE)
@@ -4974,6 +5115,107 @@ static ssize_t mem_cgroup_read(struct cgroup *cont, struct cftype *cft,
return simple_read_from_buffer(buf, nbytes, ppos, str, len);
}
+#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY
+static void
+mem_cgroup_dangling_swap(struct mem_cgroup *memcg, struct seq_file *m)
+{
+#ifdef CONFIG_MEMCG_SWAP
+ u64 kmem;
+ u64 memsw;
+
+ /*
+ * kmem will also propagate here, so we are only interested in the
+ * difference. See comment in mem_cgroup_reparent_charges for details.
+ *
+ * We could save this value for later consumption by kmem reports, but
+ * there is not a lot of problem if the figures differ slightly.
+ */
+ kmem = res_counter_read_u64(&memcg->kmem, RES_USAGE);
+ memsw = res_counter_read_u64(&memcg->memsw, RES_USAGE) - kmem;
+ seq_printf(m, "\t%llu swap bytes\n", memsw);
+#endif
+}
+
+
+static void
+mem_cgroup_dangling_tcp(struct mem_cgroup *memcg, struct seq_file *m)
+{
+#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM)
+ struct tcp_memcontrol *tcp = &memcg->tcp_mem;
+ s64 tcp_socks;
+ u64 tcp_bytes;
+
+ tcp_socks = percpu_counter_sum_positive(&tcp->tcp_sockets_allocated);
+ tcp_bytes = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
+ seq_printf(m, "\t%llu tcp bytes", tcp_bytes);
+ /*
+ * if tcp_bytes == 0, tcp_socks != 0 is a bug. One more reason to print
+ * it!
+ */
+ if (tcp_bytes || tcp_socks)
+ seq_printf(m, ", in %lld sockets", tcp_socks);
+ seq_printf(m, "\n");
+
+#endif
+}
+
+static void
+mem_cgroup_dangling_kmem(struct mem_cgroup *memcg, struct seq_file *m)
+{
+#ifdef CONFIG_MEMCG_KMEM
+ u64 kmem;
+ struct memcg_cache_params *params;
+
+ kmem = res_counter_read_u64(&memcg->kmem, RES_USAGE);
+ seq_printf(m, "\t%llu kmem bytes", kmem);
+
+ /* list below may not be initialized, so not even try */
+ if (!kmem)
+ return;
+
+ seq_printf(m, " in caches");
+ mutex_lock(&memcg->slab_caches_mutex);
+ list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+ struct kmem_cache *s = memcg_params_to_cache(params);
+
+ seq_printf(m, " %s", s->name);
+ }
+ mutex_unlock(&memcg->slab_caches_mutex);
+ seq_printf(m, "\n");
+#endif
+}
+
+/*
+ * After a memcg is destroyed, it may still be kept around in memory.
+ * Currently, the two main reasons for it are swap entries, and kernel memory.
+ * Because they will be freed assynchronously, they will pin the memcg structure
+ * and its resources until the last reference goes away.
+ *
+ * This root-only file will show information about which users
+ */
+static int mem_cgroup_dangling_read(struct cgroup *cont, struct cftype *cft,
+ struct seq_file *m)
+{
+ struct mem_cgroup *memcg;
+
+ mutex_lock(&dangling_memcgs_mutex);
+
+ list_for_each_entry(memcg, &dangling_memcgs, dead) {
+ if (memcg->memcg_name)
+ seq_printf(m, "%s:\n", memcg->memcg_name);
+ else
+ seq_printf(m, "%p (name lost):\n", memcg);
+
+ mem_cgroup_dangling_swap(memcg, m);
+ mem_cgroup_dangling_tcp(memcg, m);
+ mem_cgroup_dangling_kmem(memcg, m);
+ }
+
+ mutex_unlock(&dangling_memcgs_mutex);
+ return 0;
+}
+#endif
+
static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
{
int ret = -EINVAL;
@@ -5084,9 +5326,6 @@ static int mem_cgroup_write(struct cgroup *cont, struct cftype *cft,
type = MEMFILE_TYPE(cft->private);
name = MEMFILE_ATTR(cft->private);
- if (!do_swap_account && type == _MEMSWAP)
- return -EOPNOTSUPP;
-
switch (name) {
case RES_LIMIT:
if (mem_cgroup_is_root(memcg)) { /* Can't set limit on root */
@@ -5163,9 +5402,6 @@ static int mem_cgroup_reset(struct cgroup *cont, unsigned int event)
type = MEMFILE_TYPE(event);
name = MEMFILE_ATTR(event);
- if (!do_swap_account && type == _MEMSWAP)
- return -EOPNOTSUPP;
-
switch (name) {
case RES_MAX_USAGE:
if (type == _MEM)
@@ -5875,6 +6111,14 @@ static struct cftype mem_cgroup_files[] = {
},
#endif
#endif
+
+#ifdef CONFIG_MEMCG_DEBUG_ASYNC_DESTROY
+ {
+ .name = "dangling_memcgs",
+ .read_seq_string = mem_cgroup_dangling_read,
+ .flags = CFTYPE_ONLY_ON_ROOT,
+ },
+#endif
{ }, /* terminate */
};
@@ -6024,6 +6268,8 @@ static void free_work(struct work_struct *work)
struct mem_cgroup *memcg;
memcg = container_of(work, struct mem_cgroup, work_freeing);
+
+ memcg_dangling_free(memcg);
__mem_cgroup_free(memcg);
}
@@ -6184,10 +6430,29 @@ mem_cgroup_css_online(struct cgroup *cont)
return error;
}
+/*
+ * Announce all parents that a group from their hierarchy is gone.
+ */
+static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
+{
+ struct mem_cgroup *parent = memcg;
+
+ while ((parent = parent_mem_cgroup(parent)))
+ atomic_inc(&parent->dead_count);
+
+ /*
+ * if the root memcg is not hierarchical we have to check it
+ * explicitely.
+ */
+ if (!root_mem_cgroup->use_hierarchy)
+ atomic_inc(&root_mem_cgroup->dead_count);
+}
+
static void mem_cgroup_css_offline(struct cgroup *cont)
{
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
+ mem_cgroup_invalidate_reclaim_iterators(memcg);
mem_cgroup_reparent_charges(memcg);
mem_cgroup_destroy_all_caches(memcg);
}
@@ -6198,6 +6463,7 @@ static void mem_cgroup_css_free(struct cgroup *cont)
kmem_cgroup_destroy(memcg);
+ memcg_dangling_add(memcg);
mem_cgroup_put(memcg);
}
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index df0694c6adef..ceb0c7f1932f 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -785,10 +785,10 @@ static struct page_state {
{ sc|dirty, sc, "clean swapcache", me_swapcache_clean },
{ mlock|dirty, mlock|dirty, "dirty mlocked LRU", me_pagecache_dirty },
- { mlock, mlock, "clean mlocked LRU", me_pagecache_clean },
+ { mlock|dirty, mlock, "clean mlocked LRU", me_pagecache_clean },
{ unevict|dirty, unevict|dirty, "dirty unevictable LRU", me_pagecache_dirty },
- { unevict, unevict, "clean unevictable LRU", me_pagecache_clean },
+ { unevict|dirty, unevict, "clean unevictable LRU", me_pagecache_clean },
{ lru|dirty, lru|dirty, "dirty LRU", me_pagecache_dirty },
{ lru|dirty, lru, "clean LRU", me_pagecache_clean },
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9597eec8239d..3e2ab7ba0d81 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1613,7 +1613,7 @@ int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
/**
* walk_memory_range - walks through all mem sections in [start_pfn, end_pfn)
* @start_pfn: start pfn of the memory range
- * @end_pfn: end pft of the memory range
+ * @end_pfn: end pfn of the memory range
* @arg: argument passed to func
* @func: callback for each memory section walked
*
diff --git a/mm/migrate.c b/mm/migrate.c
index 3bbaf5d230b0..4e3dab51f8fa 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -876,6 +876,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
dec_zone_page_state(page, NR_ISOLATED_ANON +
page_is_file_cache(page));
balloon_page_free(page);
+ balloon_event_count(COMPACTBALLOONMIGRATED);
return MIGRATEPAGE_SUCCESS;
}
out:
diff --git a/mm/mmap.c b/mm/mmap.c
index 2664a47cec93..65e274a21a0d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1816,15 +1816,6 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
}
#endif
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
- /*
- * Is this a new hole at the lowest possible address?
- */
- if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
- mm->free_area_cache = addr;
-}
-
/*
* This mmap-allocator allocates new areas top-down from below the
* stack's low limit (the base):
@@ -1881,19 +1872,6 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
}
#endif
-void arch_unmap_area_topdown(struct mm_struct *mm, unsigned long addr)
-{
- /*
- * Is this a new hole at the highest possible address?
- */
- if (addr > mm->free_area_cache)
- mm->free_area_cache = addr;
-
- /* dont allow allocations above current base */
- if (mm->free_area_cache > mm->mmap_base)
- mm->free_area_cache = mm->mmap_base;
-}
-
unsigned long
get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
unsigned long pgoff, unsigned long flags)
@@ -1933,9 +1911,6 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
{
struct vm_area_struct *vma = NULL;
- if (WARN_ON_ONCE(!mm)) /* Remove this in linux-3.6 */
- return NULL;
-
/* Check the cache first. */
/* (Cache hit rate is typically around 35%.) */
vma = mm->mmap_cache;
@@ -2303,7 +2278,7 @@ static void unmap_region(struct mm_struct *mm,
update_hiwater_rss(mm);
unmap_vmas(&tlb, vma, start, end);
free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
- next ? next->vm_start : 0);
+ next ? next->vm_start : USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, start, end);
}
@@ -2317,7 +2292,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
{
struct vm_area_struct **insertion_point;
struct vm_area_struct *tail_vma = NULL;
- unsigned long addr;
insertion_point = (prev ? &prev->vm_next : &mm->mmap);
vma->vm_prev = NULL;
@@ -2334,11 +2308,6 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
} else
mm->highest_vm_end = prev ? prev->vm_end : 0;
tail_vma->vm_next = NULL;
- if (mm->unmap_area == arch_unmap_area)
- addr = prev ? prev->vm_end : mm->mmap_base;
- else
- addr = vma ? vma->vm_start : mm->mmap_base;
- mm->unmap_area(mm, addr);
mm->mmap_cache = NULL; /* Kill the cache. */
}
@@ -2683,7 +2652,7 @@ void exit_mmap(struct mm_struct *mm)
/* Use -1 here to ensure all VMAs in the mm are unmapped */
unmap_vmas(&tlb, vma, 0, -1);
- free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+ free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING);
tlb_finish_mmu(&tlb, 0, -1);
/*
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 3dcfaf4ed355..8a8cd0265e52 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -14,9 +14,6 @@
* use_mm
* Makes the calling kernel thread take on the specified
* mm context.
- * Called by the retry thread execute retries within the
- * iocb issuer's mm context, so that copy_from/to_user
- * operations work seamlessly for aio.
* (Note: this routine is intended to be called only
* from a kernel thread context)
*/
diff --git a/mm/nommu.c b/mm/nommu.c
index 66737e0584ae..3cc034cc122c 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -228,8 +228,7 @@ int follow_pfn(struct vm_area_struct *vma, unsigned long address,
}
EXPORT_SYMBOL(follow_pfn);
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
+LIST_HEAD(vmap_area_list);
void vfree(const void *addr)
{
@@ -1858,10 +1857,6 @@ unsigned long arch_get_unmapped_area(struct file *file, unsigned long addr,
return -ENOMEM;
}
-void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
-{
-}
-
void unmap_mapping_range(struct address_space *mapping,
loff_t const holebegin, loff_t const holelen,
int even_cows)
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index efe68148f621..4514ad7415c3 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2311,10 +2311,6 @@ void wait_for_stable_page(struct page *page)
if (!bdi_cap_stable_pages_required(bdi))
return;
-#ifdef CONFIG_NEED_BOUNCE_POOL
- if (mapping->host->i_sb->s_flags & MS_SNAP_STABLE)
- return;
-#endif /* CONFIG_NEED_BOUNCE_POOL */
wait_on_page_writeback(page);
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 8fcced7823fa..cc14c7a64ba2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -2002,6 +2002,13 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...)
return;
/*
+ * Walking all memory to count page types is very expensive and should
+ * be inhibited in non-blockable contexts.
+ */
+ if (!(gfp_mask & __GFP_WAIT))
+ filter |= SHOW_MEM_FILTER_PAGE_COUNT;
+
+ /*
* This documents exceptions given to allocations in certain
* contexts that are allowed to allocate outside current's set
* of allowed nodes.
@@ -3900,8 +3907,11 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
* exist on hotplugged memory.
*/
if (context == MEMMAP_EARLY) {
- if (!early_pfn_valid(pfn))
+ if (!early_pfn_valid(pfn)) {
+ pfn = ALIGN(pfn + MAX_ORDER_NR_PAGES,
+ MAX_ORDER_NR_PAGES) - 1;
continue;
+ }
if (!early_pfn_in_nid(pfn, nid))
continue;
}
@@ -5113,6 +5123,35 @@ early_param("movablecore", cmdline_parse_movablecore);
#endif /* CONFIG_HAVE_MEMBLOCK_NODE_MAP */
+unsigned long free_reserved_area(unsigned long start, unsigned long end,
+ int poison, char *s)
+{
+ unsigned long pages, pos;
+
+ pos = start = PAGE_ALIGN(start);
+ end &= PAGE_MASK;
+ for (pages = 0; pos < end; pos += PAGE_SIZE, pages++) {
+ if (poison)
+ memset((void *)pos, poison, PAGE_SIZE);
+ free_reserved_page(virt_to_page(pos));
+ }
+
+ if (pages && s)
+ pr_info("Freeing %s memory: %ldK (%lx - %lx)\n",
+ s, pages << (PAGE_SHIFT - 10), start, end);
+
+ return pages;
+}
+
+#ifdef CONFIG_HIGHMEM
+void free_highmem_page(struct page *page)
+{
+ __free_reserved_page(page);
+ totalram_pages++;
+ totalhigh_pages++;
+}
+#endif
+
/**
* set_dma_reserve - set the specified number of pages reserved in the first zone
* @new_dma_reserve: The number of pages to mark reserved
diff --git a/mm/page_io.c b/mm/page_io.c
index 78eee32ee486..c535d395a440 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -20,6 +20,7 @@
#include <linux/buffer_head.h>
#include <linux/writeback.h>
#include <linux/frontswap.h>
+#include <linux/aio.h>
#include <asm/pgtable.h>
static struct bio *get_swap_bio(gfp_t gfp_flags,
diff --git a/mm/rmap.c b/mm/rmap.c
index 807c96bf0dc6..6280da86b5d6 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1513,6 +1513,9 @@ static int try_to_unmap_file(struct page *page, enum ttu_flags flags)
unsigned long max_nl_size = 0;
unsigned int mapcount;
+ if (PageHuge(page))
+ pgoff = page->index << compound_order(page);
+
mutex_lock(&mapping->i_mmap_mutex);
vma_interval_tree_foreach(vma, &mapping->i_mmap, pgoff, pgoff) {
unsigned long address = vma_address(page, vma);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1c44af71fcf5..5e6a8422658b 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -25,11 +25,13 @@
#include <linux/init.h>
#include <linux/vfs.h>
#include <linux/mount.h>
+#include <linux/ramfs.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/swap.h>
+#include <linux/aio.h>
static struct vfsmount *shm_mnt;
@@ -2830,8 +2832,6 @@ out4:
* effectively equivalent, but much lighter weight.
*/
-#include <linux/ramfs.h>
-
static struct file_system_type shmem_fs_type = {
.name = "tmpfs",
.mount = ramfs_mount,
@@ -2931,11 +2931,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
d_instantiate(path.dentry, inode);
inode->i_size = size;
clear_nlink(inode); /* It is unlinked */
-#ifndef CONFIG_MMU
res = ERR_PTR(ramfs_nommu_expand_for_mapping(inode, size));
if (IS_ERR(res))
goto put_dentry;
-#endif
res = alloc_file(&path, FMODE_WRITE | FMODE_READ,
&shmem_file_operations);
diff --git a/mm/swap.c b/mm/swap.c
index 8a529a01e8fc..92a9be551846 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -30,6 +30,7 @@
#include <linux/backing-dev.h>
#include <linux/memcontrol.h>
#include <linux/gfp.h>
+#include <linux/uio.h>
#include "internal.h"
diff --git a/mm/swapfile.c b/mm/swapfile.c
index a1f7772a01fc..6c340d908b27 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1509,8 +1509,7 @@ static int setup_swap_extents(struct swap_info_struct *sis, sector_t *span)
}
static void _enable_swap_info(struct swap_info_struct *p, int prio,
- unsigned char *swap_map,
- unsigned long *frontswap_map)
+ unsigned char *swap_map)
{
int i, prev;
@@ -1519,7 +1518,6 @@ static void _enable_swap_info(struct swap_info_struct *p, int prio,
else
p->prio = --least_priority;
p->swap_map = swap_map;
- frontswap_map_set(p, frontswap_map);
p->flags |= SWP_WRITEOK;
atomic_long_add(p->pages, &nr_swap_pages);
total_swap_pages += p->pages;
@@ -1542,10 +1540,10 @@ static void enable_swap_info(struct swap_info_struct *p, int prio,
unsigned char *swap_map,
unsigned long *frontswap_map)
{
+ frontswap_init(p->type, frontswap_map);
spin_lock(&swap_lock);
spin_lock(&p->lock);
- _enable_swap_info(p, prio, swap_map, frontswap_map);
- frontswap_init(p->type);
+ _enable_swap_info(p, prio, swap_map);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
}
@@ -1554,7 +1552,7 @@ static void reinsert_swap_info(struct swap_info_struct *p)
{
spin_lock(&swap_lock);
spin_lock(&p->lock);
- _enable_swap_info(p, p->prio, p->swap_map, frontswap_map_get(p));
+ _enable_swap_info(p, p->prio, p->swap_map);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
}
@@ -1563,6 +1561,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
{
struct swap_info_struct *p = NULL;
unsigned char *swap_map;
+ unsigned long *frontswap_map;
struct file *swap_file, *victim;
struct address_space *mapping;
struct inode *inode;
@@ -1662,12 +1661,14 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
swap_map = p->swap_map;
p->swap_map = NULL;
p->flags = 0;
- frontswap_invalidate_area(type);
+ frontswap_map = frontswap_map_get(p);
+ frontswap_map_set(p, NULL);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
+ frontswap_invalidate_area(type);
mutex_unlock(&swapon_mutex);
vfree(swap_map);
- vfree(frontswap_map_get(p));
+ vfree(frontswap_map);
/* Destroy swap account informatin */
swap_cgroup_swapoff(type);
@@ -2120,7 +2121,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (p->bdev) {
if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
p->flags |= SWP_SOLIDSTATE;
- p->cluster_next = 1 + (random32() % p->highest_bit);
+ p->cluster_next = 1 + (prandom_u32() % p->highest_bit);
}
if ((swap_flags & SWAP_FLAG_DISCARD) && discard_swap(p) == 0)
p->flags |= SWP_DISCARDABLE;
diff --git a/mm/util.c b/mm/util.c
index ab1424dbe2e6..7441c41d00f6 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -295,7 +295,6 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
{
mm->mmap_base = TASK_UNMAPPED_BASE;
mm->get_unmapped_area = arch_get_unmapped_area;
- mm->unmap_area = arch_unmap_area;
}
#endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 0f751f2068c3..72043d6c88c0 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -249,19 +249,9 @@ EXPORT_SYMBOL(vmalloc_to_pfn);
#define VM_LAZY_FREEING 0x02
#define VM_VM_AREA 0x04
-struct vmap_area {
- unsigned long va_start;
- unsigned long va_end;
- unsigned long flags;
- struct rb_node rb_node; /* address sorted rbtree */
- struct list_head list; /* address sorted list */
- struct list_head purge_list; /* "lazy purge" list */
- struct vm_struct *vm;
- struct rcu_head rcu_head;
-};
-
static DEFINE_SPINLOCK(vmap_area_lock);
-static LIST_HEAD(vmap_area_list);
+/* Export for kexec only */
+LIST_HEAD(vmap_area_list);
static struct rb_root vmap_area_root = RB_ROOT;
/* The vmap cache globals are protected by vmap_area_lock */
@@ -313,7 +303,7 @@ static void __insert_vmap_area(struct vmap_area *va)
rb_link_node(&va->rb_node, parent, p);
rb_insert_color(&va->rb_node, &vmap_area_root);
- /* address-sort this list so it is usable like the vmlist */
+ /* address-sort this list */
tmp = rb_prev(&va->rb_node);
if (tmp) {
struct vmap_area *prev;
@@ -1125,6 +1115,7 @@ void *vm_map_ram(struct page **pages, unsigned int count, int node, pgprot_t pro
}
EXPORT_SYMBOL(vm_map_ram);
+static struct vm_struct *vmlist __initdata;
/**
* vm_area_add_early - add vmap area early during boot
* @vm: vm_struct to add
@@ -1283,41 +1274,35 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages)
}
EXPORT_SYMBOL_GPL(map_vm_area);
-/*** Old vmalloc interfaces ***/
-DEFINE_RWLOCK(vmlist_lock);
-struct vm_struct *vmlist;
-
static void setup_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
unsigned long flags, const void *caller)
{
+ spin_lock(&vmap_area_lock);
vm->flags = flags;
vm->addr = (void *)va->va_start;
vm->size = va->va_end - va->va_start;
vm->caller = caller;
va->vm = vm;
va->flags |= VM_VM_AREA;
+ spin_unlock(&vmap_area_lock);
}
-static void insert_vmalloc_vmlist(struct vm_struct *vm)
+static void clear_vm_unlist(struct vm_struct *vm)
{
- struct vm_struct *tmp, **p;
-
+ /*
+ * Before removing VM_UNLIST,
+ * we should make sure that vm has proper values.
+ * Pair with smp_rmb() in show_numa_info().
+ */
+ smp_wmb();
vm->flags &= ~VM_UNLIST;
- write_lock(&vmlist_lock);
- for (p = &vmlist; (tmp = *p) != NULL; p = &tmp->next) {
- if (tmp->addr >= vm->addr)
- break;
- }
- vm->next = *p;
- *p = vm;
- write_unlock(&vmlist_lock);
}
static void insert_vmalloc_vm(struct vm_struct *vm, struct vmap_area *va,
unsigned long flags, const void *caller)
{
setup_vmalloc_vm(vm, va, flags, caller);
- insert_vmalloc_vmlist(vm);
+ clear_vm_unlist(vm);
}
static struct vm_struct *__get_vm_area_node(unsigned long size,
@@ -1360,10 +1345,9 @@ static struct vm_struct *__get_vm_area_node(unsigned long size,
/*
* When this function is called from __vmalloc_node_range,
- * we do not add vm_struct to vmlist here to avoid
- * accessing uninitialized members of vm_struct such as
- * pages and nr_pages fields. They will be set later.
- * To distinguish it from others, we use a VM_UNLIST flag.
+ * we add VM_UNLIST flag to avoid accessing uninitialized
+ * members of vm_struct such as pages and nr_pages fields.
+ * They will be set later.
*/
if (flags & VM_UNLIST)
setup_vmalloc_vm(area, va, flags, caller);
@@ -1447,19 +1431,10 @@ struct vm_struct *remove_vm_area(const void *addr)
if (va && va->flags & VM_VM_AREA) {
struct vm_struct *vm = va->vm;
- if (!(vm->flags & VM_UNLIST)) {
- struct vm_struct *tmp, **p;
- /*
- * remove from list and disallow access to
- * this vm_struct before unmap. (address range
- * confliction is maintained by vmap.)
- */
- write_lock(&vmlist_lock);
- for (p = &vmlist; (tmp = *p) != vm; p = &tmp->next)
- ;
- *p = tmp->next;
- write_unlock(&vmlist_lock);
- }
+ spin_lock(&vmap_area_lock);
+ va->vm = NULL;
+ va->flags &= ~VM_VM_AREA;
+ spin_unlock(&vmap_area_lock);
vmap_debug_free_range(va->va_start, va->va_end);
free_unmap_vmap_area(va);
@@ -1680,10 +1655,11 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
return NULL;
/*
- * In this function, newly allocated vm_struct is not added
- * to vmlist at __get_vm_area_node(). so, it is added here.
+ * In this function, newly allocated vm_struct has VM_UNLIST flag.
+ * It means that vm_struct is not fully initialized.
+ * Now, it is fully initialized, so remove this flag here.
*/
- insert_vmalloc_vmlist(area);
+ clear_vm_unlist(area);
/*
* A ref_count = 3 is needed because the vm_struct and vmap_area
@@ -2005,7 +1981,8 @@ static int aligned_vwrite(char *buf, char *addr, unsigned long count)
long vread(char *buf, char *addr, unsigned long count)
{
- struct vm_struct *tmp;
+ struct vmap_area *va;
+ struct vm_struct *vm;
char *vaddr, *buf_start = buf;
unsigned long buflen = count;
unsigned long n;
@@ -2014,10 +1991,17 @@ long vread(char *buf, char *addr, unsigned long count)
if ((unsigned long) addr + count < count)
count = -(unsigned long) addr;
- read_lock(&vmlist_lock);
- for (tmp = vmlist; count && tmp; tmp = tmp->next) {
- vaddr = (char *) tmp->addr;
- if (addr >= vaddr + tmp->size - PAGE_SIZE)
+ spin_lock(&vmap_area_lock);
+ list_for_each_entry(va, &vmap_area_list, list) {
+ if (!count)
+ break;
+
+ if (!(va->flags & VM_VM_AREA))
+ continue;
+
+ vm = va->vm;
+ vaddr = (char *) vm->addr;
+ if (addr >= vaddr + vm->size - PAGE_SIZE)
continue;
while (addr < vaddr) {
if (count == 0)
@@ -2027,10 +2011,10 @@ long vread(char *buf, char *addr, unsigned long count)
addr++;
count--;
}
- n = vaddr + tmp->size - PAGE_SIZE - addr;
+ n = vaddr + vm->size - PAGE_SIZE - addr;
if (n > count)
n = count;
- if (!(tmp->flags & VM_IOREMAP))
+ if (!(vm->flags & VM_IOREMAP))
aligned_vread(buf, addr, n);
else /* IOREMAP area is treated as memory hole */
memset(buf, 0, n);
@@ -2039,7 +2023,7 @@ long vread(char *buf, char *addr, unsigned long count)
count -= n;
}
finished:
- read_unlock(&vmlist_lock);
+ spin_unlock(&vmap_area_lock);
if (buf == buf_start)
return 0;
@@ -2078,7 +2062,8 @@ finished:
long vwrite(char *buf, char *addr, unsigned long count)
{
- struct vm_struct *tmp;
+ struct vmap_area *va;
+ struct vm_struct *vm;
char *vaddr;
unsigned long n, buflen;
int copied = 0;
@@ -2088,10 +2073,17 @@ long vwrite(char *buf, char *addr, unsigned long count)
count = -(unsigned long) addr;
buflen = count;
- read_lock(&vmlist_lock);
- for (tmp = vmlist; count && tmp; tmp = tmp->next) {
- vaddr = (char *) tmp->addr;
- if (addr >= vaddr + tmp->size - PAGE_SIZE)
+ spin_lock(&vmap_area_lock);
+ list_for_each_entry(va, &vmap_area_list, list) {
+ if (!count)
+ break;
+
+ if (!(va->flags & VM_VM_AREA))
+ continue;
+
+ vm = va->vm;
+ vaddr = (char *) vm->addr;
+ if (addr >= vaddr + vm->size - PAGE_SIZE)
continue;
while (addr < vaddr) {
if (count == 0)
@@ -2100,10 +2092,10 @@ long vwrite(char *buf, char *addr, unsigned long count)
addr++;
count--;
}
- n = vaddr + tmp->size - PAGE_SIZE - addr;
+ n = vaddr + vm->size - PAGE_SIZE - addr;
if (n > count)
n = count;
- if (!(tmp->flags & VM_IOREMAP)) {
+ if (!(vm->flags & VM_IOREMAP)) {
aligned_vwrite(buf, addr, n);
copied++;
}
@@ -2112,7 +2104,7 @@ long vwrite(char *buf, char *addr, unsigned long count)
count -= n;
}
finished:
- read_unlock(&vmlist_lock);
+ spin_unlock(&vmap_area_lock);
if (!copied)
return 0;
return buflen;
@@ -2519,19 +2511,19 @@ void pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms)
#ifdef CONFIG_PROC_FS
static void *s_start(struct seq_file *m, loff_t *pos)
- __acquires(&vmlist_lock)
+ __acquires(&vmap_area_lock)
{
loff_t n = *pos;
- struct vm_struct *v;
+ struct vmap_area *va;
- read_lock(&vmlist_lock);
- v = vmlist;
- while (n > 0 && v) {
+ spin_lock(&vmap_area_lock);
+ va = list_entry((&vmap_area_list)->next, typeof(*va), list);
+ while (n > 0 && &va->list != &vmap_area_list) {
n--;
- v = v->next;
+ va = list_entry(va->list.next, typeof(*va), list);
}
- if (!n)
- return v;
+ if (!n && &va->list != &vmap_area_list)
+ return va;
return NULL;
@@ -2539,16 +2531,20 @@ static void *s_start(struct seq_file *m, loff_t *pos)
static void *s_next(struct seq_file *m, void *p, loff_t *pos)
{
- struct vm_struct *v = p;
+ struct vmap_area *va = p, *next;
++*pos;
- return v->next;
+ next = list_entry(va->list.next, typeof(*va), list);
+ if (&next->list != &vmap_area_list)
+ return next;
+
+ return NULL;
}
static void s_stop(struct seq_file *m, void *p)
- __releases(&vmlist_lock)
+ __releases(&vmap_area_lock)
{
- read_unlock(&vmlist_lock);
+ spin_unlock(&vmap_area_lock);
}
static void show_numa_info(struct seq_file *m, struct vm_struct *v)
@@ -2559,6 +2555,11 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
if (!counters)
return;
+ /* Pair with smp_wmb() in clear_vm_unlist() */
+ smp_rmb();
+ if (v->flags & VM_UNLIST)
+ return;
+
memset(counters, 0, nr_node_ids * sizeof(unsigned int));
for (nr = 0; nr < v->nr_pages; nr++)
@@ -2572,7 +2573,20 @@ static void show_numa_info(struct seq_file *m, struct vm_struct *v)
static int s_show(struct seq_file *m, void *p)
{
- struct vm_struct *v = p;
+ struct vmap_area *va = p;
+ struct vm_struct *v;
+
+ if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+ return 0;
+
+ if (!(va->flags & VM_VM_AREA)) {
+ seq_printf(m, "0x%pK-0x%pK %7ld vm_map_ram\n",
+ (void *)va->va_start, (void *)va->va_end,
+ va->va_end - va->va_start);
+ return 0;
+ }
+
+ v = va->vm;
seq_printf(m, "0x%pK-0x%pK %7ld",
v->addr, v->addr + v->size, v->size);
@@ -2645,5 +2659,53 @@ static int __init proc_vmalloc_init(void)
return 0;
}
module_init(proc_vmalloc_init);
+
+void get_vmalloc_info(struct vmalloc_info *vmi)
+{
+ struct vmap_area *va;
+ unsigned long free_area_size;
+ unsigned long prev_end;
+
+ vmi->used = 0;
+ vmi->largest_chunk = 0;
+
+ prev_end = VMALLOC_START;
+
+ spin_lock(&vmap_area_lock);
+
+ if (list_empty(&vmap_area_list)) {
+ vmi->largest_chunk = VMALLOC_TOTAL;
+ goto out;
+ }
+
+ list_for_each_entry(va, &vmap_area_list, list) {
+ unsigned long addr = va->va_start;
+
+ /*
+ * Some archs keep another range for modules in vmalloc space
+ */
+ if (addr < VMALLOC_START)
+ continue;
+ if (addr >= VMALLOC_END)
+ break;
+
+ if (va->flags & (VM_LAZY_FREE | VM_LAZY_FREEING))
+ continue;
+
+ vmi->used += (va->va_end - va->va_start);
+
+ free_area_size = addr - prev_end;
+ if (vmi->largest_chunk < free_area_size)
+ vmi->largest_chunk = free_area_size;
+
+ prev_end = va->va_end;
+ }
+
+ if (VMALLOC_END - prev_end > vmi->largest_chunk)
+ vmi->largest_chunk = VMALLOC_END - prev_end;
+
+out:
+ spin_unlock(&vmap_area_lock);
+}
#endif
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 88c5fed8b9a4..df78d17aa59d 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -2619,7 +2619,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
bool pgdat_is_balanced = false;
int i;
int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */
- unsigned long total_scanned;
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
@@ -2639,7 +2638,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
.gfp_mask = sc.gfp_mask,
};
loop_again:
- total_scanned = 0;
sc.priority = DEF_PRIORITY;
sc.nr_reclaimed = 0;
sc.may_writepage = !laptop_mode;
@@ -2730,7 +2728,6 @@ loop_again:
order, sc.gfp_mask,
&nr_soft_scanned);
sc.nr_reclaimed += nr_soft_reclaimed;
- total_scanned += nr_soft_scanned;
/*
* We put equal pressure on every zone, unless
@@ -2765,7 +2762,6 @@ loop_again:
reclaim_state->reclaimed_slab = 0;
nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages);
sc.nr_reclaimed += reclaim_state->reclaimed_slab;
- total_scanned += sc.nr_scanned;
if (nr_slab == 0 && !zone_reclaimable(zone))
zone->all_unreclaimable = 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index e1d8ed172c42..292b1cf785e0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -792,7 +792,14 @@ const char * const vmstat_text[] = {
"compact_stall",
"compact_fail",
"compact_success",
-#endif
+
+#ifdef CONFIG_BALLOON_COMPACTION
+ "compact_balloon_isolated",
+ "compact_balloon_migrated",
+ "compact_balloon_returned",
+#endif /* CONFIG_BALLOON_COMPACTION */
+
+#endif /* CONFIG_COMPACTION */
#ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 6048fc1da1c2..5c217427a669 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -2198,7 +2198,7 @@ static inline int f_pick(struct pktgen_dev *pkt_dev)
pkt_dev->curfl = 0; /*reset */
}
} else {
- flow = random32() % pkt_dev->cflows;
+ flow = prandom_u32() % pkt_dev->cflows;
pkt_dev->curfl = flow;
if (pkt_dev->flows[flow].count > pkt_dev->lflow) {
@@ -2246,7 +2246,7 @@ static void set_cur_queue_map(struct pktgen_dev *pkt_dev)
else if (pkt_dev->queue_map_min <= pkt_dev->queue_map_max) {
__u16 t;
if (pkt_dev->flags & F_QUEUE_MAP_RND) {
- t = random32() %
+ t = prandom_u32() %
(pkt_dev->queue_map_max -
pkt_dev->queue_map_min + 1)
+ pkt_dev->queue_map_min;
@@ -2278,7 +2278,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACSRC_RND)
- mc = random32() % pkt_dev->src_mac_count;
+ mc = prandom_u32() % pkt_dev->src_mac_count;
else {
mc = pkt_dev->cur_src_mac_offset++;
if (pkt_dev->cur_src_mac_offset >=
@@ -2304,7 +2304,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__u32 tmp;
if (pkt_dev->flags & F_MACDST_RND)
- mc = random32() % pkt_dev->dst_mac_count;
+ mc = prandom_u32() % pkt_dev->dst_mac_count;
else {
mc = pkt_dev->cur_dst_mac_offset++;
@@ -2331,21 +2331,21 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < pkt_dev->nr_labels; i++)
if (pkt_dev->labels[i] & MPLS_STACK_BOTTOM)
pkt_dev->labels[i] = MPLS_STACK_BOTTOM |
- ((__force __be32)random32() &
+ ((__force __be32)prandom_u32() &
htonl(0x000fffff));
}
if ((pkt_dev->flags & F_VID_RND) && (pkt_dev->vlan_id != 0xffff)) {
- pkt_dev->vlan_id = random32() & (4096-1);
+ pkt_dev->vlan_id = prandom_u32() & (4096 - 1);
}
if ((pkt_dev->flags & F_SVID_RND) && (pkt_dev->svlan_id != 0xffff)) {
- pkt_dev->svlan_id = random32() & (4096 - 1);
+ pkt_dev->svlan_id = prandom_u32() & (4096 - 1);
}
if (pkt_dev->udp_src_min < pkt_dev->udp_src_max) {
if (pkt_dev->flags & F_UDPSRC_RND)
- pkt_dev->cur_udp_src = random32() %
+ pkt_dev->cur_udp_src = prandom_u32() %
(pkt_dev->udp_src_max - pkt_dev->udp_src_min)
+ pkt_dev->udp_src_min;
@@ -2358,7 +2358,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->udp_dst_min < pkt_dev->udp_dst_max) {
if (pkt_dev->flags & F_UDPDST_RND) {
- pkt_dev->cur_udp_dst = random32() %
+ pkt_dev->cur_udp_dst = prandom_u32() %
(pkt_dev->udp_dst_max - pkt_dev->udp_dst_min)
+ pkt_dev->udp_dst_min;
} else {
@@ -2375,7 +2375,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (imn < imx) {
__u32 t;
if (pkt_dev->flags & F_IPSRC_RND)
- t = random32() % (imx - imn) + imn;
+ t = prandom_u32() % (imx - imn) + imn;
else {
t = ntohl(pkt_dev->cur_saddr);
t++;
@@ -2396,17 +2396,15 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
__be32 s;
if (pkt_dev->flags & F_IPDST_RND) {
- t = random32() % (imx - imn) + imn;
- s = htonl(t);
-
- while (ipv4_is_loopback(s) ||
- ipv4_is_multicast(s) ||
- ipv4_is_lbcast(s) ||
- ipv4_is_zeronet(s) ||
- ipv4_is_local_multicast(s)) {
- t = random32() % (imx - imn) + imn;
+ do {
+ t = prandom_u32() %
+ (imx - imn) + imn;
s = htonl(t);
- }
+ } while (ipv4_is_loopback(s) ||
+ ipv4_is_multicast(s) ||
+ ipv4_is_lbcast(s) ||
+ ipv4_is_zeronet(s) ||
+ ipv4_is_local_multicast(s));
pkt_dev->cur_daddr = s;
} else {
t = ntohl(pkt_dev->cur_daddr);
@@ -2437,7 +2435,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
for (i = 0; i < 4; i++) {
pkt_dev->cur_in6_daddr.s6_addr32[i] =
- (((__force __be32)random32() |
+ (((__force __be32)prandom_u32() |
pkt_dev->min_in6_daddr.s6_addr32[i]) &
pkt_dev->max_in6_daddr.s6_addr32[i]);
}
@@ -2447,7 +2445,7 @@ static void mod_cur_headers(struct pktgen_dev *pkt_dev)
if (pkt_dev->min_pkt_size < pkt_dev->max_pkt_size) {
__u32 t;
if (pkt_dev->flags & F_TXSIZE_RND) {
- t = random32() %
+ t = prandom_u32() %
(pkt_dev->max_pkt_size - pkt_dev->min_pkt_size)
+ pkt_dev->min_pkt_size;
} else {
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 9e2d1cccd1eb..4a38c6455b9d 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1747,9 +1747,9 @@ static struct ctl_table vs_vars[] = {
},
{
.procname = "sync_qlen_max",
- .maxlen = sizeof(int),
+ .maxlen = sizeof(unsigned long),
.mode = 0644,
- .proc_handler = proc_dointvec,
+ .proc_handler = proc_doulongvec_minmax,
},
{
.procname = "sync_sock_size",
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c8e001a9c45b..f84965af4a4e 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -264,7 +264,7 @@ static void death_by_event(unsigned long ul_conntrack)
if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
/* bad luck, let's retry again */
ecache->timeout.expires = jiffies +
- (random32() % net->ct.sysctl_events_retry_timeout);
+ (prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
return;
}
@@ -283,7 +283,7 @@ void nf_ct_dying_timeout(struct nf_conn *ct)
/* set a new timer to retry event delivery */
setup_timer(&ecache->timeout, death_by_event, (unsigned long)ct);
ecache->timeout.expires = jiffies +
- (random32() % net->ct.sysctl_events_retry_timeout);
+ (prandom_u32() % net->ct.sysctl_events_retry_timeout);
add_timer(&ecache->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_dying_timeout);
diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index cc37dd52ecf9..ef53ab8d0aae 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -80,7 +80,7 @@ struct choke_sched_data {
/* deliver a random number between 0 and N - 1 */
static u32 random_N(unsigned int N)
{
- return reciprocal_divide(random32(), N);
+ return reciprocal_divide(prandom_u32(), N);
}
/* number of elements in queue including holes */
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 88edec929d73..1da52d1406fc 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -130,8 +130,8 @@ gss_krb5_make_confounder(char *p, u32 conflen)
/* initialize to random value */
if (i == 0) {
- i = random32();
- i = (i << 32) | random32();
+ i = prandom_u32();
+ i = (i << 32) | prandom_u32();
}
switch (conflen) {
diff --git a/scripts/Makefile.headersinst b/scripts/Makefile.headersinst
index 477d137c0557..182084d728c8 100644
--- a/scripts/Makefile.headersinst
+++ b/scripts/Makefile.headersinst
@@ -72,7 +72,7 @@ printdir = $(patsubst $(INSTALL_HDR_PATH)/%/,%,$(dir $@))
quiet_cmd_install = INSTALL $(printdir) ($(words $(all-files))\
file$(if $(word 2, $(all-files)),s))
cmd_install = \
- $(PERL) $< $(installdir) $(SRCARCH) $(input-files); \
+ $(CONFIG_SHELL) $< $(installdir) $(input-files); \
for F in $(wrapper-files); do \
echo "\#include <asm-generic/$$F>" > $(installdir)/$$F; \
done; \
@@ -98,7 +98,7 @@ __headersinst: $(subdirs) $(install-file)
@:
targets += $(install-file)
-$(install-file): scripts/headers_install.pl $(input-files) FORCE
+$(install-file): scripts/headers_install.sh $(input-files) FORCE
$(if $(unwanted),$(call cmd,remove),)
$(if $(wildcard $(dir $@)),,$(shell mkdir -p $(dir $@)))
$(call if_changed,install)
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index a373a1f66023..09e9b4b8a015 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -314,6 +314,11 @@ cmd_lzo = (cat $(filter-out FORCE,$^) | \
lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
(rm -f $@ ; false)
+quiet_cmd_lz4 = LZ4 $@
+cmd_lz4 = (cat $(filter-out FORCE,$^) | \
+ lz4demo -c1 stdin stdout && $(call size_append, $(filter-out FORCE,$^))) > $@ || \
+ (rm -f $@ ; false)
+
# U-Boot mkimage
# ---------------------------------------------------------------------------
diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index b28cc384a5bc..3fb6d8674b2a 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -628,6 +628,13 @@ sub sanitise_line {
return $res;
}
+sub get_quoted_string {
+ my ($line, $rawline) = @_;
+
+ return "" if ($line !~ m/(\"[X]+\")/g);
+ return substr($rawline, $-[0], $+[0] - $-[0]);
+}
+
sub ctx_statement_block {
my ($linenr, $remain, $off) = @_;
my $line = $linenr - 1;
@@ -1576,7 +1583,8 @@ sub process {
# Check for incorrect file permissions
if ($line =~ /^new (file )?mode.*[7531]\d{0,2}$/) {
my $permhere = $here . "FILE: $realfile\n";
- if ($realfile =~ /(Makefile|Kconfig|\.c|\.h|\.S|\.tmpl)$/) {
+ if ($realfile !~ m@scripts/@ &&
+ $realfile !~ /\.(py|pl|awk|sh)$/) {
ERROR("EXECUTE_PERMISSIONS",
"do not set execute permissions for source files\n" . $permhere);
}
@@ -3372,6 +3380,15 @@ sub process {
"struct spinlock should be spinlock_t\n" . $herecurr);
}
+# check for seq_printf uses that could be seq_puts
+ if ($line =~ /\bseq_printf\s*\(/) {
+ my $fmt = get_quoted_string($line, $rawline);
+ if ($fmt !~ /[^\\]\%/) {
+ WARN("PREFER_SEQ_PUTS",
+ "Prefer seq_puts to seq_printf\n" . $herecurr);
+ }
+ }
+
# Check for misused memsets
if ($^V && $^V ge 5.10.0 &&
defined $stat &&
@@ -3476,6 +3493,13 @@ sub process {
"unnecessary cast may hide bugs, see http://c-faq.com/malloc/mallocnocast.html\n" . $herecurr);
}
+# check for krealloc arg reuse
+ if ($^V && $^V ge 5.10.0 &&
+ $line =~ /\b($Lval)\s*\=\s*(?:$balanced_parens)?\s*krealloc\s*\(\s*\1\s*,/) {
+ WARN("KREALLOC_ARG_REUSE",
+ "Reusing the krealloc arg is almost always a bug\n" . $herecurr);
+ }
+
# check for alloc argument mismatch
if ($line =~ /\b(kcalloc|kmalloc_array)\s*\(\s*sizeof\b/) {
WARN("ALLOC_ARRAY_ARGS",
diff --git a/scripts/decodecode b/scripts/decodecode
index 4f8248d5a11f..d8824f37acce 100755
--- a/scripts/decodecode
+++ b/scripts/decodecode
@@ -89,10 +89,16 @@ echo $code >> $T.s
disas $T
cat $T.dis >> $T.aa
+# (lines of whole $T.oo) - (lines of $T.aa, i.e. "Code starting") + 3,
+# i.e. the title + the "===..=" line (sed is counting from 1, 0 address is
+# special)
+faultlinenum=$(( $(wc -l $T.oo | cut -d" " -f1) - \
+ $(wc -l $T.aa | cut -d" " -f1) + 3))
+
faultline=`cat $T.dis | head -1 | cut -d":" -f2-`
faultline=`echo "$faultline" | sed -e 's/\[/\\\[/g; s/\]/\\\]/g'`
-cat $T.oo | sed -e "s/\($faultline\)/\*\1 <-- trapping instruction/g"
+cat $T.oo | sed -e "${faultlinenum}s/^\(.*:\)\(.*\)/\1\*\2\t\t<-- trapping instruction/"
echo
cat $T.aa
cleanup
diff --git a/scripts/get_maintainer.pl b/scripts/get_maintainer.pl
index ce4cc837b748..5e4fb144a04f 100755
--- a/scripts/get_maintainer.pl
+++ b/scripts/get_maintainer.pl
@@ -611,7 +611,7 @@ sub get_maintainers {
$hash{$tvi} = $value_pd;
}
}
- } elsif ($type eq 'K') {
+ } elsif ($type eq 'N') {
if ($file =~ m/$value/x) {
$hash{$tvi} = 0;
}
diff --git a/scripts/headers_install.pl b/scripts/headers_install.pl
deleted file mode 100644
index 581ca99c96f2..000000000000
--- a/scripts/headers_install.pl
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/perl -w
-#
-# headers_install prepare the listed header files for use in
-# user space and copy the files to their destination.
-#
-# Usage: headers_install.pl readdir installdir arch [files...]
-# installdir: dir to install the files to
-# arch: current architecture
-# arch is used to force a reinstallation when the arch
-# changes because kbuild then detect a command line change.
-# files: list of files to check
-#
-# Step in preparation for users space:
-# 1) Drop all use of compiler.h definitions
-# 2) Drop include of compiler.h
-# 3) Drop all sections defined out by __KERNEL__ (using unifdef)
-
-use strict;
-
-my ($installdir, $arch, @files) = @ARGV;
-
-my $unifdef = "scripts/unifdef -U__KERNEL__ -D__EXPORTED_HEADERS__";
-
-foreach my $filename (@files) {
- my $file = $filename;
- $file =~ s!^.*/!!;
-
- my $tmpfile = "$installdir/$file.tmp";
-
- open(my $in, '<', $filename)
- or die "$filename: $!\n";
- open(my $out, '>', $tmpfile)
- or die "$tmpfile: $!\n";
- while (my $line = <$in>) {
- $line =~ s/([\s(])__user\s/$1/g;
- $line =~ s/([\s(])__force\s/$1/g;
- $line =~ s/([\s(])__iomem\s/$1/g;
- $line =~ s/\s__attribute_const__\s/ /g;
- $line =~ s/\s__attribute_const__$//g;
- $line =~ s/\b__packed\b/__attribute__((packed))/g;
- $line =~ s/^#include <linux\/compiler.h>//;
- $line =~ s/(^|\s)(inline)\b/$1__$2__/g;
- $line =~ s/(^|\s)(asm)\b(\s|[(]|$)/$1__$2__$3/g;
- $line =~ s/(^|\s|[(])(volatile)\b(\s|[(]|$)/$1__$2__$3/g;
- $line =~ s/#ifndef\s+_UAPI/#ifndef /;
- $line =~ s/#define\s+_UAPI/#define /;
- $line =~ s!#endif\s+/[*]\s*_UAPI!#endif /* !;
- printf {$out} "%s", $line;
- }
- close $out;
- close $in;
-
- system $unifdef . " $tmpfile > $installdir/$file";
- # unifdef will exit 0 on success, and will exit 1 when the
- # file was processed successfully but no changes were made,
- # so abort only when it's higher than that.
- my $e = $? >> 8;
- if ($e > 1) {
- die "$tmpfile: $!\n";
- }
- unlink $tmpfile;
-}
-exit 0;
diff --git a/scripts/headers_install.sh b/scripts/headers_install.sh
new file mode 100644
index 000000000000..643764f53ea7
--- /dev/null
+++ b/scripts/headers_install.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+
+if [ $# -lt 1 ]
+then
+ echo "Usage: headers_install.sh OUTDIR [FILES...]
+ echo
+ echo "Prepares kernel header files for use by user space, by removing"
+ echo "all compiler.h definitions and #includes, removing any"
+ echo "#ifdef __KERNEL__ sections, and putting __underscores__ around"
+ echo "asm/inline/volatile keywords."
+ echo
+ echo "OUTDIR: directory to write each userspace header FILE to."
+ echo "FILES: list of header files to operate on."
+
+ exit 1
+fi
+
+# Grab arguments
+
+OUTDIR="$1"
+shift
+
+# Iterate through files listed on command line
+
+FILE=
+trap 'rm -f "$OUTDIR/$FILE" "$OUTDIR/$FILE.sed"' EXIT
+for i in "$@"
+do
+ FILE="$(basename "$i")"
+ sed -r \
+ -e 's/([ \t(])(__user|__force|__iomem)[ \t]/\1/g' \
+ -e 's/__attribute_const__([ \t]|$)/\1/g' \
+ -e 's@^#include <linux/compiler.h>@@' \
+ -e 's/(^|[^a-zA-Z0-9])__packed([^a-zA-Z0-9_]|$)/\1__attribute__((packed))\2/g' \
+ -e 's/(^|[ \t(])(inline|asm|volatile)([ \t(]|$)/\1__\2__\3/g' \
+ -e 's@#(ifndef|define|endif[ \t]*/[*])[ \t]*_UAPI@#\1 @' \
+ "$i" > "$OUTDIR/$FILE.sed" || exit 1
+ scripts/unifdef -U__KERNEL__ -D__EXPORTED_HEADERS__ "$OUTDIR/$FILE.sed" \
+ > "$OUTDIR/$FILE"
+ [ $? -gt 1 ] && exit 1
+ rm -f "$OUTDIR/$FILE.sed"
+done
+trap - EXIT
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 8bbefc3b55d4..d4f1468b9b50 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -16,6 +16,8 @@
#include <linux/key-type.h>
#include <linux/task_work.h>
+struct iovec;
+
#ifdef __KDEBUG
#define kenter(FMT, ...) \
printk(KERN_DEBUG "==> %s("FMT")\n", __func__, ##__VA_ARGS__)
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index 4b5c948eb414..33cfd27b4de2 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -22,6 +22,7 @@
#include <linux/err.h>
#include <linux/vmalloc.h>
#include <linux/security.h>
+#include <linux/uio.h>
#include <asm/uaccess.h>
#include "internal.h"
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 4bd6bdb74193..c411f9bb156b 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -93,9 +93,16 @@ static void umh_keys_cleanup(struct subprocess_info *info)
static int call_usermodehelper_keys(char *path, char **argv, char **envp,
struct key *session_keyring, int wait)
{
- return call_usermodehelper_fns(path, argv, envp, wait,
- umh_keys_init, umh_keys_cleanup,
- key_get(session_keyring));
+ struct subprocess_info *info;
+
+ info = call_usermodehelper_setup(path, argv, envp, GFP_KERNEL,
+ umh_keys_init, umh_keys_cleanup,
+ session_keyring);
+ if (!info)
+ return -ENOMEM;
+
+ key_get(session_keyring);
+ return call_usermodehelper_exec(info, wait);
}
/*
diff --git a/sound/core/info.c b/sound/core/info.c
index c9042b4d3695..dca0223f0b55 100644
--- a/sound/core/info.c
+++ b/sound/core/info.c
@@ -949,20 +949,20 @@ EXPORT_SYMBOL(snd_info_free_entry);
int snd_info_register(struct snd_info_entry * entry)
{
struct proc_dir_entry *root, *p = NULL;
+ const struct file_operations *fops = NULL;
if (snd_BUG_ON(!entry))
return -ENXIO;
root = entry->parent == NULL ? snd_proc_root : entry->parent->p;
mutex_lock(&info_mutex);
- p = create_proc_entry(entry->name, entry->mode, root);
+ if (!S_ISDIR(entry->mode))
+ fops = &snd_info_entry_operations;
+ p = proc_create_data(entry->name, entry->mode, root, fops, entry);
if (!p) {
mutex_unlock(&info_mutex);
return -ENOMEM;
}
- if (!S_ISDIR(entry->mode))
- p->proc_fops = &snd_info_entry_operations;
p->size = entry->size;
- p->data = entry;
entry->p = p;
if (entry->parent)
list_add_tail(&entry->list, &entry->parent->children);
diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c
index 5bce9152b64e..479e0a581797 100644
--- a/sound/core/pcm_native.c
+++ b/sound/core/pcm_native.c
@@ -25,7 +25,7 @@
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/pm_qos.h>
-#include <linux/uio.h>
+#include <linux/aio.h>
#include <linux/dma-mapping.h>
#include <sound/core.h>
#include <sound/control.h>
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index a4805932972b..0b7e6c7b6038 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,4 +1,5 @@
TARGETS = breakpoints
+TARGETS += epoll
TARGETS += kcmp
TARGETS += mqueue
TARGETS += vm
@@ -6,6 +7,8 @@ TARGETS += cpu-hotplug
TARGETS += memory-hotplug
TARGETS += efivarfs
TARGETS += net
+TARGETS += timers
+TARGETS += ptrace
all:
for TARGET in $(TARGETS); do \
diff --git a/tools/testing/selftests/epoll/Makefile b/tools/testing/selftests/epoll/Makefile
new file mode 100644
index 000000000000..19806ed62f50
--- /dev/null
+++ b/tools/testing/selftests/epoll/Makefile
@@ -0,0 +1,11 @@
+# Makefile for epoll selftests
+
+all: test_epoll
+%: %.c
+ gcc -pthread -g -o $@ $^
+
+run_tests: all
+ ./test_epoll
+
+clean:
+ $(RM) test_epoll
diff --git a/tools/testing/selftests/epoll/test_epoll.c b/tools/testing/selftests/epoll/test_epoll.c
new file mode 100644
index 000000000000..1034ed4cc5b4
--- /dev/null
+++ b/tools/testing/selftests/epoll/test_epoll.c
@@ -0,0 +1,364 @@
+/*
+ * tools/testing/selftests/epoll/test_epoll.c
+ *
+ * Copyright 2012 Adobe Systems Incorporated
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * Paton J. Lewis <palewis@adobe.com>
+ *
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+
+/*
+ * A pointer to an epoll_item_private structure will be stored in the epoll
+ * item's event structure so that we can get access to the epoll_item_private
+ * data after calling epoll_wait:
+ */
+struct epoll_item_private {
+ int index; /* Position of this struct within the epoll_items array. */
+ int fd;
+ uint32_t events;
+ pthread_mutex_t mutex; /* Guards the following variables... */
+ int stop;
+ int status; /* Stores any error encountered while handling item. */
+ /* The following variable allows us to test whether we have encountered
+ a problem while attempting to cancel and delete the associated
+ event. When the test program exits, 'deleted' should be exactly
+ one. If it is greater than one, then the failed test reflects a real
+ world situation where we would have tried to access the epoll item's
+ private data after deleting it: */
+ int deleted;
+};
+
+struct epoll_item_private *epoll_items;
+
+/*
+ * Delete the specified item from the epoll set. In a real-world secneario this
+ * is where we would free the associated data structure, but in this testing
+ * environment we retain the structure so that we can test for double-deletion:
+ */
+void delete_item(int index)
+{
+ __sync_fetch_and_add(&epoll_items[index].deleted, 1);
+}
+
+/*
+ * A pointer to a read_thread_data structure will be passed as the argument to
+ * each read thread:
+ */
+struct read_thread_data {
+ int stop;
+ int status; /* Indicates any error encountered by the read thread. */
+ int epoll_set;
+};
+
+/*
+ * The function executed by the read threads:
+ */
+void *read_thread_function(void *function_data)
+{
+ struct read_thread_data *thread_data =
+ (struct read_thread_data *)function_data;
+ struct epoll_event event_data;
+ struct epoll_item_private *item_data;
+ char socket_data;
+
+ /* Handle events until we encounter an error or this thread's 'stop'
+ condition is set: */
+ while (1) {
+ int result = epoll_wait(thread_data->epoll_set,
+ &event_data,
+ 1, /* Number of desired events */
+ 1000); /* Timeout in ms */
+ if (result < 0) {
+ /* Breakpoints signal all threads. Ignore that while
+ debugging: */
+ if (errno == EINTR)
+ continue;
+ thread_data->status = errno;
+ return 0;
+ } else if (thread_data->stop)
+ return 0;
+ else if (result == 0) /* Timeout */
+ continue;
+
+ /* We need the mutex here because checking for the stop
+ condition and re-enabling the epoll item need to be done
+ together as one atomic operation when EPOLL_CTL_DISABLE is
+ available: */
+ item_data = (struct epoll_item_private *)event_data.data.ptr;
+ pthread_mutex_lock(&item_data->mutex);
+
+ /* Remove the item from the epoll set if we want to stop
+ handling that event: */
+ if (item_data->stop)
+ delete_item(item_data->index);
+ else {
+ /* Clear the data that was written to the other end of
+ our non-blocking socket: */
+ do {
+ if (read(item_data->fd, &socket_data, 1) < 1) {
+ if ((errno == EAGAIN) ||
+ (errno == EWOULDBLOCK))
+ break;
+ else
+ goto error_unlock;
+ }
+ } while (item_data->events & EPOLLET);
+
+ /* The item was one-shot, so re-enable it: */
+ event_data.events = item_data->events;
+ if (epoll_ctl(thread_data->epoll_set,
+ EPOLL_CTL_MOD,
+ item_data->fd,
+ &event_data) < 0)
+ goto error_unlock;
+ }
+
+ pthread_mutex_unlock(&item_data->mutex);
+ }
+
+error_unlock:
+ thread_data->status = item_data->status = errno;
+ pthread_mutex_unlock(&item_data->mutex);
+ return 0;
+}
+
+/*
+ * A pointer to a write_thread_data structure will be passed as the argument to
+ * the write thread:
+ */
+struct write_thread_data {
+ int stop;
+ int status; /* Indicates any error encountered by the write thread. */
+ int n_fds;
+ int *fds;
+};
+
+/*
+ * The function executed by the write thread. It writes a single byte to each
+ * socket in turn until the stop condition for this thread is set. If writing to
+ * a socket would block (i.e. errno was EAGAIN), we leave that socket alone for
+ * the moment and just move on to the next socket in the list. We don't care
+ * about the order in which we deliver events to the epoll set. In fact we don't
+ * care about the data we're writing to the pipes at all; we just want to
+ * trigger epoll events:
+ */
+void *write_thread_function(void *function_data)
+{
+ const char data = 'X';
+ int index;
+ struct write_thread_data *thread_data =
+ (struct write_thread_data *)function_data;
+ while (!thread_data->stop)
+ for (index = 0;
+ !thread_data->stop && (index < thread_data->n_fds);
+ ++index)
+ if ((write(thread_data->fds[index], &data, 1) < 1) &&
+ (errno != EAGAIN) &&
+ (errno != EWOULDBLOCK)) {
+ thread_data->status = errno;
+ return;
+ }
+}
+
+/*
+ * Arguments are currently ignored:
+ */
+int main(int argc, char **argv)
+{
+ const int n_read_threads = 100;
+ const int n_epoll_items = 500;
+ int index;
+ int epoll_set = epoll_create1(0);
+ struct write_thread_data write_thread_data = {
+ 0, 0, n_epoll_items, malloc(n_epoll_items * sizeof(int))
+ };
+ struct read_thread_data *read_thread_data =
+ malloc(n_read_threads * sizeof(struct read_thread_data));
+ pthread_t *read_threads = malloc(n_read_threads * sizeof(pthread_t));
+ pthread_t write_thread;
+ int socket_pair[2];
+ struct epoll_event event_data;
+
+ printf("-----------------\n");
+ printf("Runing test_epoll\n");
+ printf("-----------------\n");
+
+ epoll_items = malloc(n_epoll_items * sizeof(struct epoll_item_private));
+
+ if (epoll_set < 0 || !epoll_items || write_thread_data.fds == NULL ||
+ !read_thread_data || !read_threads)
+ goto error;
+
+ if (sysconf(_SC_NPROCESSORS_ONLN) < 2) {
+ printf("Error: please run this test on a multi-core system.\n");
+ goto error;
+ }
+
+ /* Create the socket pairs and epoll items: */
+ for (index = 0; index < n_epoll_items; ++index) {
+ if (socketpair(AF_UNIX,
+ SOCK_STREAM | SOCK_NONBLOCK,
+ 0,
+ socket_pair) < 0)
+ goto error;
+ write_thread_data.fds[index] = socket_pair[0];
+ epoll_items[index].index = index;
+ epoll_items[index].fd = socket_pair[1];
+ if (pthread_mutex_init(&epoll_items[index].mutex, NULL) != 0)
+ goto error;
+ /* We always use EPOLLONESHOT because this test is currently
+ structured to demonstrate the need for EPOLL_CTL_DISABLE,
+ which only produces useful information in the EPOLLONESHOT
+ case (without EPOLLONESHOT, calling epoll_ctl with
+ EPOLL_CTL_DISABLE will never return EBUSY). If support for
+ testing events without EPOLLONESHOT is desired, it should
+ probably be implemented in a separate unit test. */
+ epoll_items[index].events = EPOLLIN | EPOLLONESHOT;
+ if (index < n_epoll_items / 2)
+ epoll_items[index].events |= EPOLLET;
+ epoll_items[index].stop = 0;
+ epoll_items[index].status = 0;
+ epoll_items[index].deleted = 0;
+ event_data.events = epoll_items[index].events;
+ event_data.data.ptr = &epoll_items[index];
+ if (epoll_ctl(epoll_set,
+ EPOLL_CTL_ADD,
+ epoll_items[index].fd,
+ &event_data) < 0)
+ goto error;
+ }
+
+#ifdef EPOLL_CTL_DISABLE
+ /* Test to make sure that using EPOLL_CTL_DISABLE without EPOLLONESHOT
+ returns a clear error: */
+ if (socketpair(AF_UNIX,
+ SOCK_STREAM | SOCK_NONBLOCK,
+ 0,
+ socket_pair) < 0)
+ goto error;
+ event_data.events = EPOLLIN;
+ event_data.data.ptr = NULL;
+ if (epoll_ctl(epoll_set, EPOLL_CTL_ADD,
+ socket_pair[1], &event_data) < 0)
+ goto error;
+ if ((epoll_ctl(epoll_set, EPOLL_CTL_DISABLE,
+ socket_pair[1], NULL) == 0) || (errno != EINVAL))
+ goto error;
+ if (epoll_ctl(epoll_set, EPOLL_CTL_DEL, socket_pair[1], NULL) != 0)
+ goto error;
+#endif
+
+ /* Create and start the read threads: */
+ for (index = 0; index < n_read_threads; ++index) {
+ read_thread_data[index].stop = 0;
+ read_thread_data[index].status = 0;
+ read_thread_data[index].epoll_set = epoll_set;
+ if (pthread_create(&read_threads[index],
+ NULL,
+ read_thread_function,
+ &read_thread_data[index]) != 0)
+ goto error;
+ }
+
+ if (pthread_create(&write_thread,
+ NULL,
+ write_thread_function,
+ &write_thread_data) != 0)
+ goto error;
+
+ /* Cancel all event pollers: */
+#ifdef EPOLL_CTL_DISABLE
+ for (index = 0; index < n_epoll_items; ++index) {
+ pthread_mutex_lock(&epoll_items[index].mutex);
+ ++epoll_items[index].stop;
+ if (epoll_ctl(epoll_set,
+ EPOLL_CTL_DISABLE,
+ epoll_items[index].fd,
+ NULL) == 0)
+ delete_item(index);
+ else if (errno != EBUSY) {
+ pthread_mutex_unlock(&epoll_items[index].mutex);
+ goto error;
+ }
+ /* EBUSY means events were being handled; allow the other thread
+ to delete the item. */
+ pthread_mutex_unlock(&epoll_items[index].mutex);
+ }
+#else
+ for (index = 0; index < n_epoll_items; ++index) {
+ pthread_mutex_lock(&epoll_items[index].mutex);
+ ++epoll_items[index].stop;
+ pthread_mutex_unlock(&epoll_items[index].mutex);
+ /* Wait in case a thread running read_thread_function is
+ currently executing code between epoll_wait and
+ pthread_mutex_lock with this item. Note that a longer delay
+ would make double-deletion less likely (at the expense of
+ performance), but there is no guarantee that any delay would
+ ever be sufficient. Note also that we delete all event
+ pollers at once for testing purposes, but in a real-world
+ environment we are likely to want to be able to cancel event
+ pollers at arbitrary times. Therefore we can't improve this
+ situation by just splitting this loop into two loops
+ (i.e. signal 'stop' for all items, sleep, and then delete all
+ items). We also can't fix the problem via EPOLL_CTL_DEL
+ because that command can't prevent the case where some other
+ thread is executing read_thread_function within the region
+ mentioned above: */
+ usleep(1);
+ pthread_mutex_lock(&epoll_items[index].mutex);
+ if (!epoll_items[index].deleted)
+ delete_item(index);
+ pthread_mutex_unlock(&epoll_items[index].mutex);
+ }
+#endif
+
+ /* Shut down the read threads: */
+ for (index = 0; index < n_read_threads; ++index)
+ __sync_fetch_and_add(&read_thread_data[index].stop, 1);
+ for (index = 0; index < n_read_threads; ++index) {
+ if (pthread_join(read_threads[index], NULL) != 0)
+ goto error;
+ if (read_thread_data[index].status)
+ goto error;
+ }
+
+ /* Shut down the write thread: */
+ __sync_fetch_and_add(&write_thread_data.stop, 1);
+ if ((pthread_join(write_thread, NULL) != 0) || write_thread_data.status)
+ goto error;
+
+ /* Check for final error conditions: */
+ for (index = 0; index < n_epoll_items; ++index) {
+ if (epoll_items[index].status != 0)
+ goto error;
+ if (pthread_mutex_destroy(&epoll_items[index].mutex) < 0)
+ goto error;
+ }
+ for (index = 0; index < n_epoll_items; ++index)
+ if (epoll_items[index].deleted != 1) {
+ printf("Error: item data deleted %1d times.\n",
+ epoll_items[index].deleted);
+ goto error;
+ }
+
+ printf("[PASS]\n");
+ return 0;
+
+ error:
+ printf("[FAIL]\n");
+ return errno;
+}
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
new file mode 100644
index 000000000000..47ae2d385ce8
--- /dev/null
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -0,0 +1,10 @@
+CFLAGS += -iquote../../../../include/uapi -Wall
+peeksiginfo: peeksiginfo.c
+
+all: peeksiginfo
+
+clean:
+ rm -f peeksiginfo
+
+run_tests: all
+ @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]"
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
new file mode 100644
index 000000000000..d46558b1f58d
--- /dev/null
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -0,0 +1,214 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <sys/mman.h>
+
+#include "linux/ptrace.h"
+
+static int sys_rt_sigqueueinfo(pid_t tgid, int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_sigqueueinfo, tgid, sig, uinfo);
+}
+
+static int sys_rt_tgsigqueueinfo(pid_t tgid, pid_t tid,
+ int sig, siginfo_t *uinfo)
+{
+ return syscall(SYS_rt_tgsigqueueinfo, tgid, tid, sig, uinfo);
+}
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+ return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+#define SIGNR 10
+#define TEST_SICODE_PRIV -1
+#define TEST_SICODE_SHARE -2
+
+#define err(fmt, ...) \
+ fprintf(stderr, \
+ "Error (%s:%d): " fmt, \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+static int check_error_paths(pid_t child)
+{
+ struct ptrace_peeksiginfo_args arg;
+ int ret, exit_code = -1;
+ void *addr_rw, *addr_ro;
+
+ /*
+ * Allocate two contiguous pages. The first one is for read-write,
+ * another is for read-only.
+ */
+ addr_rw = mmap(NULL, 2 * PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr_rw == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ return 1;
+ }
+
+ addr_ro = mmap(addr_rw + PAGE_SIZE, PAGE_SIZE, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ if (addr_ro == MAP_FAILED) {
+ err("mmap() failed: %m\n");
+ goto out;
+ }
+
+ arg.nr = SIGNR;
+ arg.off = 0;
+
+ /* Unsupported flags */
+ arg.flags = ~0;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_rw);
+ if (ret != -1 || errno != EINVAL) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EINVAL);
+ goto out;
+ }
+ arg.flags = 0;
+
+ /* A part of the buffer is read-only */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg,
+ addr_ro - sizeof(siginfo_t) * 2);
+ if (ret != 2) {
+ err("sys_ptrace() returns %d (expected 2): %m\n", ret);
+ goto out;
+ }
+
+ /* Read-only buffer */
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, addr_ro);
+ if (ret != -1 && errno != EFAULT) {
+ err("sys_ptrace() returns %d (expected -1),"
+ " errno %d (expected %d): %m\n",
+ ret, errno, EFAULT);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ munmap(addr_rw, 2 * PAGE_SIZE);
+ return exit_code;
+}
+
+int check_direct_path(pid_t child, int shared, int nr)
+{
+ struct ptrace_peeksiginfo_args arg = {.flags = 0, .nr = nr, .off = 0};
+ int i, j, ret, exit_code = -1;
+ siginfo_t siginfo[SIGNR];
+ int si_code;
+
+ if (shared == 1) {
+ arg.flags = PTRACE_PEEKSIGINFO_SHARED;
+ si_code = TEST_SICODE_SHARE;
+ } else {
+ arg.flags = 0;
+ si_code = TEST_SICODE_PRIV;
+ }
+
+ for (i = 0; i < SIGNR; ) {
+ arg.off = i;
+ ret = sys_ptrace(PTRACE_PEEKSIGINFO, child, &arg, siginfo);
+ if (ret == -1) {
+ err("ptrace() failed: %m\n");
+ goto out;
+ }
+
+ if (ret == 0)
+ break;
+
+ for (j = 0; j < ret; j++, i++) {
+ if (siginfo[j].si_code == si_code &&
+ siginfo[j].si_int == i)
+ continue;
+
+ err("%d: Wrong siginfo i=%d si_code=%d si_int=%d\n",
+ shared, i, siginfo[j].si_code, siginfo[j].si_int);
+ goto out;
+ }
+ }
+
+ if (i != SIGNR) {
+ err("Only %d signals were read\n", i);
+ goto out;
+ }
+
+ exit_code = 0;
+out:
+ return exit_code;
+}
+
+int main(int argc, char *argv[])
+{
+ siginfo_t siginfo[SIGNR];
+ int i, exit_code = 1;
+ sigset_t blockmask;
+ pid_t child;
+
+ sigemptyset(&blockmask);
+ sigaddset(&blockmask, SIGRTMIN);
+ sigprocmask(SIG_BLOCK, &blockmask, NULL);
+
+ child = fork();
+ if (child == -1) {
+ err("fork() failed: %m");
+ return 1;
+ } else if (child == 0) {
+ pid_t ppid = getppid();
+ while (1) {
+ if (ppid != getppid())
+ break;
+ sleep(1);
+ }
+ return 1;
+ }
+
+ /* Send signals in process-wide and per-thread queues */
+ for (i = 0; i < SIGNR; i++) {
+ siginfo->si_code = TEST_SICODE_SHARE;
+ siginfo->si_int = i;
+ sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
+
+ siginfo->si_code = TEST_SICODE_PRIV;
+ siginfo->si_int = i;
+ sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
+ }
+
+ if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+ /* Dump signals one by one*/
+ if (check_direct_path(child, 0, 1))
+ goto out;
+ /* Dump all signals for one call */
+ if (check_direct_path(child, 0, SIGNR))
+ goto out;
+
+ /*
+ * Dump signal from the process-wide queue.
+ * The number of signals is not multible to the buffer size
+ */
+ if (check_direct_path(child, 1, 3))
+ goto out;
+
+ if (check_error_paths(child))
+ goto out;
+
+ printf("PASS\n");
+ exit_code = 0;
+out:
+ if (sys_ptrace(PTRACE_KILL, child, NULL, NULL) == -1)
+ return 1;
+
+ waitpid(child, NULL, 0);
+
+ return exit_code;
+}
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
new file mode 100644
index 000000000000..eb2859f4ad21
--- /dev/null
+++ b/tools/testing/selftests/timers/Makefile
@@ -0,0 +1,8 @@
+all:
+ gcc posix_timers.c -o posix_timers -lrt
+
+run_tests: all
+ ./posix_timers
+
+clean:
+ rm -f ./posix_timers
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
new file mode 100644
index 000000000000..4fa655d68a81
--- /dev/null
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -0,0 +1,221 @@
+/*
+ * Copyright (C) 2013 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com>
+ *
+ * Licensed under the terms of the GNU GPL License version 2
+ *
+ * Selftests for a few posix timers interface.
+ *
+ * Kernel loop code stolen from Steven Rostedt <srostedt@redhat.com>
+ */
+
+#include <sys/time.h>
+#include <stdio.h>
+#include <signal.h>
+#include <unistd.h>
+#include <time.h>
+#include <pthread.h>
+
+#define DELAY 2
+#define USECS_PER_SEC 1000000
+
+static volatile int done;
+
+/* Busy loop in userspace to elapse ITIMER_VIRTUAL */
+static void user_loop(void)
+{
+ while (!done);
+}
+
+/*
+ * Try to spend as much time as possible in kernelspace
+ * to elapse ITIMER_PROF.
+ */
+static void kernel_loop(void)
+{
+ void *addr = sbrk(0);
+
+ while (!done) {
+ brk(addr + 4096);
+ brk(addr);
+ }
+}
+
+/*
+ * Sleep until ITIMER_REAL expiration.
+ */
+static void idle_loop(void)
+{
+ pause();
+}
+
+static void sig_handler(int nr)
+{
+ done = 1;
+}
+
+/*
+ * Check the expected timer expiration matches the GTOD elapsed delta since
+ * we armed the timer. Keep a 0.5 sec error margin due to various jitter.
+ */
+static int check_diff(struct timeval start, struct timeval end)
+{
+ long long diff;
+
+ diff = end.tv_usec - start.tv_usec;
+ diff += (end.tv_sec - start.tv_sec) * USECS_PER_SEC;
+
+ if (abs(diff - DELAY * USECS_PER_SEC) > USECS_PER_SEC / 2) {
+ printf("Diff too high: %lld..", diff);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_itimer(int which)
+{
+ int err;
+ struct timeval start, end;
+ struct itimerval val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check itimer ");
+
+ if (which == ITIMER_VIRTUAL)
+ printf("virtual... ");
+ else if (which == ITIMER_PROF)
+ printf("prof... ");
+ else if (which == ITIMER_REAL)
+ printf("real... ");
+
+ fflush(stdout);
+
+ done = 0;
+
+ if (which == ITIMER_VIRTUAL)
+ signal(SIGVTALRM, sig_handler);
+ else if (which == ITIMER_PROF)
+ signal(SIGPROF, sig_handler);
+ else if (which == ITIMER_REAL)
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ err = setitimer(which, &val, NULL);
+ if (err < 0) {
+ perror("Can't set timer\n");
+ return -1;
+ }
+
+ if (which == ITIMER_VIRTUAL)
+ user_loop();
+ else if (which == ITIMER_PROF)
+ kernel_loop();
+ else if (which == ITIMER_REAL)
+ idle_loop();
+
+ gettimeofday(&end, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+static int check_timer_create(int which)
+{
+ int err;
+ timer_t id;
+ struct timeval start, end;
+ struct itimerspec val = {
+ .it_value.tv_sec = DELAY,
+ };
+
+ printf("Check timer_create() ");
+ if (which == CLOCK_THREAD_CPUTIME_ID) {
+ printf("per thread... ");
+ } else if (which == CLOCK_PROCESS_CPUTIME_ID) {
+ printf("per process... ");
+ }
+ fflush(stdout);
+
+ done = 0;
+ timer_create(which, NULL, &id);
+ if (err < 0) {
+ perror("Can't create timer\n");
+ return -1;
+ }
+ signal(SIGALRM, sig_handler);
+
+ err = gettimeofday(&start, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ err = timer_settime(id, 0, &val, NULL);
+ if (err < 0) {
+ perror("Can't set timer\n");
+ return -1;
+ }
+
+ user_loop();
+
+ gettimeofday(&end, NULL);
+ if (err < 0) {
+ perror("Can't call gettimeofday()\n");
+ return -1;
+ }
+
+ if (!check_diff(start, end))
+ printf("[OK]\n");
+ else
+ printf("[FAIL]\n");
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int err;
+
+ printf("Testing posix timers. False negative may happen on CPU execution \n");
+ printf("based timers if other threads run on the CPU...\n");
+
+ if (check_itimer(ITIMER_VIRTUAL) < 0)
+ return -1;
+
+ if (check_itimer(ITIMER_PROF) < 0)
+ return -1;
+
+ if (check_itimer(ITIMER_REAL) < 0)
+ return -1;
+
+ if (check_timer_create(CLOCK_THREAD_CPUTIME_ID) < 0)
+ return -1;
+
+ /*
+ * It's unfortunately hard to reliably test a timer expiration
+ * on parallel multithread cputime. We could arm it to expire
+ * on DELAY * nr_threads, with nr_threads busy looping, then wait
+ * the normal DELAY since the time is elapsing nr_threads faster.
+ * But for that we need to ensure we have real physical free CPUs
+ * to ensure true parallelism. So test only one thread until we
+ * find a better solution.
+ */
+ if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
+ return -1;
+
+ return 0;
+}
diff --git a/usr/Kconfig b/usr/Kconfig
index 085872bb2bb5..642f503d3e9f 100644
--- a/usr/Kconfig
+++ b/usr/Kconfig
@@ -90,6 +90,15 @@ config RD_LZO
Support loading of a LZO encoded initial ramdisk or cpio buffer
If unsure, say N.
+config RD_LZ4
+ bool "Support initial ramdisks compressed using LZ4" if EXPERT
+ default !EXPERT
+ depends on BLK_DEV_INITRD
+ select DECOMPRESS_LZ4
+ help
+ Support loading of a LZ4 encoded initial ramdisk or cpio buffer
+ If unsure, say N.
+
choice
prompt "Built-in initramfs compression mode" if INITRAMFS_SOURCE!=""
help