Diffstat (limited to 'include/linux')
113 files changed, 15851 insertions, 0 deletions
diff --git a/include/linux/atomic.h b/include/linux/atomic.h new file mode 100644 index 0000000..ad33ad3 --- /dev/null +++ b/include/linux/atomic.h @@ -0,0 +1,250 @@ +#ifndef __TOOLS_LINUX_ATOMIC_H +#define __TOOLS_LINUX_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#define xchg(p, v) \ + __atomic_exchange_n(p, v, __ATOMIC_SEQ_CST) + +#define xchg_acquire(p, v) \ + __atomic_exchange_n(p, v, __ATOMIC_ACQUIRE) + +#define cmpxchg(p, old, new) \ +({ \ + typeof(*(p)) __old = (old); \ + \ + __atomic_compare_exchange_n((p), &__old, new, false, \ + __ATOMIC_SEQ_CST, \ + __ATOMIC_SEQ_CST); \ + __old; \ +}) + +#define cmpxchg_acquire(p, old, new) \ +({ \ + typeof(*(p)) __old = (old); \ + \ + __atomic_compare_exchange_n((p), &__old, new, false, \ + __ATOMIC_ACQUIRE, \ + __ATOMIC_ACQUIRE); \ + __old; \ +}) + +#define smp_mb__before_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_mb__after_atomic() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_wmb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_rmb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_mb() __atomic_thread_fence(__ATOMIC_SEQ_CST) +#define smp_read_barrier_depends() + +#define smp_store_mb(var, value) do { WRITE_ONCE(var, value); smp_mb(); } while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + smp_mb(); \ + ___p1; \ +}) + +#define smp_store_release(p, v) \ +do { \ + smp_mb(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +typedef struct { + int counter; +} atomic_t; + +static inline int atomic_read(const atomic_t *v) +{ + return __atomic_load_n(&v->counter, __ATOMIC_RELAXED); +} + +static inline void atomic_set(atomic_t *v, int i) +{ + __atomic_store_n(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline int atomic_add_return(int i, atomic_t *v) +{ + return __atomic_add_fetch(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline int atomic_sub_return(int i, atomic_t *v) +{ + return __atomic_sub_fetch(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline int atomic_add_negative(int i, atomic_t *v) +{ + return atomic_add_return(i, v) < 0; +} + +static inline void atomic_add(int i, atomic_t *v) +{ + atomic_add_return(i, v); +} + +static inline void atomic_sub(int i, atomic_t *v) +{ + atomic_sub_return(i, v); +} + +static inline void atomic_inc(atomic_t *v) +{ + atomic_add(1, v); +} + +static inline void atomic_dec(atomic_t *v) +{ + atomic_sub(1, v); +} + +#define atomic_dec_return(v) atomic_sub_return(1, (v)) +#define atomic_inc_return(v) atomic_add_return(1, (v)) + +#define atomic_sub_and_test(i, v) (atomic_sub_return((i), (v)) == 0) +#define atomic_dec_and_test(v) (atomic_dec_return(v) == 0) +#define atomic_inc_and_test(v) (atomic_inc_return(v) == 0) + +#define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) +#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) + +static inline int atomic_add_unless(atomic_t *v, int a, int u) +{ + int c, old; + c = atomic_read(v); + while (c != u && (old = atomic_cmpxchg(v, c, c + a)) != c) + c = old; + return c; +} + +#define atomic_inc_not_zero(v) atomic_add_unless((v), 1, 0) + +typedef struct { + long counter; +} atomic_long_t; + +static inline long atomic_long_read(const atomic_long_t *v) +{ + return __atomic_load_n(&v->counter, __ATOMIC_RELAXED); +} + +static inline void atomic_long_set(atomic_long_t *v, long i) +{ + __atomic_store_n(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline long atomic_long_add_return(long i, atomic_long_t *v) +{ + return __atomic_add_fetch(&v->counter, i, 
__ATOMIC_RELAXED); +} + +static inline long atomic_long_sub_return(long i, atomic_long_t *v) +{ + return __atomic_sub_fetch(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline void atomic_long_add(long i, atomic_long_t *v) +{ + atomic_long_add_return(i, v); +} + +static inline void atomic_long_sub(long i, atomic_long_t *v) +{ + atomic_long_sub_return(i, v); +} + +static inline void atomic_long_inc(atomic_long_t *v) +{ + atomic_long_add(1, v); +} + +static inline void atomic_long_dec(atomic_long_t *v) +{ + atomic_long_sub(1, v); +} + +static inline long atomic_long_cmpxchg(atomic_long_t *v, long old, long new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline bool atomic_long_inc_not_zero(atomic_long_t *i) +{ + long old, v = atomic_long_read(i); + + do { + if (!(old = v)) + return false; + } while ((v = atomic_long_cmpxchg(i, old, old + 1)) != old); + + return true; +} + +#define atomic_long_sub_and_test(i, v) (atomic_long_sub_return((i), (v)) == 0) + +typedef struct { + u64 counter; +} atomic64_t; + +static inline s64 atomic64_read(const atomic64_t *v) +{ + return __atomic_load_n(&v->counter, __ATOMIC_RELAXED); +} + +static inline void atomic64_set(atomic64_t *v, s64 i) +{ + __atomic_store_n(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline s64 atomic64_add_return(s64 i, atomic64_t *v) +{ + return __atomic_add_fetch(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline s64 atomic64_sub_return(s64 i, atomic64_t *v) +{ + return __atomic_sub_fetch(&v->counter, i, __ATOMIC_RELAXED); +} + +static inline void atomic64_add(s64 i, atomic64_t *v) +{ + atomic64_add_return(i, v); +} + +static inline void atomic64_sub(s64 i, atomic64_t *v) +{ + atomic64_sub_return(i, v); +} + +static inline void atomic64_inc(atomic64_t *v) +{ + atomic64_add(1, v); +} + +static inline void atomic64_dec(atomic64_t *v) +{ + atomic64_sub(1, v); +} + +#define atomic64_dec_return(v) atomic64_sub_return(1, (v)) +#define atomic64_inc_return(v) atomic64_add_return(1, (v)) + +static inline s64 atomic64_cmpxchg(atomic64_t *v, s64 old, s64 new) +{ + return cmpxchg(&v->counter, old, new); +} + +static inline s64 atomic64_cmpxchg_acquire(atomic64_t *v, s64 old, s64 new) +{ + return cmpxchg_acquire(&v->counter, old, new); +} + +static inline s64 atomic64_add_return_release(s64 i, atomic64_t *v) +{ + return __atomic_add_fetch(&v->counter, i, __ATOMIC_RELEASE); +} + +#endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h new file mode 100644 index 0000000..a68fca4 --- /dev/null +++ b/include/linux/backing-dev.h @@ -0,0 +1,42 @@ +#ifndef _LINUX_BACKING_DEV_H +#define _LINUX_BACKING_DEV_H + +typedef int (congested_fn)(void *, int); + +enum wb_congested_state { + WB_async_congested, /* The async (write) queue is getting full */ + WB_sync_congested, /* The sync queue is getting full */ +}; + +struct backing_dev_info { + unsigned ra_pages; + unsigned capabilities; + + congested_fn *congested_fn; + void *congested_data; +}; + +#define BDI_CAP_NO_ACCT_DIRTY 0x00000001 +#define BDI_CAP_NO_WRITEBACK 0x00000002 +#define BDI_CAP_NO_ACCT_WB 0x00000004 +#define BDI_CAP_STABLE_WRITES 0x00000008 +#define BDI_CAP_STRICTLIMIT 0x00000010 +#define BDI_CAP_CGROUP_WRITEBACK 0x00000020 + +static inline int bdi_congested(struct backing_dev_info *bdi, int cong_bits) +{ + return 0; +} + +static inline int __must_check bdi_setup_and_register(struct backing_dev_info *bdi, + char *name) +{ + bdi->capabilities = 0; + return 0; +} + +static inline void bdi_destroy(struct backing_dev_info 
*bdi) {} + +#define VM_MAX_READAHEAD 128 /* kbytes */ + +#endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/bcache-ioctl.h b/include/linux/bcache-ioctl.h new file mode 100644 index 0000000..8ca2fdb --- /dev/null +++ b/include/linux/bcache-ioctl.h @@ -0,0 +1,86 @@ +#ifndef _LINUX_BCACHE_IOCTL_H +#define _LINUX_BCACHE_IOCTL_H + +#include <linux/bcache.h> +#include <linux/uuid.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* global control dev: */ + +#define BCH_FORCE_IF_DATA_MISSING (1 << 0) +#define BCH_FORCE_IF_METADATA_MISSING (1 << 1) + +#define BCH_IOCTL_ASSEMBLE _IOW('r', 1, struct bch_ioctl_assemble) +#define BCH_IOCTL_INCREMENTAL _IOW('r', 1, struct bch_ioctl_incremental) + +/* cache set control dev: */ + +#define BCH_IOCTL_RUN _IO('r', 2) +#define BCH_IOCTL_STOP _IO('r', 3) + +#define BCH_IOCTL_DISK_ADD _IOW('r', 4, struct bch_ioctl_disk_add) +#define BCH_IOCTL_DISK_REMOVE _IOW('r', 5, struct bch_ioctl_disk_remove) +#define BCH_IOCTL_DISK_FAIL _IOW('r', 6, struct bch_ioctl_disk_fail) + +#define BCH_IOCTL_DISK_REMOVE_BY_UUID \ + _IOW('r', 5, struct bch_ioctl_disk_remove_by_uuid) +#define BCH_IOCTL_DISK_FAIL_BY_UUID \ + _IOW('r', 6, struct bch_ioctl_disk_fail_by_uuid) + +#define BCH_IOCTL_QUERY_UUID _IOR('r', 6, struct bch_ioctl_query_uuid) + +struct bch_ioctl_assemble { + __u32 flags; + __u32 nr_devs; + __u64 pad; + __u64 devs[]; +}; + +struct bch_ioctl_incremental { + __u32 flags; + __u64 pad; + __u64 dev; +}; + +struct bch_ioctl_disk_add { + __u32 flags; + __u32 pad; + __u64 dev; +}; + +struct bch_ioctl_disk_remove { + __u32 flags; + __u32 pad; + __u64 dev; +}; + +struct bch_ioctl_disk_fail { + __u32 flags; + __u32 pad; + __u64 dev; +}; + +struct bch_ioctl_disk_remove_by_uuid { + __u32 flags; + __u32 pad; + uuid_le dev; +}; + +struct bch_ioctl_disk_fail_by_uuid { + __u32 flags; + __u32 pad; + uuid_le dev; +}; + +struct bch_ioctl_query_uuid { + uuid_le uuid; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _LINUX_BCACHE_IOCTL_H */ diff --git a/include/linux/bcache.h b/include/linux/bcache.h new file mode 100644 index 0000000..f09a44a --- /dev/null +++ b/include/linux/bcache.h @@ -0,0 +1,1284 @@ +#ifndef _LINUX_BCACHE_H +#define _LINUX_BCACHE_H + +/* + * Bcache on disk data structures + */ + +#ifdef __cplusplus +typedef bool _Bool; +extern "C" { +#endif + +#include <asm/types.h> +#include <asm/byteorder.h> +#include <linux/uuid.h> + +#define LE32_BITMASK(name, type, field, offset, end) \ +static const unsigned name##_OFFSET = offset; \ +static const unsigned name##_BITS = (end - offset); \ +static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ + \ +static inline __u64 name(const type *k) \ +{ \ + return (__le32_to_cpu(k->field) >> offset) & \ + ~(~0ULL << (end - offset)); \ +} \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + __u64 new = __le32_to_cpu(k->field); \ + \ + new &= ~(~(~0ULL << (end - offset)) << offset); \ + new |= (v & ~(~0ULL << (end - offset))) << offset; \ + k->field = __cpu_to_le32(new); \ +} + +#define LE64_BITMASK(name, type, field, offset, end) \ +static const unsigned name##_OFFSET = offset; \ +static const unsigned name##_BITS = (end - offset); \ +static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ + \ +static inline __u64 name(const type *k) \ +{ \ + return (__le64_to_cpu(k->field) >> offset) & \ + ~(~0ULL << (end - offset)); \ +} \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + __u64 new = __le64_to_cpu(k->field); \ + \ + new &= ~(~(~0ULL << (end - offset)) << offset); \ + new |= (v & ~(~0ULL 
<< (end - offset))) << offset; \ + k->field = __cpu_to_le64(new); \ +} + +struct bkey_format { + __u8 key_u64s; + __u8 nr_fields; + /* One unused slot for now: */ + __u8 bits_per_field[6]; + __le64 field_offset[6]; +}; + +/* Btree keys - all units are in sectors */ + +struct bpos { + /* Word order matches machine byte order */ +#if defined(__LITTLE_ENDIAN) + __u32 snapshot; + __u64 offset; + __u64 inode; +#elif defined(__BIG_ENDIAN) + __u64 inode; + __u64 offset; /* Points to end of extent - sectors */ + __u32 snapshot; +#else +#error edit for your odd byteorder. +#endif +} __attribute__((packed, aligned(4))); + +#define KEY_INODE_MAX ((__u64)~0ULL) +#define KEY_OFFSET_MAX ((__u64)~0ULL) +#define KEY_SNAPSHOT_MAX ((__u32)~0U) + +static inline struct bpos POS(__u64 inode, __u64 offset) +{ + struct bpos ret; + + ret.inode = inode; + ret.offset = offset; + ret.snapshot = 0; + + return ret; +} + +#define POS_MIN POS(0, 0) +#define POS_MAX POS(KEY_INODE_MAX, KEY_OFFSET_MAX) + +/* Empty placeholder struct, for container_of() */ +struct bch_val { + __u64 __nothing[0]; +}; + +struct bkey { + __u64 _data[0]; + + /* Size of combined key and value, in u64s */ + __u8 u64s; + + /* Format of key (0 for format local to btree node) */ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 format:7, + needs_whiteout:1; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 needs_whiteout:1, + format:7; +#else +#error edit for your odd byteorder. +#endif + + /* Type of the value */ + __u8 type; + +#if defined(__LITTLE_ENDIAN) + __u8 pad[1]; + + __u32 version; + __u32 size; /* extent size, in sectors */ + struct bpos p; +#elif defined(__BIG_ENDIAN) + struct bpos p; + __u32 size; /* extent size, in sectors */ + __u32 version; + + __u8 pad[1]; +#endif +} __attribute__((packed, aligned(8))); + +struct bkey_packed { + __u64 _data[0]; + + /* Size of combined key and value, in u64s */ + __u8 u64s; + + /* Format of key (0 for format local to btree node) */ + + /* + * XXX: next incompat on disk format change, switch format and + * needs_whiteout - bkey_packed() will be cheaper if format is the high + * bits of the bitfield + */ +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u8 format:7, + needs_whiteout:1; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u8 needs_whiteout:1, + format:7; +#endif + + /* Type of the value */ + __u8 type; + __u8 key_start[0]; + + /* + * We copy bkeys with struct assignment in various places, and while + * that shouldn't be done with packed bkeys we can't disallow it in C, + * and it's legal to cast a bkey to a bkey_packed - so padding it out + * to the same size as struct bkey should hopefully be safest. 
+ */ + __u8 pad[sizeof(struct bkey) - 3]; +} __attribute__((packed, aligned(8))); + +#define BKEY_U64s (sizeof(struct bkey) / sizeof(__u64)) +#define KEY_PACKED_BITS_START 24 + +#define KEY_SIZE_MAX ((__u32)~0U) + +#define KEY_FORMAT_LOCAL_BTREE 0 +#define KEY_FORMAT_CURRENT 1 + +enum bch_bkey_fields { + BKEY_FIELD_INODE, + BKEY_FIELD_OFFSET, + BKEY_FIELD_SNAPSHOT, + BKEY_FIELD_SIZE, + BKEY_FIELD_VERSION, + BKEY_NR_FIELDS, +}; + +#define bkey_format_field(name, field) \ + [BKEY_FIELD_##name] = (sizeof(((struct bkey *) NULL)->field) * 8) + +#define BKEY_FORMAT_CURRENT \ +((struct bkey_format) { \ + .key_u64s = BKEY_U64s, \ + .nr_fields = BKEY_NR_FIELDS, \ + .bits_per_field = { \ + bkey_format_field(INODE, p.inode), \ + bkey_format_field(OFFSET, p.offset), \ + bkey_format_field(SNAPSHOT, p.snapshot), \ + bkey_format_field(SIZE, size), \ + bkey_format_field(VERSION, version), \ + }, \ +}) + +/* bkey with inline value */ +struct bkey_i { + struct bkey k; + struct bch_val v; +}; + +#ifndef __cplusplus + +#define KEY(_inode, _offset, _size) \ +((struct bkey) { \ + .u64s = BKEY_U64s, \ + .format = KEY_FORMAT_CURRENT, \ + .p = POS(_inode, _offset), \ + .size = _size, \ +}) + +#else + +static inline struct bkey KEY(__u64 inode, __u64 offset, __u64 size) +{ + struct bkey ret; + + memset(&ret, 0, sizeof(ret)); + ret.u64s = BKEY_U64s; + ret.format = KEY_FORMAT_CURRENT; + ret.p.inode = inode; + ret.p.offset = offset; + ret.size = size; + + return ret; +} + +#endif + +static inline void bkey_init(struct bkey *k) +{ + *k = KEY(0, 0, 0); +} + +#define bkey_bytes(_k) ((_k)->u64s * sizeof(__u64)) + +#define __BKEY_PADDED(key, pad) \ + struct { struct bkey_i key; __u64 key ## _pad[pad]; } + +#define BKEY_VAL_TYPE(name, nr) \ +struct bkey_i_##name { \ + union { \ + struct bkey k; \ + struct bkey_i k_i; \ + }; \ + struct bch_##name v; \ +} + +/* + * - DELETED keys are used internally to mark keys that should be ignored but + * override keys in composition order. Their version number is ignored. + * + * - DISCARDED keys indicate that the data is all 0s because it has been + * discarded. DISCARDs may have a version; if the version is nonzero the key + * will be persistent, otherwise the key will be dropped whenever the btree + * node is rewritten (like DELETED keys). + * + * - ERROR: any read of the data returns a read error, as the data was lost due + * to a failing device. Like DISCARDED keys, they can be removed (overridden) + * by new writes or cluster-wide GC. Node repair can also overwrite them with + * the same or a more recent version number, but not with an older version + * number. +*/ +#define KEY_TYPE_DELETED 0 +#define KEY_TYPE_DISCARD 1 +#define KEY_TYPE_ERROR 2 +#define KEY_TYPE_COOKIE 3 +#define KEY_TYPE_PERSISTENT_DISCARD 4 +#define KEY_TYPE_GENERIC_NR 128 + +struct bch_cookie { + struct bch_val v; + __le64 cookie; +}; +BKEY_VAL_TYPE(cookie, KEY_TYPE_COOKIE); + +/* Extents */ + +/* + * In extent bkeys, the value is a list of pointers (bch_extent_ptr), optionally + * preceded by checksum/compression information (bch_extent_crc32 or + * bch_extent_crc64). + * + * One major determining factor in the format of extents is how we handle and + * represent extents that have been partially overwritten and thus trimmed: + * + * If an extent is not checksummed or compressed, when the extent is trimmed we + * don't have to remember the extent we originally allocated and wrote: we can + * merely adjust ptr->offset to point to the start of the start of the data that + * is currently live. 
The size field in struct bkey records the current (live) + * size of the extent, and is also used to mean "size of region on disk that we + * point to" in this case. + * + * Thus an extent that is not checksummed or compressed will consist only of a + * list of bch_extent_ptrs, with none of the fields in + * bch_extent_crc32/bch_extent_crc64. + * + * When an extent is checksummed or compressed, it's not possible to read only + * the data that is currently live: we have to read the entire extent that was + * originally written, and then return only the part of the extent that is + * currently live. + * + * Thus, in addition to the current size of the extent in struct bkey, we need + * to store the size of the originally allocated space - this is the + * compressed_size and uncompressed_size fields in bch_extent_crc32/64. Also, + * when the extent is trimmed, instead of modifying the offset field of the + * pointer, we keep a second smaller offset field - "offset into the original + * extent of the currently live region". + * + * The other major determining factor is replication and data migration: + * + * Each pointer may have its own bch_extent_crc32/64. When doing a replicated + * write, we will initially write all the replicas in the same format, with the + * same checksum type and compression format - however, when copygc runs later (or + * tiering/cache promotion, anything that moves data), it is not in general + * going to rewrite all the pointers at once - one of the replicas may be in a + * bucket on one device that has very little fragmentation while another lives + * in a bucket that has become heavily fragmented, and thus is being rewritten + * sooner than the rest. + * + * Thus it will only move a subset of the pointers (or in the case of + * tiering/cache promotion perhaps add a single pointer without dropping any + * current pointers), and if the extent has been partially overwritten it must + * write only the currently live portion (or copygc would not be able to reduce + * fragmentation!) - which necessitates a different bch_extent_crc format for + * the new pointer. + * + * But in the interests of space efficiency, we don't want to store one + * bch_extent_crc for each pointer if we don't have to. + * + * Thus, a bch_extent consists of bch_extent_crc32s, bch_extent_crc64s, and + * bch_extent_ptrs appended arbitrarily one after the other. We determine the + * type of a given entry with a scheme similar to utf8 (except we're encoding a + * type, not a size), encoding the type in the position of the first set bit: + * + * bch_extent_crc32 - 0b1 + * bch_extent_ptr - 0b10 + * bch_extent_crc64 - 0b100 + * + * We do it this way because bch_extent_crc32 is _very_ constrained on bits (and + * bch_extent_crc64 is the least constrained). + * + * Then, each bch_extent_crc32/64 applies to the pointers that follow after it, + * until the next bch_extent_crc32/64. + * + * If there are no bch_extent_crcs preceding a bch_extent_ptr, then that pointer + * is neither checksummed nor compressed. 
+ */ + +enum bch_extent_entry_type { + BCH_EXTENT_ENTRY_crc32 = 0, + BCH_EXTENT_ENTRY_ptr = 1, + BCH_EXTENT_ENTRY_crc64 = 2, +}; + +#define BCH_EXTENT_ENTRY_MAX 3 + +struct bch_extent_crc32 { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u32 type:1, + offset:7, + compressed_size:8, + uncompressed_size:8, + csum_type:4, + compression_type:4; + __u32 csum; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u32 csum; + __u32 compression_type:4, + csum_type:4, + uncompressed_size:8, + compressed_size:8, + offset:7, + type:1; +#endif +} __attribute__((packed, aligned(8))); + +#define CRC32_EXTENT_SIZE_MAX (1U << 7) + +/* 64k */ +#define BCH_COMPRESSED_EXTENT_MAX 128U + +struct bch_extent_crc64 { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:3, + offset:17, + compressed_size:18, + uncompressed_size:18, + csum_type:4, + compression_type:4; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 compression_type:4, + csum_type:4, + uncompressed_size:18, + compressed_size:18, + offset:17, + type:3; +#endif + __u64 csum; +} __attribute__((packed, aligned(8))); + +#define CRC64_EXTENT_SIZE_MAX (1U << 17) + +/* + * @reservation - pointer hasn't been written to, just reserved + */ +struct bch_extent_ptr { +#if defined(__LITTLE_ENDIAN_BITFIELD) + __u64 type:2, + erasure_coded:1, + reservation:1, + offset:44, /* 8 petabytes */ + dev:8, + gen:8; +#elif defined (__BIG_ENDIAN_BITFIELD) + __u64 gen:8, + dev:8, + offset:44, + reservation:1, + erasure_coded:1, + type:2; +#endif +} __attribute__((packed, aligned(8))); + +union bch_extent_entry { +#if defined(__LITTLE_ENDIAN) || __BITS_PER_LONG == 64 + unsigned long type; +#elif __BITS_PER_LONG == 32 + struct { + unsigned long pad; + unsigned long type; + }; +#else +#error edit for your odd byteorder. +#endif + struct bch_extent_crc32 crc32; + struct bch_extent_crc64 crc64; + struct bch_extent_ptr ptr; +}; + +enum { + BCH_EXTENT = 128, + + /* + * This is kind of a hack, we're overloading the type for a boolean that + * really should be part of the value - BCH_EXTENT and BCH_EXTENT_CACHED + * have the same value type: + */ + BCH_EXTENT_CACHED = 129, + + /* + * Persistent reservation: + */ + BCH_RESERVATION = 130, +}; + +struct bch_extent { + struct bch_val v; + + union bch_extent_entry start[0]; + __u64 _data[0]; +} __attribute__((packed, aligned(8))); +BKEY_VAL_TYPE(extent, BCH_EXTENT); + +/* Maximum size (in u64s) a single pointer could be: */ +#define BKEY_EXTENT_PTR_U64s_MAX\ + ((sizeof(struct bch_extent_crc64) + \ + sizeof(struct bch_extent_ptr)) / sizeof(u64)) + +/* Maximum possible size of an entire extent value: */ +/* There's a hack in the keylist code that needs to be fixed.. 
*/ +#define BKEY_EXTENT_VAL_U64s_MAX \ + (BKEY_EXTENT_PTR_U64s_MAX * BCH_REPLICAS_MAX) + +/* * Maximum possible size of an entire extent, key + value: */ +#define BKEY_EXTENT_U64s_MAX (BKEY_U64s + BKEY_EXTENT_VAL_U64s_MAX) + +/* Btree pointers don't carry around checksums: */ +#define BKEY_BTREE_PTR_VAL_U64s_MAX \ + ((sizeof(struct bch_extent_ptr)) / sizeof(u64) * BCH_REPLICAS_MAX) +#define BKEY_BTREE_PTR_U64s_MAX \ + (BKEY_U64s + BKEY_BTREE_PTR_VAL_U64s_MAX) + +/* Inodes */ + +#define BLOCKDEV_INODE_MAX 4096 + +#define BCACHE_ROOT_INO 4096 + +enum bch_inode_types { + BCH_INODE_FS = 128, + BCH_INODE_BLOCKDEV = 129, +}; + +struct bch_inode { + struct bch_val v; + + __le16 i_mode; + __le16 pad; + __le32 i_flags; + + /* Nanoseconds */ + __le64 i_atime; + __le64 i_ctime; + __le64 i_mtime; + + __le64 i_size; + __le64 i_sectors; + + __le32 i_uid; + __le32 i_gid; + __le32 i_nlink; + + __le32 i_dev; + + __le64 i_hash_seed; +} __attribute__((packed)); +BKEY_VAL_TYPE(inode, BCH_INODE_FS); + +enum { + /* + * User flags (get/settable with FS_IOC_*FLAGS, correspond to FS_*_FL + * flags) + */ + __BCH_INODE_SYNC = 0, + __BCH_INODE_IMMUTABLE = 1, + __BCH_INODE_APPEND = 2, + __BCH_INODE_NODUMP = 3, + __BCH_INODE_NOATIME = 4, + + __BCH_INODE_I_SIZE_DIRTY= 5, + __BCH_INODE_I_SECTORS_DIRTY= 6, + + /* not implemented yet: */ + __BCH_INODE_HAS_XATTRS = 7, /* has xattrs in xattr btree */ +}; + +LE32_BITMASK(INODE_STR_HASH_TYPE, struct bch_inode, i_flags, 28, 32); + +#define BCH_INODE_SYNC (1 << __BCH_INODE_SYNC) +#define BCH_INODE_IMMUTABLE (1 << __BCH_INODE_IMMUTABLE) +#define BCH_INODE_APPEND (1 << __BCH_INODE_APPEND) +#define BCH_INODE_NODUMP (1 << __BCH_INODE_NODUMP) +#define BCH_INODE_NOATIME (1 << __BCH_INODE_NOATIME) +#define BCH_INODE_I_SIZE_DIRTY (1 << __BCH_INODE_I_SIZE_DIRTY) +#define BCH_INODE_I_SECTORS_DIRTY (1 << __BCH_INODE_I_SECTORS_DIRTY) +#define BCH_INODE_HAS_XATTRS (1 << __BCH_INODE_HAS_XATTRS) + +struct bch_inode_blockdev { + struct bch_val v; + + __le64 i_size; + __le64 i_flags; + + /* Seconds: */ + __le64 i_ctime; + __le64 i_mtime; + + uuid_le i_uuid; + __u8 i_label[32]; +} __attribute__((packed, aligned(8))); +BKEY_VAL_TYPE(inode_blockdev, BCH_INODE_BLOCKDEV); + +/* Thin provisioned volume, or cache for another block device? */ +LE64_BITMASK(CACHED_DEV, struct bch_inode_blockdev, i_flags, 0, 1) +/* Dirents */ + +/* + * Dirents (and xattrs) have to implement string lookups; since our b-tree + * doesn't support arbitrary length strings for the key, we instead index by a + * 64 bit hash (currently truncated sha1) of the string, stored in the offset + * field of the key - using linear probing to resolve hash collisions. This also + * provides us with the readdir cookie posix requires. 
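A rough sketch (not part of this patch) of the lookup-key construction this comment describes; example_str_hash() is only a stand-in for the truncated-sha1 hash mentioned above:

static inline __u64 example_str_hash(const char *name)
{
	/*
	 * Stand-in only: the real format uses a (truncated sha1) hash seeded by
	 * i_hash_seed; any 64 bit string hash illustrates the indexing scheme.
	 */
	__u64 h = 0;

	while (*name)
		h = h * 31 + (unsigned char) *name++;
	return h;
}

static inline struct bpos example_dirent_pos(__u64 dir_inum, const char *name)
{
	/*
	 * Directory inode in the inode field, 64 bit name hash in the offset
	 * field; on a collision the lookup probes linearly forward from here.
	 */
	return POS(dir_inum, example_str_hash(name));
}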
+ * + * Linear probing requires us to use whiteouts for deletions, in the event of a + * collision: + */ + +enum { + BCH_DIRENT = 128, + BCH_DIRENT_WHITEOUT = 129, +}; + +struct bch_dirent { + struct bch_val v; + + /* Target inode number: */ + __le64 d_inum; + + /* + * Copy of mode bits 12-15 from the target inode - so userspace can get + * the filetype without having to do a stat() + */ + __u8 d_type; + + __u8 d_name[]; +} __attribute__((packed)); +BKEY_VAL_TYPE(dirent, BCH_DIRENT); + +/* Xattrs */ + +enum { + BCH_XATTR = 128, + BCH_XATTR_WHITEOUT = 129, +}; + +#define BCH_XATTR_INDEX_USER 0 +#define BCH_XATTR_INDEX_POSIX_ACL_ACCESS 1 +#define BCH_XATTR_INDEX_POSIX_ACL_DEFAULT 2 +#define BCH_XATTR_INDEX_TRUSTED 3 +#define BCH_XATTR_INDEX_SECURITY 4 + +struct bch_xattr { + struct bch_val v; + __u8 x_type; + __u8 x_name_len; + __le16 x_val_len; + __u8 x_name[]; +} __attribute__((packed)); +BKEY_VAL_TYPE(xattr, BCH_XATTR); + +/* Superblock */ + +/* Version 0: Cache device + * Version 1: Backing device + * Version 2: Seed pointer into btree node checksum + * Version 3: Cache device with new UUID format + * Version 4: Backing device with data offset + * Version 5: All the incompat changes + * Version 6: Cache device UUIDs all in superblock, another incompat bset change + */ +#define BCACHE_SB_VERSION_CDEV_V0 0 +#define BCACHE_SB_VERSION_BDEV 1 +#define BCACHE_SB_VERSION_CDEV_WITH_UUID 3 +#define BCACHE_SB_VERSION_BDEV_WITH_OFFSET 4 +#define BCACHE_SB_VERSION_CDEV_V2 5 +#define BCACHE_SB_VERSION_CDEV_V3 6 +#define BCACHE_SB_VERSION_CDEV 6 +#define BCACHE_SB_MAX_VERSION 6 + +#define SB_SECTOR 8 +#define SB_LABEL_SIZE 32 +#define MAX_CACHES_PER_SET 64 + +#define BDEV_DATA_START_DEFAULT 16 /* sectors */ + +struct cache_member { + uuid_le uuid; + __le64 nbuckets; /* device size */ + __le16 first_bucket; /* index of first bucket used */ + __le16 bucket_size; /* sectors */ + __le32 pad; + __le64 last_mount; /* time_t */ + + __le64 f1; + __le64 f2; +}; + +LE64_BITMASK(CACHE_STATE, struct cache_member, f1, 0, 4) +#define CACHE_ACTIVE 0U +#define CACHE_RO 1U +#define CACHE_FAILED 2U +#define CACHE_SPARE 3U +#define CACHE_STATE_NR 4U + +LE64_BITMASK(CACHE_TIER, struct cache_member, f1, 4, 8) +#define CACHE_TIERS 4U + +LE64_BITMASK(CACHE_REPLICATION_SET, struct cache_member, f1, 8, 16) + +LE64_BITMASK(CACHE_HAS_METADATA, struct cache_member, f1, 24, 25) +LE64_BITMASK(CACHE_HAS_DATA, struct cache_member, f1, 25, 26) + +LE64_BITMASK(CACHE_REPLACEMENT, struct cache_member, f1, 26, 30) +#define CACHE_REPLACEMENT_LRU 0U +#define CACHE_REPLACEMENT_FIFO 1U +#define CACHE_REPLACEMENT_RANDOM 2U +#define CACHE_REPLACEMENT_NR 3U + +LE64_BITMASK(CACHE_DISCARD, struct cache_member, f1, 30, 31); + +LE64_BITMASK(CACHE_NR_READ_ERRORS, struct cache_member, f2, 0, 20); +LE64_BITMASK(CACHE_NR_WRITE_ERRORS, struct cache_member, f2, 20, 40); + +struct cache_sb { + __le64 csum; + __le64 offset; /* sector where this sb was written */ + __le64 version; /* of on disk format */ + + uuid_le magic; /* bcache superblock UUID */ + + /* Identifies this disk within the cache set: */ + uuid_le disk_uuid; + + /* + * Internal cache set UUID - xored with various magic numbers and thus + * must never change: + */ + union { + uuid_le set_uuid; + __le64 set_magic; + }; + + __u8 label[SB_LABEL_SIZE]; + + __le64 flags; + + /* Incremented each time superblock is written: */ + __le64 seq; + + /* + * User visible UUID for identifying the cache set the user is allowed + * to change: + */ + uuid_le user_uuid; + + __le64 flags2; + __le64 pad1[5]; + + 
/* Number of cache_member entries: */ + __u8 nr_in_set; + + /* + * Index of this device - for PTR_DEV(), and also this device's + * slot in the cache_member array: + */ + __u8 nr_this_dev; + __le16 pad2[3]; + + __le16 block_size; /* sectors */ + __le16 pad3[6]; + + __le16 u64s; /* size of variable length portion */ + + union { + struct cache_member members[0]; + /* + * Journal buckets also in the variable length portion, after + * the member info: + */ + __le64 _data[0]; + }; +}; + +/* XXX: rename CACHE_SET -> BCH_FS or something? */ + +LE64_BITMASK(CACHE_SET_SYNC, struct cache_sb, flags, 0, 1); + +LE64_BITMASK(CACHE_SET_ERROR_ACTION, struct cache_sb, flags, 1, 4); +#define BCH_ON_ERROR_CONTINUE 0U +#define BCH_ON_ERROR_RO 1U +#define BCH_ON_ERROR_PANIC 2U +#define BCH_NR_ERROR_ACTIONS 3U + +LE64_BITMASK(CACHE_SET_META_REPLICAS_WANT,struct cache_sb, flags, 4, 8); +LE64_BITMASK(CACHE_SET_DATA_REPLICAS_WANT,struct cache_sb, flags, 8, 12); + +#define BCH_REPLICAS_MAX 4U + +LE64_BITMASK(CACHE_SB_CSUM_TYPE, struct cache_sb, flags, 12, 16); + +LE64_BITMASK(CACHE_SET_META_PREFERRED_CSUM_TYPE,struct cache_sb, flags, 16, 20); +#define BCH_CSUM_NONE 0U +#define BCH_CSUM_CRC32C 1U +#define BCH_CSUM_CRC64 2U +#define BCH_CSUM_NR 3U + +LE64_BITMASK(CACHE_SET_BTREE_NODE_SIZE, struct cache_sb, flags, 20, 36); + +LE64_BITMASK(CACHE_SET_META_REPLICAS_HAVE,struct cache_sb, flags, 36, 40); +LE64_BITMASK(CACHE_SET_DATA_REPLICAS_HAVE,struct cache_sb, flags, 40, 44); + +LE64_BITMASK(CACHE_SET_STR_HASH_TYPE,struct cache_sb, flags, 44, 48); +enum bch_str_hash_type { + BCH_STR_HASH_CRC32C = 0, + BCH_STR_HASH_CRC64 = 1, + BCH_STR_HASH_SIPHASH = 2, + BCH_STR_HASH_SHA1 = 3, +}; + +#define BCH_STR_HASH_NR 4 + +LE64_BITMASK(CACHE_SET_DATA_PREFERRED_CSUM_TYPE, struct cache_sb, flags, 48, 52); + +LE64_BITMASK(CACHE_SET_COMPRESSION_TYPE, struct cache_sb, flags, 52, 56); +enum { + BCH_COMPRESSION_NONE = 0, + BCH_COMPRESSION_LZ4 = 1, + BCH_COMPRESSION_GZIP = 2, +}; + +#define BCH_COMPRESSION_NR 3U + +/* Limit inode numbers to 32 bits: */ +LE64_BITMASK(CACHE_INODE_32BIT, struct cache_sb, flags, 56, 57); + +LE64_BITMASK(CACHE_SET_GC_RESERVE, struct cache_sb, flags, 57, 63); + +LE64_BITMASK(CACHE_SET_ROOT_RESERVE, struct cache_sb, flags2, 0, 6); + +/* + * Did we shut down cleanly? Just a hint, doesn't affect behaviour of + * mount/recovery path: + */ +LE64_BITMASK(CACHE_SET_CLEAN, struct cache_sb, flags2, 6, 7); + +LE64_BITMASK(CACHE_SET_JOURNAL_ENTRY_SIZE, struct cache_sb, flags2, 7, 15); + +/* options: */ + +/** + * CACHE_SET_OPT(name, choices, min, max, sb_option, sysfs_writeable) + * + * @name - name of mount option, sysfs attribute, and struct cache_set_opts + * member + * + * @choices - array of strings that the user can select from - option is by + * array index + * + * Booleans are special cased; if @choices is bch_bool_opt the mount + * options name and noname will work as expected. 
+ * + * @min, @max + * + * @sb_option - name of corresponding superblock option + * + * @sysfs_writeable - if true, option will be modifiable at runtime via sysfs + */ + +#define CACHE_SET_SB_OPTS() \ + CACHE_SET_OPT(errors, \ + bch_error_actions, \ + 0, BCH_NR_ERROR_ACTIONS, \ + CACHE_SET_ERROR_ACTION, \ + true) \ + CACHE_SET_OPT(metadata_replicas, \ + bch_uint_opt, \ + 0, BCH_REPLICAS_MAX, \ + CACHE_SET_META_REPLICAS_WANT, \ + false) \ + CACHE_SET_OPT(data_replicas, \ + bch_uint_opt, \ + 0, BCH_REPLICAS_MAX, \ + CACHE_SET_DATA_REPLICAS_WANT, \ + false) \ + CACHE_SET_OPT(metadata_checksum, \ + bch_csum_types, \ + 0, BCH_CSUM_NR, \ + CACHE_SET_META_PREFERRED_CSUM_TYPE, \ + true) \ + CACHE_SET_OPT(data_checksum, \ + bch_csum_types, \ + 0, BCH_CSUM_NR, \ + CACHE_SET_DATA_PREFERRED_CSUM_TYPE, \ + true) \ + CACHE_SET_OPT(compression, \ + bch_compression_types, \ + 0, BCH_COMPRESSION_NR, \ + CACHE_SET_COMPRESSION_TYPE, \ + true) \ + CACHE_SET_OPT(str_hash, \ + bch_str_hash_types, \ + 0, BCH_STR_HASH_NR, \ + CACHE_SET_STR_HASH_TYPE, \ + true) \ + CACHE_SET_OPT(inodes_32bit, \ + bch_bool_opt, 0, 2, \ + CACHE_INODE_32BIT, \ + true) \ + CACHE_SET_OPT(gc_reserve_percent, \ + bch_uint_opt, \ + 5, 21, \ + CACHE_SET_GC_RESERVE, \ + false) \ + CACHE_SET_OPT(root_reserve_percent, \ + bch_uint_opt, \ + 0, 21, \ + CACHE_SET_ROOT_RESERVE, \ + false) + +/* backing device specific stuff: */ + +struct backingdev_sb { + __le64 csum; + __le64 offset; /* sector where this sb was written */ + __le64 version; /* of on disk format */ + + uuid_le magic; /* bcache superblock UUID */ + + uuid_le disk_uuid; + + /* + * Internal cache set UUID - xored with various magic numbers and thus + * must never change: + */ + union { + uuid_le set_uuid; + __le64 set_magic; + }; + __u8 label[SB_LABEL_SIZE]; + + __le64 flags; + + /* Incremented each time superblock is written: */ + __le64 seq; + + /* + * User visible UUID for identifying the cache set the user is allowed + * to change: + * + * XXX hooked up? 
+ */ + uuid_le user_uuid; + __le64 pad1[6]; + + __le64 data_offset; + __le16 block_size; /* sectors */ + __le16 pad2[3]; + + __le32 last_mount; /* time_t */ + __le16 pad3; + /* size of variable length portion - always 0 for backingdev superblock */ + __le16 u64s; + __u64 _data[0]; +}; + +LE64_BITMASK(BDEV_CACHE_MODE, struct backingdev_sb, flags, 0, 4); +#define CACHE_MODE_WRITETHROUGH 0U +#define CACHE_MODE_WRITEBACK 1U +#define CACHE_MODE_WRITEAROUND 2U +#define CACHE_MODE_NONE 3U + +LE64_BITMASK(BDEV_STATE, struct backingdev_sb, flags, 61, 63); +#define BDEV_STATE_NONE 0U +#define BDEV_STATE_CLEAN 1U +#define BDEV_STATE_DIRTY 2U +#define BDEV_STATE_STALE 3U + +static inline unsigned bch_journal_buckets_offset(struct cache_sb *sb) +{ + return sb->nr_in_set * (sizeof(struct cache_member) / sizeof(__u64)); +} + +static inline unsigned bch_nr_journal_buckets(struct cache_sb *sb) +{ + return __le16_to_cpu(sb->u64s) - bch_journal_buckets_offset(sb); +} + +static inline _Bool __SB_IS_BDEV(__u64 version) +{ + return version == BCACHE_SB_VERSION_BDEV + || version == BCACHE_SB_VERSION_BDEV_WITH_OFFSET; +} + +static inline _Bool SB_IS_BDEV(const struct cache_sb *sb) +{ + return __SB_IS_BDEV(sb->version); +} + +/* + * Magic numbers + * + * The various other data structures have their own magic numbers, which are + * xored with the first part of the cache set's UUID + */ + +#define BCACHE_MAGIC \ + UUID_LE(0xf67385c6, 0x1a4e, 0xca45, \ + 0x82, 0x65, 0xf5, 0x7f, 0x48, 0xba, 0x6d, 0x81) + +#define BCACHE_STATFS_MAGIC 0xca451a4e + +#define BCACHE_SB_MAGIC 0xca451a4ef67385c6ULL +#define BCACHE_SB_MAGIC2 0x816dba487ff56582ULL +#define JSET_MAGIC 0x245235c1a3625032ULL +#define PSET_MAGIC 0x6750e15f87337f91ULL +#define BSET_MAGIC 0x90135c78b99e07f5ULL + +static inline __u64 jset_magic(struct cache_sb *sb) +{ + return __le64_to_cpu(sb->set_magic) ^ JSET_MAGIC; +} + +static inline __u64 pset_magic(struct cache_sb *sb) +{ + return __le64_to_cpu(sb->set_magic) ^ PSET_MAGIC; +} + +static inline __u64 bset_magic(struct cache_sb *sb) +{ + return __le64_to_cpu(sb->set_magic) ^ BSET_MAGIC; +} + +/* Journal */ + + +#define BCACHE_JSET_VERSION_UUIDv1 1 +#define BCACHE_JSET_VERSION_UUID 1 /* Always latest UUID format */ +#define BCACHE_JSET_VERSION_JKEYS 2 +#define BCACHE_JSET_VERSION 2 + +struct jset_entry { + __le16 u64s; + __u8 btree_id; + __u8 level; + __le32 flags; /* designates what this jset holds */ + + union { + struct bkey_i start[0]; + __u64 _data[0]; + }; +}; + +#define JSET_KEYS_U64s (sizeof(struct jset_entry) / sizeof(__u64)) + +LE32_BITMASK(JOURNAL_ENTRY_TYPE, struct jset_entry, flags, 0, 8); +enum { + JOURNAL_ENTRY_BTREE_KEYS = 0, + JOURNAL_ENTRY_BTREE_ROOT = 1, + JOURNAL_ENTRY_PRIO_PTRS = 2, + + /* + * Journal sequence numbers can be blacklisted: bsets record the max + * sequence number of all the journal entries they contain updates for, + * so that on recovery we can ignore those bsets that contain index + * updates newer that what made it into the journal. + * + * This means that we can't reuse that journal_seq - we have to skip it, + * and then record that we skipped it so that the next time we crash and + * recover we don't think there was a missing journal entry. + */ + JOURNAL_ENTRY_JOURNAL_SEQ_BLACKLISTED = 3, +}; + +/* + * On disk format for a journal entry: + * seq is monotonically increasing; every journal entry has its own unique + * sequence number. + * + * last_seq is the oldest journal entry that still has keys the btree hasn't + * flushed to disk yet. 
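A small sketch (not part of this patch) of the replay rule the seq/last_seq fields imply; example_jset_entry_needed() is a hypothetical name:

static inline _Bool example_jset_entry_needed(__u64 entry_seq, __u64 newest_last_seq)
{
	/*
	 * Entries older than the newest entry's last_seq only carry keys the
	 * btree has already flushed, so recovery can skip replaying them.
	 */
	return entry_seq >= newest_last_seq;
}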
+ * + * version is for on disk format changes. + */ +struct jset { + __le64 csum; + __le64 magic; + __le32 version; + __le32 flags; + + /* Sequence number of oldest dirty journal entry */ + __le64 seq; + __le64 last_seq; + + __le16 read_clock; + __le16 write_clock; + __le32 u64s; /* size of d[] in u64s */ + + union { + struct jset_entry start[0]; + __u64 _data[0]; + }; +}; + +LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4); +LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5); + +#define BCH_JOURNAL_BUCKETS_MIN 20 + +/* Bucket prios/gens */ + +struct prio_set { + __le64 csum; + __le64 magic; + __le32 version; + __le32 flags; + + __le64 next_bucket; + + struct bucket_disk { + __le16 read_prio; + __le16 write_prio; + __u8 gen; + } __attribute__((packed)) data[]; +}; + +LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4); + +/* Btree: */ + +#define DEFINE_BCH_BTREE_IDS() \ + DEF_BTREE_ID(EXTENTS, 0, "extents") \ + DEF_BTREE_ID(INODES, 1, "inodes") \ + DEF_BTREE_ID(DIRENTS, 2, "dirents") \ + DEF_BTREE_ID(XATTRS, 3, "xattrs") + +#define DEF_BTREE_ID(kwd, val, name) BTREE_ID_##kwd = val, + +enum btree_id { + DEFINE_BCH_BTREE_IDS() + BTREE_ID_NR +}; + +#undef DEF_BTREE_ID + +#define BTREE_MAX_DEPTH 4U + +/* Btree nodes */ + +/* Version 1: Seed pointer into btree node checksum + */ +#define BCACHE_BSET_CSUM 1 +#define BCACHE_BSET_KEY_v1 2 +#define BCACHE_BSET_JOURNAL_SEQ 3 +#define BCACHE_BSET_VERSION 3 + +/* + * Btree nodes + * + * On disk a btree node is a list/log of these; within each set the keys are + * sorted + */ +struct bset { + __le64 seq; + + /* + * Highest journal entry this bset contains keys for. + * If on recovery we don't see that journal entry, this bset is ignored: + * this allows us to preserve the order of all index updates after a + * crash, since the journal records a total order of all index updates + * and anything that didn't make it to the journal doesn't get used. 
+ */ + __le64 journal_seq; + + __le32 flags; + __le16 version; + __le16 u64s; /* count of d[] in u64s */ + + union { + struct bkey_packed start[0]; + __u64 _data[0]; + }; +} __attribute__((packed)); + +LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4); + +/* Only used in first bset */ +LE32_BITMASK(BSET_BTREE_LEVEL, struct bset, flags, 4, 8); + +LE32_BITMASK(BSET_BIG_ENDIAN, struct bset, flags, 8, 9); +LE32_BITMASK(BSET_SEPARATE_WHITEOUTS, + struct bset, flags, 9, 10); + +struct btree_node { + __le64 csum; + __le64 magic; + + /* Closed interval: */ + struct bpos min_key; + struct bpos max_key; + struct bkey_format format; + + struct bset keys; +} __attribute__((packed)); + +struct btree_node_entry { + __le64 csum; + struct bset keys; +} __attribute__((packed)); + +/* OBSOLETE */ + +#define BITMASK(name, type, field, offset, end) \ +static const unsigned name##_OFFSET = offset; \ +static const unsigned name##_BITS = (end - offset); \ +static const __u64 name##_MAX = (1ULL << (end - offset)) - 1; \ + \ +static inline __u64 name(const type *k) \ +{ return (k->field >> offset) & ~(~0ULL << (end - offset)); } \ + \ +static inline void SET_##name(type *k, __u64 v) \ +{ \ + k->field &= ~(~(~0ULL << (end - offset)) << offset); \ + k->field |= (v & ~(~0ULL << (end - offset))) << offset; \ +} + +struct bkey_v0 { + __u64 high; + __u64 low; + __u64 ptr[]; +}; + +#define KEY0_FIELD(name, field, offset, size) \ + BITMASK(name, struct bkey_v0, field, offset, size) + +KEY0_FIELD(KEY0_PTRS, high, 60, 63) +KEY0_FIELD(KEY0_CSUM, high, 56, 58) +KEY0_FIELD(KEY0_DIRTY, high, 36, 37) + +KEY0_FIELD(KEY0_SIZE, high, 20, 36) +KEY0_FIELD(KEY0_INODE, high, 0, 20) + +static inline unsigned long bkey_v0_u64s(const struct bkey_v0 *k) +{ + return (sizeof(struct bkey_v0) / sizeof(__u64)) + KEY0_PTRS(k); +} + +static inline struct bkey_v0 *bkey_v0_next(const struct bkey_v0 *k) +{ + __u64 *d = (__u64 *) k; + + return (struct bkey_v0 *) (d + bkey_v0_u64s(k)); +} + +struct jset_v0 { + __u64 csum; + __u64 magic; + __u64 seq; + __u32 version; + __u32 keys; + + __u64 last_seq; + + __BKEY_PADDED(uuid_bucket, 4); + __BKEY_PADDED(btree_root, 4); + __u16 btree_level; + __u16 pad[3]; + + __u64 prio_bucket[MAX_CACHES_PER_SET]; + + union { + struct bkey start[0]; + __u64 d[0]; + }; +}; + +/* UUIDS - per backing device/flash only volume metadata */ + +struct uuid_entry_v0 { + uuid_le uuid; + __u8 label[32]; + __u32 first_reg; + __u32 last_reg; + __u32 invalidated; + __u32 pad; +}; + +struct uuid_entry { + union { + struct { + uuid_le uuid; + __u8 label[32]; + __u32 first_reg; + __u32 last_reg; + __u32 invalidated; + + __u32 flags; + /* Size of flash only volumes */ + __u64 sectors; + }; + + __u8 pad[128]; + }; +}; + +BITMASK(UUID_FLASH_ONLY, struct uuid_entry, flags, 0, 1); + +#ifdef __cplusplus +} +#endif +#endif /* _LINUX_BCACHE_H */ + +/* vim: set foldnestmax=2: */ diff --git a/include/linux/bio.h b/include/linux/bio.h new file mode 100644 index 0000000..94e9048 --- /dev/null +++ b/include/linux/bio.h @@ -0,0 +1,461 @@ +/* + * 2.5 block I/O model + * + * Copyright (C) 2001 Jens Axboe <axboe@suse.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BIO_H +#define __LINUX_BIO_H + +#include <linux/mempool.h> +#include <linux/bug.h> +#include <linux/err.h> + +#include <linux/blkdev.h> +#include <linux/blk_types.h> +#include <linux/workqueue.h> + +#define bio_prio(bio) (bio)->bi_ioprio +#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) + +#define bio_iter_iovec(bio, iter) \ + bvec_iter_bvec((bio)->bi_io_vec, (iter)) + +#define bio_iter_page(bio, iter) \ + bvec_iter_page((bio)->bi_io_vec, (iter)) +#define bio_iter_len(bio, iter) \ + bvec_iter_len((bio)->bi_io_vec, (iter)) +#define bio_iter_offset(bio, iter) \ + bvec_iter_offset((bio)->bi_io_vec, (iter)) + +#define bio_page(bio) bio_iter_page((bio), (bio)->bi_iter) +#define bio_offset(bio) bio_iter_offset((bio), (bio)->bi_iter) +#define bio_iovec(bio) bio_iter_iovec((bio), (bio)->bi_iter) + +#define bio_multiple_segments(bio) \ + ((bio)->bi_iter.bi_size != bio_iovec(bio).bv_len) + +#define bvec_iter_sectors(iter) ((iter).bi_size >> 9) +#define bvec_iter_end_sector(iter) ((iter).bi_sector + bvec_iter_sectors((iter))) + +#define bio_sectors(bio) bvec_iter_sectors((bio)->bi_iter) +#define bio_end_sector(bio) bvec_iter_end_sector((bio)->bi_iter) + +static inline bool bio_has_data(struct bio *bio) +{ + if (bio && + bio->bi_iter.bi_size && + bio_op(bio) != REQ_OP_DISCARD && + bio_op(bio) != REQ_OP_SECURE_ERASE) + return true; + + return false; +} + +static inline bool bio_no_advance_iter(struct bio *bio) +{ + return bio_op(bio) == REQ_OP_DISCARD || + bio_op(bio) == REQ_OP_SECURE_ERASE || + bio_op(bio) == REQ_OP_WRITE_SAME; +} + +static inline bool bio_is_rw(struct bio *bio) +{ + if (!bio_has_data(bio)) + return false; + + if (bio_no_advance_iter(bio)) + return false; + + return true; +} + +static inline bool bio_mergeable(struct bio *bio) +{ + if (bio->bi_opf & REQ_NOMERGE_FLAGS) + return false; + + return true; +} + +static inline unsigned int bio_cur_bytes(struct bio *bio) +{ + if (bio_has_data(bio)) + return bio_iovec(bio).bv_len; + else /* dataless requests such as discard */ + return bio->bi_iter.bi_size; +} + +static inline void *bio_data(struct bio *bio) +{ + if (bio_has_data(bio)) + return page_address(bio_page(bio)) + bio_offset(bio); + + return NULL; +} + +#define __bio_kmap_atomic(bio, iter) \ + (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \ + bio_iter_iovec((bio), (iter)).bv_offset) + +#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) + +#define bio_for_each_segment_all(bvl, bio, i) \ + for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++) + +static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, + unsigned bytes) +{ + iter->bi_sector += bytes >> 9; + + if (bio_no_advance_iter(bio)) + iter->bi_size -= bytes; + else + bvec_iter_advance(bio->bi_io_vec, iter, bytes); +} + +#define __bio_for_each_segment(bvl, bio, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bio_iter_iovec((bio), (iter))), 1); \ + bio_advance_iter((bio), &(iter), (bvl).bv_len)) + +#define bio_for_each_segment(bvl, bio, iter) \ + __bio_for_each_segment(bvl, bio, iter, (bio)->bi_iter) + +#define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) + +static inline unsigned bio_segments(struct bio *bio) +{ + unsigned segs = 0; + struct bio_vec bv; + struct 
bvec_iter iter; + + /* + * We special case discard/write same, because they interpret bi_size + * differently: + */ + + if (bio_op(bio) == REQ_OP_DISCARD) + return 1; + + if (bio_op(bio) == REQ_OP_SECURE_ERASE) + return 1; + + if (bio_op(bio) == REQ_OP_WRITE_SAME) + return 1; + + bio_for_each_segment(bv, bio, iter) + segs++; + + return segs; +} + +static inline void bio_get(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_REFFED); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_cnt); +} + +static inline bool bio_flagged(struct bio *bio, unsigned int bit) +{ + return (bio->bi_flags & (1U << bit)) != 0; +} + +static inline void bio_set_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags |= (1U << bit); +} + +static inline void bio_clear_flag(struct bio *bio, unsigned int bit) +{ + bio->bi_flags &= ~(1U << bit); +} + +static inline void bio_get_first_bvec(struct bio *bio, struct bio_vec *bv) +{ + *bv = bio_iovec(bio); +} + +static inline void bio_get_last_bvec(struct bio *bio, struct bio_vec *bv) +{ + struct bvec_iter iter = bio->bi_iter; + int idx; + + if (unlikely(!bio_multiple_segments(bio))) { + *bv = bio_iovec(bio); + return; + } + + bio_advance_iter(bio, &iter, iter.bi_size); + + if (!iter.bi_bvec_done) + idx = iter.bi_idx - 1; + else /* in the middle of bvec */ + idx = iter.bi_idx; + + *bv = bio->bi_io_vec[idx]; + + /* + * iter.bi_bvec_done records actual length of the last bvec + * if this bio ends in the middle of one io vector + */ + if (iter.bi_bvec_done) + bv->bv_len = iter.bi_bvec_done; +} + +extern struct bio *bio_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs); + +static inline struct bio *bio_next_split(struct bio *bio, int sectors, + gfp_t gfp, struct bio_set *bs) +{ + if (sectors >= bio_sectors(bio)) + return bio; + + return bio_split(bio, sectors, gfp, bs); +} + +struct bio_set { + unsigned int front_pad; +}; + +static inline void bioset_exit(struct bio_set *bs) {} + +static inline void bioset_free(struct bio_set *bs) +{ + kfree(bs); +} + +static inline int bioset_init(struct bio_set *bs, + unsigned pool_size, + unsigned front_pad) +{ + bs->front_pad = front_pad; + return 0; +} + +extern struct bio_set *bioset_create(unsigned int, unsigned int); +extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); + +extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); +extern void bio_put(struct bio *); + +extern void __bio_clone_fast(struct bio *, struct bio *); +extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); +extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); + +static inline struct bio *bio_kmalloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); +} + +static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, NULL); + +} + +extern void bio_endio(struct bio *); +extern void bio_endio_nodec(struct bio *); + +static inline void bio_io_error(struct bio *bio) +{ + bio->bi_error = -EIO; + bio_endio(bio); +} + +extern void bio_advance(struct bio *, unsigned); + +extern void bio_reset(struct bio *); +void bio_chain(struct bio *, struct bio *); + +static inline void bio_flush_dcache_pages(struct bio *bi) +{ +} + +extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter dst_iter, + struct bio *src, struct bvec_iter src_iter); +extern void bio_copy_data(struct bio *dst, struct bio *src); +extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); + +void 
zero_fill_bio_iter(struct bio *bio, struct bvec_iter iter); + +static inline void zero_fill_bio(struct bio *bio) +{ + zero_fill_bio_iter(bio, bio->bi_iter); +} + +static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) +{ + return page_address(bvec->bv_page) + bvec->bv_offset; +} + +static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) +{ + *flags = 0; +} + +static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, + unsigned long *flags) +{ + return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags); +} +#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) + +#define bio_kmap_irq(bio, flags) \ + __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) +#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) + +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define BIO_EMPTY_LIST { NULL, NULL } + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_peek(struct bio_list *bl) +{ + return bl->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. 
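Minimal usage sketch (not part of this patch): the bio_list helpers above form an intrusive FIFO threaded through bi_next; example_complete_all() is a hypothetical name that drains a list the same way bio_io_error() completes a single bio:

static inline void example_complete_all(struct bio_list *list, int error)
{
	struct bio *bio;

	/* Drain the list, completing each bio as bio_io_error() does. */
	while ((bio = bio_list_pop(list))) {
		bio->bi_error = error;
		bio_endio(bio);
	}
}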
+ */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio_set_flag(bio, BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + +static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) +{ + return bio_alloc_bioset(gfp_mask, nr_iovecs, NULL); +} + +static inline struct bio *bio_clone(struct bio *bio, gfp_t gfp_mask) +{ + return bio_clone_bioset(bio, gfp_mask, NULL); +} + +static inline void bio_init(struct bio *bio) +{ + memset(bio, 0, sizeof(*bio)); + atomic_set(&bio->__bi_remaining, 1); + atomic_set(&bio->__bi_cnt, 1); +} + +#endif /* __LINUX_BIO_H */ diff --git a/include/linux/bit_spinlock.h b/include/linux/bit_spinlock.h new file mode 100644 index 0000000..0e88820 --- /dev/null +++ b/include/linux/bit_spinlock.h @@ -0,0 +1,41 @@ +#ifndef __LINUX_BIT_SPINLOCK_H +#define __LINUX_BIT_SPINLOCK_H + +#include <linux/kernel.h> +#include <linux/preempt.h> +#include <linux/atomic.h> +#include <linux/bug.h> + +static inline void bit_spin_lock(int bitnum, unsigned long *addr) +{ + while (unlikely(test_and_set_bit_lock(bitnum, addr))) { + do { + cpu_relax(); + } while (test_bit(bitnum, addr)); + } +} + +static inline int bit_spin_trylock(int bitnum, unsigned long *addr) +{ + return !test_and_set_bit_lock(bitnum, addr); +} + +static inline void bit_spin_unlock(int bitnum, unsigned long *addr) +{ + BUG_ON(!test_bit(bitnum, addr)); + + clear_bit_unlock(bitnum, addr); +} + +static inline void __bit_spin_unlock(int bitnum, unsigned long *addr) +{ + bit_spin_unlock(bitnum, addr); +} + +static inline int bit_spin_is_locked(int bitnum, unsigned long *addr) +{ + return test_bit(bitnum, addr); +} + +#endif /* __LINUX_BIT_SPINLOCK_H */ + diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h new file mode 100644 index 0000000..3baa61e --- /dev/null +++ b/include/linux/bitmap.h @@ -0,0 +1,132 @@ +#ifndef _PERF_BITOPS_H +#define _PERF_BITOPS_H + +#include <string.h> +#include <linux/bitops.h> +#include <stdlib.h> + +#define DECLARE_BITMAP(name,bits) \ + unsigned long name[BITS_TO_LONGS(bits)] + +int __bitmap_weight(const unsigned long *bitmap, int bits); +void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, int bits); +int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits); + +#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) + +#define BITMAP_LAST_WORD_MASK(nbits) \ +( \ + ((nbits) % BITS_PER_LONG) ? 
\ + (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \ +) + +#define small_const_nbits(nbits) \ + (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG) + +static inline void bitmap_zero(unsigned long *dst, int nbits) +{ + memset(dst, 0, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); +} + +static inline int bitmap_weight(const unsigned long *src, int nbits) +{ + if (small_const_nbits(nbits)) + return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits)); + return __bitmap_weight(src, nbits); +} + +static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, int nbits) +{ + if (small_const_nbits(nbits)) + *dst = *src1 | *src2; + else + __bitmap_or(dst, src1, src2, nbits); +} + +/** + * bitmap_alloc - Allocate bitmap + * @nr: Bit to set + */ +static inline unsigned long *bitmap_alloc(int nbits) +{ + return calloc(1, BITS_TO_LONGS(nbits) * sizeof(unsigned long)); +} + +/* + * bitmap_scnprintf - print bitmap list into buffer + * @bitmap: bitmap + * @nbits: size of bitmap + * @buf: buffer to store output + * @size: size of @buf + */ +size_t bitmap_scnprintf(unsigned long *bitmap, int nbits, + char *buf, size_t size); + +/** + * bitmap_and - Do logical and on bitmaps + * @dst: resulting bitmap + * @src1: operand 1 + * @src2: operand 2 + * @nbits: size of bitmap + */ +static inline int bitmap_and(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return (*dst = *src1 & *src2 & BITMAP_LAST_WORD_MASK(nbits)) != 0; + return __bitmap_and(dst, src1, src2, nbits); +} + +static inline unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. 
*/ + tmp &= BITMAP_FIRST_WORD_MASK(start); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(tmp), nbits); +} + +static inline unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, 0UL); +} + +static inline unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, ~0UL); +} + +static inline unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ~0UL) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); + } + + return size; +} + +#endif /* _PERF_BITOPS_H */ diff --git a/include/linux/bitops.h b/include/linux/bitops.h new file mode 100644 index 0000000..a0c6508 --- /dev/null +++ b/include/linux/bitops.h @@ -0,0 +1,275 @@ +#ifndef _TOOLS_LINUX_BITOPS_H_ +#define _TOOLS_LINUX_BITOPS_H_ + +#include <asm/types.h> +#include <linux/kernel.h> +#include <linux/compiler.h> +#include <linux/page.h> + +#ifndef __WORDSIZE +#define __WORDSIZE (__SIZEOF_LONG__ * 8) +#endif + +#ifndef BITS_PER_LONG +# define BITS_PER_LONG __WORDSIZE +#endif + +#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG)) +#define BIT_WORD(nr) ((nr) / BITS_PER_LONG) +#define BITS_PER_BYTE 8 +#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long)) +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) +#define BITS_TO_U32(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u32)) +#define BITS_TO_BYTES(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE) + +static inline void __set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + *p |= mask; +} + +static inline void set_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_or_fetch(p, mask, __ATOMIC_RELAXED); +} + +static inline void clear_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_and_fetch(p, ~mask, __ATOMIC_RELAXED); +} + +static inline int test_bit(long nr, const volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + + return (*p & mask) != 0; +} + +static inline int __test_and_set_bit(int nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + + old = *p; + *p = old | mask; + + return (old & mask) != 0; +} + +static inline bool test_and_set_bit(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + unsigned long old; + + old = __atomic_fetch_or(p, mask, __ATOMIC_RELAXED); + + return (old & mask) != 0; +} + +static inline void clear_bit_unlock(long nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + + __atomic_and_fetch(p, ~mask, __ATOMIC_RELEASE); +} + +static inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) +{ + unsigned long mask = 
BIT_MASK(nr); + unsigned long *p = ((unsigned long *) addr) + BIT_WORD(nr); + unsigned long old; + + old = __atomic_fetch_or(p, mask, __ATOMIC_ACQUIRE); + + return (old & mask) != 0; +} + +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +/* same as for_each_set_bit() but use bit as value to start with */ +#define for_each_set_bit_from(bit, addr, size) \ + for ((bit) = find_next_bit((addr), (size), (bit)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) + +static inline unsigned long hweight_long(unsigned long w) +{ + return __builtin_popcountl(w); +} + +/** + * rol64 - rotate a 64-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 rol64(__u64 word, unsigned int shift) +{ + return (word << shift) | (word >> (64 - shift)); +} + +/** + * ror64 - rotate a 64-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u64 ror64(__u64 word, unsigned int shift) +{ + return (word >> shift) | (word << (64 - shift)); +} + +/** + * rol32 - rotate a 32-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << shift) | (word >> ((-shift) & 31)); +} + +/** + * ror32 - rotate a 32-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u32 ror32(__u32 word, unsigned int shift) +{ + return (word >> shift) | (word << (32 - shift)); +} + +/** + * rol16 - rotate a 16-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 rol16(__u16 word, unsigned int shift) +{ + return (word << shift) | (word >> (16 - shift)); +} + +/** + * ror16 - rotate a 16-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u16 ror16(__u16 word, unsigned int shift) +{ + return (word >> shift) | (word << (16 - shift)); +} + +/** + * rol8 - rotate an 8-bit value left + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 rol8(__u8 word, unsigned int shift) +{ + return (word << shift) | (word >> (8 - shift)); +} + +/** + * ror8 - rotate an 8-bit value right + * @word: value to rotate + * @shift: bits to roll + */ +static inline __u8 ror8(__u8 word, unsigned int shift) +{ + return (word >> shift) | (word << (8 - shift)); +} + +static inline unsigned long __fls(unsigned long word) +{ + return (sizeof(word) * 8) - 1 - __builtin_clzl(word); +} + +static inline int fls(int x) +{ + return x ? 
sizeof(x) * 8 - __builtin_clz(x) : 0; +} + +static inline int fls64(__u64 x) +{ +#if BITS_PER_LONG == 32 + __u32 h = x >> 32; + if (h) + return fls(h) + 32; + return fls(x); +#elif BITS_PER_LONG == 64 + if (x == 0) + return 0; + return __fls(x) + 1; +#endif +} + +static inline unsigned fls_long(unsigned long l) +{ + if (sizeof(l) == 4) + return fls(l); + return fls64(l); +} + +static inline unsigned long __ffs(unsigned long word) +{ + return __builtin_ctzl(word); +} + +static inline unsigned long __ffs64(u64 word) +{ +#if BITS_PER_LONG == 32 + if (((u32)word) == 0UL) + return __ffs((u32)(word >> 32)) + 32; +#elif BITS_PER_LONG != 64 +#error BITS_PER_LONG not 32 or 64 +#endif + return __ffs((unsigned long)word); +} + +#define ffz(x) __ffs(~(x)) + +static inline __attribute__((const)) +unsigned long rounddown_pow_of_two(unsigned long n) +{ + return 1UL << (fls_long(n) - 1); +} + +static inline __attribute_const__ +int __get_order(unsigned long size) +{ + int order; + + size--; + size >>= PAGE_SHIFT; +#if BITS_PER_LONG == 32 + order = fls(size); +#else + order = fls64(size); +#endif + return order; +} + +#define get_order(n) \ +( \ + __builtin_constant_p(n) ? ( \ + ((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \ + (((n) < (1UL << PAGE_SHIFT)) ? 0 : \ + ilog2((n) - 1) - PAGE_SHIFT + 1) \ + ) : \ + __get_order(n) \ +) + +#endif diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h new file mode 100644 index 0000000..fb790b8 --- /dev/null +++ b/include/linux/bitrev.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_BITREV_H +#define _LINUX_BITREV_H + +#include <linux/types.h> + +#ifdef CONFIG_HAVE_ARCH_BITREVERSE +#include <asm/bitrev.h> + +#define __bitrev32 __arch_bitrev32 +#define __bitrev16 __arch_bitrev16 +#define __bitrev8 __arch_bitrev8 + +#else +extern u8 const byte_rev_table[256]; +static inline u8 __bitrev8(u8 byte) +{ + return byte_rev_table[byte]; +} + +static inline u16 __bitrev16(u16 x) +{ + return (__bitrev8(x & 0xff) << 8) | __bitrev8(x >> 8); +} + +static inline u32 __bitrev32(u32 x) +{ + return (__bitrev16(x & 0xffff) << 16) | __bitrev16(x >> 16); +} + +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ + +#define __constant_bitrev32(x) \ +({ \ + u32 __x = x; \ + __x = (__x >> 16) | (__x << 16); \ + __x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8); \ + __x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4); \ + __x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2); \ + __x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1); \ + __x; \ +}) + +#define __constant_bitrev16(x) \ +({ \ + u16 __x = x; \ + __x = (__x >> 8) | (__x << 8); \ + __x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4); \ + __x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2); \ + __x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1); \ + __x; \ +}) + +#define __constant_bitrev8(x) \ +({ \ + u8 __x = x; \ + __x = (__x >> 4) | (__x << 4); \ + __x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2); \ + __x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1); \ + __x; \ +}) + +#define bitrev32(x) \ +({ \ + u32 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev32(__x) : \ + __bitrev32(__x); \ +}) + +#define bitrev16(x) \ +({ \ + u16 __x = x; \ + __builtin_constant_p(__x) ? \ + __constant_bitrev16(__x) : \ + __bitrev16(__x); \ + }) + +#define bitrev8(x) \ +({ \ + u8 __x = x; \ + __builtin_constant_p(__x) ? 
\ + __constant_bitrev8(__x) : \ + __bitrev8(__x) ; \ + }) +#endif /* _LINUX_BITREV_H */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h new file mode 100644 index 0000000..4fc5af3 --- /dev/null +++ b/include/linux/blk_types.h @@ -0,0 +1,156 @@ +/* + * Block data types and constants. Directly include this file only to + * break include dependency loop. + */ +#ifndef __LINUX_BLK_TYPES_H +#define __LINUX_BLK_TYPES_H + +#include <linux/atomic.h> +#include <linux/types.h> +#include <linux/bvec.h> + +struct bio_set; +struct bio; +struct block_device; +typedef void (bio_end_io_t) (struct bio *); +typedef void (bio_destructor_t) (struct bio *); + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + struct bio *bi_next; /* request queue link */ + struct block_device *bi_bdev; + int bi_error; + unsigned int bi_opf; /* bottom bits req flags, + * top bits REQ_OP. Use + * accessors. + */ + unsigned short bi_flags; /* status, command, etc */ + unsigned short bi_ioprio; + + struct bvec_iter bi_iter; + + atomic_t __bi_remaining; + + bio_end_io_t *bi_end_io; + void *bi_private; + + unsigned short bi_vcnt; /* how many bio_vec's */ + + /* + * Everything starting with bi_max_vecs will be preserved by bio_reset() + */ + + unsigned short bi_max_vecs; /* max bvl_vecs we can hold */ + + atomic_t __bi_cnt; /* pin count */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + + struct bio_set *bi_pool; + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. + */ + struct bio_vec bi_inline_vecs[0]; +}; + +#define BIO_OP_SHIFT (8 * sizeof(unsigned int) - REQ_OP_BITS) +#define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) + +#define bio_set_op_attrs(bio, op, op_flags) do { \ + WARN_ON(op >= (1 << REQ_OP_BITS)); \ + (bio)->bi_opf &= ((1 << BIO_OP_SHIFT) - 1); \ + (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ + (bio)->bi_opf |= op_flags; \ +} while (0) + +#define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) + +/* + * bio flags + */ +#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ +#define BIO_CLONED 2 /* doesn't own data */ +#define BIO_BOUNCED 3 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 4 /* contains user pages */ +#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ +#define BIO_QUIET 6 /* Make BIO Quiet */ +#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ +#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ + +/* + * Flags starting here get preserved by bio_reset() - this includes + * BVEC_POOL_IDX() + */ +#define BIO_RESET_BITS 10 + +/* + * We support 6 different bvec pools, the last one is magic in that it + * is backed by a mempool. + */ +#define BVEC_POOL_NR 6 +#define BVEC_POOL_MAX (BVEC_POOL_NR - 1) + +/* + * Top 4 bits of bio flags indicate the pool the bvecs came from. We add + * 1 to the actual index so that 0 indicates that there are no bvecs to be + * freed. + */ +#define BVEC_POOL_BITS (4) +#define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) +#define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) + +/* + * Request flags. For use in the cmd_flags field of struct request, and in + * bi_opf of struct bio. Note that some flags are only valid in either one. 
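+ *
+ * For example, a sync write that also wants a cache flush and forced unit
+ * access could (illustratively) be set up as
+ *
+ *	bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
+ *
+ * i.e. the REQ_OP_* value selects the operation and the REQ_* bits are
+ * OR'd in as modifiers.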
+ */ +enum rq_flag_bits { + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_PRIO, /* boost priority in cfq */ + + __REQ_FUA, /* forced unit access */ + __REQ_PREFLUSH, /* request for cache flush */ +}; + +#define REQ_SYNC (1ULL << __REQ_SYNC) +#define REQ_META (1ULL << __REQ_META) +#define REQ_PRIO (1ULL << __REQ_PRIO) + +#define REQ_NOMERGE_FLAGS (REQ_PREFLUSH | REQ_FUA) + +#define REQ_RAHEAD (1ULL << __REQ_RAHEAD) +#define REQ_THROTTLED (1ULL << __REQ_THROTTLED) + +#define REQ_FUA (1ULL << __REQ_FUA) +#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) + +#define RW_MASK REQ_OP_WRITE + +#define READ REQ_OP_READ +#define WRITE REQ_OP_WRITE + +#define READ_SYNC REQ_SYNC +#define WRITE_SYNC (REQ_SYNC) +#define WRITE_ODIRECT REQ_SYNC +#define WRITE_FLUSH (REQ_SYNC | REQ_PREFLUSH) +#define WRITE_FUA (REQ_SYNC | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_PREFLUSH | REQ_FUA) + +enum req_op { + REQ_OP_READ, + REQ_OP_WRITE, + REQ_OP_DISCARD, /* request to discard sectors */ + REQ_OP_SECURE_ERASE, /* request to securely erase sectors */ + REQ_OP_WRITE_SAME, /* write same block many times */ + REQ_OP_FLUSH, /* request for cache flush */ +}; + +#define REQ_OP_BITS 3 + +#endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h new file mode 100644 index 0000000..3c18594 --- /dev/null +++ b/include/linux/blkdev.h @@ -0,0 +1,188 @@ +#ifndef __TOOLS_LINUX_BLKDEV_H +#define __TOOLS_LINUX_BLKDEV_H + +#include <linux/backing-dev.h> +#include <linux/blk_types.h> + +typedef u64 sector_t; +typedef unsigned fmode_t; + +struct bio; +struct user_namespace; + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + +/* file is open for reading */ +#define FMODE_READ ((__force fmode_t)0x1) +/* file is open for writing */ +#define FMODE_WRITE ((__force fmode_t)0x2) +/* file is seekable */ +#define FMODE_LSEEK ((__force fmode_t)0x4) +/* file can be accessed using pread */ +#define FMODE_PREAD ((__force fmode_t)0x8) +/* file can be accessed using pwrite */ +#define FMODE_PWRITE ((__force fmode_t)0x10) +/* File is opened for execution with sys_execve / sys_uselib */ +#define FMODE_EXEC ((__force fmode_t)0x20) +/* File is opened with O_NDELAY (only set for block devices) */ +#define FMODE_NDELAY ((__force fmode_t)0x40) +/* File is opened with O_EXCL (only set for block devices) */ +#define FMODE_EXCL ((__force fmode_t)0x80) +/* File is opened using open(.., 3, ..) 
and is writeable only for ioctls + (specialy hack for floppy.c) */ +#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) +/* 32bit hashes as llseek() offset (for directories) */ +#define FMODE_32BITHASH ((__force fmode_t)0x200) +/* 64bit hashes as llseek() offset (for directories) */ +#define FMODE_64BITHASH ((__force fmode_t)0x400) + +struct inode { + unsigned long i_ino; + loff_t i_size; + struct super_block *i_sb; +}; + +struct file { + struct inode *f_inode; +}; + +static inline struct inode *file_inode(const struct file *f) +{ + return f->f_inode; +} + +#define BDEVNAME_SIZE 32 + +struct request_queue { + struct backing_dev_info backing_dev_info; +}; + +struct gendisk { +}; + +struct block_device { + char name[BDEVNAME_SIZE]; + struct inode *bd_inode; + struct request_queue queue; + void *bd_holder; + struct gendisk *bd_disk; + struct gendisk __bd_disk; + int bd_fd; +}; + +void generic_make_request(struct bio *); +int submit_bio_wait(struct bio *); +int blkdev_issue_discard(struct block_device *, sector_t, + sector_t, gfp_t, unsigned long); + +#define bdev_get_queue(bdev) (&((bdev)->queue)) + +#define blk_queue_discard(q) ((void) (q), 0) +#define blk_queue_nonrot(q) ((void) (q), 0) + +static inline struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) +{ + struct request_queue *q = bdev_get_queue(bdev); + + return &q->backing_dev_info; +} + +unsigned bdev_logical_block_size(struct block_device *bdev); +sector_t get_capacity(struct gendisk *disk); + +void blkdev_put(struct block_device *bdev, fmode_t mode); +void bdput(struct block_device *bdev); +struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, void *holder); +struct block_device *lookup_bdev(const char *path); + +struct super_block { + void *s_fs_info; +}; + +/* + * File types + * + * NOTE! These match bits 12..15 of stat.st_mode + * (ie "(i_mode >> 12) & 15"). + */ +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 +#define DT_WHT 14 + +/* + * This is the "filldir" function type, used by readdir() to let + * the kernel specify what kind of dirent layout it wants to have. + * This allows the kernel to read directories into kernel space or + * to have different dirent layouts depending on the binary type. + */ +struct dir_context; +typedef int (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, + unsigned); + +struct dir_context { + const filldir_t actor; + u64 pos; +}; + +/* /sys/fs */ +extern struct kobject *fs_kobj; + +struct file_operations { +}; + +static inline int register_chrdev(unsigned int major, const char *name, + const struct file_operations *fops) +{ + return 1; +} + +static inline void unregister_chrdev(unsigned int major, const char *name) +{ +} + +static inline const char *bdevname(struct block_device *bdev, char *buf) +{ + snprintf(buf, BDEVNAME_SIZE, "%s", bdev->name); + return buf; +} + +static inline bool op_is_write(unsigned int op) +{ + return op == REQ_OP_READ ? false : true; +} + +/* + * return data direction, READ or WRITE + */ +static inline int bio_data_dir(struct bio *bio) +{ + return op_is_write(bio_op(bio)) ? 
WRITE : READ; +} + +static inline bool dir_emit(struct dir_context *ctx, + const char *name, int namelen, + u64 ino, unsigned type) +{ + return ctx->actor(ctx, name, namelen, ctx->pos, ino, type) == 0; +} + +static inline bool dir_emit_dots(struct file *file, struct dir_context *ctx) +{ + return true; +} + +#define capable(cap) true + +#endif /* __TOOLS_LINUX_BLKDEV_H */ + diff --git a/include/linux/bug.h b/include/linux/bug.h new file mode 100644 index 0000000..f01e5f7 --- /dev/null +++ b/include/linux/bug.h @@ -0,0 +1,31 @@ +#ifndef __TOOLS_LINUX_BUG_H +#define __TOOLS_LINUX_BUG_H + +#include <assert.h> +#include <linux/compiler.h> + +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) +#define BUILD_BUG_ON_ZERO(e) (sizeof(struct { int:-!!(e); })) +#define BUILD_BUG_ON_NULL(e) ((void *)sizeof(struct { int:-!!(e); })) + +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define BUG() do { assert(0); unreachable(); } while (0) +#define BUG_ON(cond) assert(!(cond)) + +#define WARN_ON_ONCE(cond) assert(!(cond)) +#define WARN_ONCE(cond, msg) assert(!(cond)) + +#define __WARN() assert(0) +#define __WARN_printf(arg...) assert(0) +#define WARN(cond, ...) assert(!(cond)) + +#define WARN_ON(condition) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN(); \ + unlikely(__ret_warn_on); \ +}) + +#endif /* __TOOLS_LINUX_BUG_H */ diff --git a/include/linux/bvec.h b/include/linux/bvec.h new file mode 100644 index 0000000..89b65b8 --- /dev/null +++ b/include/linux/bvec.h @@ -0,0 +1,97 @@ +/* + * bvec iterator + * + * Copyright (C) 2001 Ming Lei <ming.lei@canonical.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BVEC_ITER_H +#define __LINUX_BVEC_ITER_H + +#include <linux/kernel.h> +#include <linux/bug.h> + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +struct bvec_iter { + sector_t bi_sector; /* device address in 512 byte + sectors */ + unsigned int bi_size; /* residual I/O count */ + + unsigned int bi_idx; /* current index into bvl_vec */ + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ +}; + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) + +#define bvec_iter_page(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_page) + +#define bvec_iter_len(bvec, iter) \ + min((iter).bi_size, \ + __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) + +#define bvec_iter_offset(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) + +#define bvec_iter_bvec(bvec, iter) \ +((struct bio_vec) { \ + .bv_page = bvec_iter_page((bvec), (iter)), \ + .bv_len = bvec_iter_len((bvec), (iter)), \ + .bv_offset = bvec_iter_offset((bvec), (iter)), \ +}) + +static inline void bvec_iter_advance(const struct bio_vec *bv, + struct bvec_iter *iter, + unsigned bytes) +{ + WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n"); + + while (bytes) { + unsigned iter_len = bvec_iter_len(bv, *iter); + unsigned len = min(bytes, iter_len); + + bytes -= len; + iter->bi_size -= len; + iter->bi_bvec_done += len; + + if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { + iter->bi_bvec_done = 0; + iter->bi_idx++; + } + } +} + +#define for_each_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + +#endif /* __LINUX_BVEC_ITER_H */ diff --git a/include/linux/byteorder.h b/include/linux/byteorder.h new file mode 100644 index 0000000..35ef228 --- /dev/null +++ b/include/linux/byteorder.h @@ -0,0 +1,74 @@ +#ifndef __LINUX_BYTEORDER_H +#define __LINUX_BYTEORDER_H + +#include <asm/byteorder.h> + +#define swab16 __swab16 +#define swab32 __swab32 +#define swab64 __swab64 +#define swahw32 __swahw32 +#define swahb32 __swahb32 +#define swab16p __swab16p +#define swab32p __swab32p +#define swab64p __swab64p +#define swahw32p __swahw32p +#define swahb32p __swahb32p +#define swab16s __swab16s +#define swab32s __swab32s +#define swab64s __swab64s +#define swahw32s __swahw32s +#define swahb32s __swahb32s + +#define cpu_to_le64 __cpu_to_le64 +#define le64_to_cpu __le64_to_cpu +#define cpu_to_le32 __cpu_to_le32 +#define le32_to_cpu __le32_to_cpu +#define cpu_to_le16 __cpu_to_le16 +#define le16_to_cpu __le16_to_cpu +#define cpu_to_be64 __cpu_to_be64 +#define be64_to_cpu __be64_to_cpu +#define cpu_to_be32 __cpu_to_be32 +#define be32_to_cpu __be32_to_cpu +#define cpu_to_be16 __cpu_to_be16 +#define be16_to_cpu __be16_to_cpu +#define cpu_to_le64p __cpu_to_le64p +#define le64_to_cpup __le64_to_cpup +#define cpu_to_le32p __cpu_to_le32p +#define le32_to_cpup __le32_to_cpup +#define cpu_to_le16p __cpu_to_le16p +#define le16_to_cpup __le16_to_cpup 
+#define cpu_to_be64p __cpu_to_be64p +#define be64_to_cpup __be64_to_cpup +#define cpu_to_be32p __cpu_to_be32p +#define be32_to_cpup __be32_to_cpup +#define cpu_to_be16p __cpu_to_be16p +#define be16_to_cpup __be16_to_cpup +#define cpu_to_le64s __cpu_to_le64s +#define le64_to_cpus __le64_to_cpus +#define cpu_to_le32s __cpu_to_le32s +#define le32_to_cpus __le32_to_cpus +#define cpu_to_le16s __cpu_to_le16s +#define le16_to_cpus __le16_to_cpus +#define cpu_to_be64s __cpu_to_be64s +#define be64_to_cpus __be64_to_cpus +#define cpu_to_be32s __cpu_to_be32s +#define be32_to_cpus __be32_to_cpus +#define cpu_to_be16s __cpu_to_be16s +#define be16_to_cpus __be16_to_cpus + +static inline void le16_add_cpu(__le16 *var, u16 val) +{ + *var = cpu_to_le16(le16_to_cpu(*var) + val); +} + +static inline void le32_add_cpu(__le32 *var, u32 val) +{ + *var = cpu_to_le32(le32_to_cpu(*var) + val); +} + +static inline void le64_add_cpu(__le64 *var, u64 val) +{ + *var = cpu_to_le64(le64_to_cpu(*var) + val); +} + +#endif /* __LINUX_BYTEORDER_H */ diff --git a/include/linux/cache.h b/include/linux/cache.h new file mode 100644 index 0000000..4ee609a --- /dev/null +++ b/include/linux/cache.h @@ -0,0 +1,16 @@ +#ifndef __TOOLS_LINUX_CACHE_H +#define __TOOLS_LINUX_CACHE_H + +#define L1_CACHE_BYTES 64 +#define SMP_CACHE_BYTES L1_CACHE_BYTES + +#define L1_CACHE_ALIGN(x) __ALIGN_KERNEL(x, L1_CACHE_BYTES) + +#define __read_mostly +#define __ro_after_init + +#define ____cacheline_aligned __attribute__((__aligned__(SMP_CACHE_BYTES))) +#define ____cacheline_aligned_in_smp ____cacheline_aligned + +#endif /* __TOOLS_LINUX_CACHE_H */ + diff --git a/include/linux/compiler.h b/include/linux/compiler.h new file mode 100644 index 0000000..e5c31a6 --- /dev/null +++ b/include/linux/compiler.h @@ -0,0 +1,169 @@ +#ifndef _TOOLS_LINUX_COMPILER_H_ +#define _TOOLS_LINUX_COMPILER_H_ + +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define barrier() __asm__ __volatile__("": : :"memory") +#define barrier_data(ptr) __asm__ __volatile__("": :"r"(ptr) :"memory") + +#ifndef __always_inline +# define __always_inline inline __attribute__((always_inline)) +#endif + +#ifdef __ANDROID__ +/* + * FIXME: Big hammer to get rid of tons of: + * "warning: always_inline function might not be inlinable" + * + * At least on android-ndk-r12/platforms/android-24/arch-arm + */ +#undef __always_inline +#define __always_inline inline +#endif + +#define noinline +#define noinline_for_stack noinline + +#define __user +#define __kernel + +#define __pure __attribute__((pure)) +#define __aligned(x) __attribute__((aligned(x))) +#define __printf(a, b) __attribute__((format(printf, a, b))) +#define __used __attribute__((__used__)) +#define __maybe_unused __attribute__((unused)) +#define __always_unused __attribute__((unused)) +#define __packed __attribute__((__packed__)) +#define __force +#define __nocast +#define __iomem +#define __chk_user_ptr(x) (void)0 +#define __chk_io_ptr(x) (void)0 +#define __builtin_warning(x, y...) 
(1) +#define __must_hold(x) +#define __acquires(x) +#define __releases(x) +#define __acquire(x) (void)0 +#define __release(x) (void)0 +#define __cond_lock(x,c) (c) +#define __percpu +#define __rcu +#define __sched +#define __init +#define __exit +#define __private +#define __must_check +#define __malloc +#define __weak __attribute__((weak)) +#define likely(x) __builtin_expect(!!(x), 1) +#define unlikely(x) __builtin_expect(!!(x), 0) +#define unreachable() __builtin_unreachable() +#define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) + +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) +#define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__) + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#define __initcall(x) /* unimplemented */ +#define __exitcall(x) /* unimplemented */ + +#include <linux/types.h> + +/* + * Following functions are taken from kernel sources and + * break aliasing rules in their original form. + * + * While kernel is compiled with -fno-strict-aliasing, + * perf uses -Wstrict-aliasing=3 which makes build fail + * under gcc 4.4. + * + * Using extra __may_alias__ type to allow aliasing + * in this case. + */ +typedef __u8 __attribute__((__may_alias__)) __u8_alias_t; +typedef __u16 __attribute__((__may_alias__)) __u16_alias_t; +typedef __u32 __attribute__((__may_alias__)) __u32_alias_t; +typedef __u64 __attribute__((__may_alias__)) __u64_alias_t; + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8_alias_t *) res = *(volatile __u8_alias_t *) p; break; + case 2: *(__u16_alias_t *) res = *(volatile __u16_alias_t *) p; break; + case 4: *(__u32_alias_t *) res = *(volatile __u32_alias_t *) p; break; + case 8: *(__u64_alias_t *) res = *(volatile __u64_alias_t *) p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8_alias_t *) p = *(__u8_alias_t *) res; break; + case 2: *(volatile __u16_alias_t *) p = *(__u16_alias_t *) res; break; + case 4: *(volatile __u32_alias_t *) p = *(__u32_alias_t *) res; break; + case 8: *(volatile __u64_alias_t *) p = *(__u64_alias_t *) res; break; + default: + barrier(); + __builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. 
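+ *
+ * As an illustration, one context might publish a flag with
+ * WRITE_ONCE(shared->ready, 1) while another polls it with
+ * while (!READ_ONCE(shared->ready)) cpu_relax(); the compiler can then
+ * neither cache the load nor tear the store.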
+ * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define lockless_dereference(p) \ +({ \ + typeof(p) _________p1 = READ_ONCE(p); \ + typeof(*(p)) *___typecheck_p __maybe_unused; \ + smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ + (_________p1); \ +}) + +#define flush_cache_all() do { } while (0) +#define flush_cache_mm(mm) do { } while (0) +#define flush_cache_dup_mm(mm) do { } while (0) +#define flush_cache_range(vma, start, end) do { } while (0) +#define flush_cache_page(vma, vmaddr, pfn) do { } while (0) +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 0 +#define flush_dcache_page(page) do { } while (0) +#define flush_dcache_mmap_lock(mapping) do { } while (0) +#define flush_dcache_mmap_unlock(mapping) do { } while (0) +#define flush_icache_range(start, end) do { } while (0) +#define flush_icache_page(vma,pg) do { } while (0) +#define flush_icache_user_range(vma,pg,adr,len) do { } while (0) +#define flush_cache_vmap(start, end) do { } while (0) +#define flush_cache_vunmap(start, end) do { } while (0) + +#endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/include/linux/completion.h b/include/linux/completion.h new file mode 100644 index 0000000..b8bac21 --- /dev/null +++ b/include/linux/completion.h @@ -0,0 +1,83 @@ +#ifndef __LINUX_COMPLETION_H +#define __LINUX_COMPLETION_H + +/* + * (C) Copyright 2001 Linus Torvalds + * + * Atomic wait-for-completion handler data structures. + * See kernel/sched/completion.c for details. + */ + +#include <linux/wait.h> + +/* + * struct completion - structure used to maintain state for a "completion" + * + * This is the opaque structure used to maintain the state for a "completion". + * Completions currently use a FIFO to queue threads that have to wait for + * the "completion" event. + * + * See also: complete(), wait_for_completion() (and friends _timeout, + * _interruptible, _interruptible_timeout, and _killable), init_completion(), + * reinit_completion(), and macros DECLARE_COMPLETION(), + * DECLARE_COMPLETION_ONSTACK(). + */ +struct completion { + unsigned int done; + wait_queue_head_t wait; +}; + +#define COMPLETION_INITIALIZER(work) \ + { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } + +#define COMPLETION_INITIALIZER_ONSTACK(work) \ + ({ init_completion(&work); work; }) + +#define DECLARE_COMPLETION(work) \ + struct completion work = COMPLETION_INITIALIZER(work) +#define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) + +/** + * init_completion - Initialize a dynamically allocated completion + * @x: pointer to completion structure that is to be initialized + * + * This inline function will initialize a dynamically created completion + * structure. 
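+ *
+ * Typical usage is along the lines of
+ *
+ *	struct completion done;
+ *
+ *	init_completion(&done);
+ *	... hand &done to another context, which calls complete(&done) ...
+ *	wait_for_completion(&done);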
+ */ +static inline void init_completion(struct completion *x) +{ + x->done = 0; + init_waitqueue_head(&x->wait); +} + +/** + * reinit_completion - reinitialize a completion structure + * @x: pointer to completion structure that is to be reinitialized + * + * This inline function should be used to reinitialize a completion structure so it can + * be reused. This is especially important after complete_all() is used. + */ +static inline void reinit_completion(struct completion *x) +{ + x->done = 0; +} + +extern void wait_for_completion(struct completion *); +extern void wait_for_completion_io(struct completion *); +extern int wait_for_completion_interruptible(struct completion *x); +extern int wait_for_completion_killable(struct completion *x); +extern unsigned long wait_for_completion_timeout(struct completion *x, + unsigned long timeout); +extern unsigned long wait_for_completion_io_timeout(struct completion *x, + unsigned long timeout); +extern long wait_for_completion_interruptible_timeout( + struct completion *x, unsigned long timeout); +extern long wait_for_completion_killable_timeout( + struct completion *x, unsigned long timeout); +extern bool try_wait_for_completion(struct completion *x); +extern bool completion_done(struct completion *x); + +extern void complete(struct completion *); +extern void complete_all(struct completion *); + +#endif diff --git a/include/linux/console.h b/include/linux/console.h new file mode 100644 index 0000000..d01aa9a --- /dev/null +++ b/include/linux/console.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_CONSOLE_H_ +#define _LINUX_CONSOLE_H_ + +#define console_lock() +#define console_unlock() + +#endif /* _LINUX_CONSOLE_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h new file mode 100644 index 0000000..024d645 --- /dev/null +++ b/include/linux/cpumask.h @@ -0,0 +1,24 @@ +#ifndef __LINUX_CPUMASK_H +#define __LINUX_CPUMASK_H + +#define num_online_cpus() 1U +#define num_possible_cpus() 1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U +#define cpu_online(cpu) ((cpu) == 0) +#define cpu_possible(cpu) ((cpu) == 0) +#define cpu_present(cpu) ((cpu) == 0) +#define cpu_active(cpu) ((cpu) == 0) + +#define for_each_cpu(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_not(cpu, mask) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask) +#define for_each_cpu_and(cpu, mask, and) \ + for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask, (void)and) + +#define for_each_possible_cpu(cpu) for_each_cpu((cpu), 1) +#define for_each_online_cpu(cpu) for_each_cpu((cpu), 1) +#define for_each_present_cpu(cpu) for_each_cpu((cpu), 1) + +#endif /* __LINUX_CPUMASK_H */ diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h new file mode 100644 index 0000000..f198ab2 --- /dev/null +++ b/include/linux/crc32c.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_CRC32C_H +#define _LINUX_CRC32C_H + +#include "../../ccan/crc/crc.h" + +#endif /* _LINUX_CRC32C_H */ diff --git a/include/linux/crypto.h b/include/linux/crypto.h new file mode 100644 index 0000000..cb9ad24 --- /dev/null +++ b/include/linux/crypto.h @@ -0,0 +1,921 @@ +/* + * Scatterlist Cryptographic API. + * + * Copyright (c) 2002 James Morris <jmorris@intercode.com.au> + * Copyright (c) 2002 David S. Miller (davem@redhat.com) + * Copyright (c) 2005 Herbert Xu <herbert@gondor.apana.org.au> + * + * Portions derived from Cryptoapi, by Alexander Kjeldaas <astor@fast.no> + * and Nettle, by Niels Möller. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ +#ifndef _LINUX_CRYPTO_H +#define _LINUX_CRYPTO_H + +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/list.h> +#include <linux/bug.h> +#include <linux/slab.h> +#include <linux/string.h> + +/* + * Autoloaded crypto modules should only use a prefixed name to avoid allowing + * arbitrary modules to be loaded. Loading from userspace may still need the + * unprefixed names, so retains those aliases as well. + * This uses __MODULE_INFO directly instead of MODULE_ALIAS because pre-4.3 + * gcc (e.g. avr32 toolchain) uses __LINE__ for uniqueness, and this macro + * expands twice on the same line. Instead, use a separate base name for the + * alias. + */ +#define MODULE_ALIAS_CRYPTO(name) \ + __MODULE_INFO(alias, alias_userspace, name); \ + __MODULE_INFO(alias, alias_crypto, "crypto-" name) + +/* + * Algorithm masks and types. + */ +#define CRYPTO_ALG_TYPE_MASK 0x0000000f +#define CRYPTO_ALG_TYPE_CIPHER 0x00000001 +#define CRYPTO_ALG_TYPE_AEAD 0x00000003 +#define CRYPTO_ALG_TYPE_BLKCIPHER 0x00000004 +#define CRYPTO_ALG_TYPE_ABLKCIPHER 0x00000005 +#define CRYPTO_ALG_TYPE_SKCIPHER 0x00000005 +#define CRYPTO_ALG_TYPE_GIVCIPHER 0x00000006 +#define CRYPTO_ALG_TYPE_KPP 0x00000008 +#define CRYPTO_ALG_TYPE_RNG 0x0000000c +#define CRYPTO_ALG_TYPE_AKCIPHER 0x0000000d +#define CRYPTO_ALG_TYPE_DIGEST 0x0000000e +#define CRYPTO_ALG_TYPE_HASH 0x0000000e +#define CRYPTO_ALG_TYPE_SHASH 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH 0x0000000f + +#define CRYPTO_ALG_TYPE_HASH_MASK 0x0000000e +#define CRYPTO_ALG_TYPE_AHASH_MASK 0x0000000e +#define CRYPTO_ALG_TYPE_BLKCIPHER_MASK 0x0000000c + +#define CRYPTO_ALG_ASYNC 0x00000080 + +/* + * Set this bit if and only if the algorithm requires another algorithm of + * the same type to handle corner cases. + */ +#define CRYPTO_ALG_NEED_FALLBACK 0x00000100 + +/* + * This bit is set for symmetric key ciphers that have already been wrapped + * with a generic IV generator to prevent them from being wrapped again. + */ +#define CRYPTO_ALG_GENIV 0x00000200 + +/* + * Set if the algorithm is an instance that is build from templates. + */ +#define CRYPTO_ALG_INSTANCE 0x00000800 + +/* Set this bit if the algorithm provided is hardware accelerated but + * not available to userspace via instruction set or so. + */ +#define CRYPTO_ALG_KERN_DRIVER_ONLY 0x00001000 + +/* + * Mark a cipher as a service implementation only usable by another + * cipher and never by a normal user of the kernel crypto API + */ +#define CRYPTO_ALG_INTERNAL 0x00002000 + +/* + * Transform masks and values (for crt_flags). + */ +#define CRYPTO_TFM_REQ_MASK 0x000fff00 +#define CRYPTO_TFM_RES_MASK 0xfff00000 + +#define CRYPTO_TFM_REQ_WEAK_KEY 0x00000100 +#define CRYPTO_TFM_REQ_MAY_SLEEP 0x00000200 +#define CRYPTO_TFM_REQ_MAY_BACKLOG 0x00000400 +#define CRYPTO_TFM_RES_WEAK_KEY 0x00100000 +#define CRYPTO_TFM_RES_BAD_KEY_LEN 0x00200000 +#define CRYPTO_TFM_RES_BAD_KEY_SCHED 0x00400000 +#define CRYPTO_TFM_RES_BAD_BLOCK_LEN 0x00800000 +#define CRYPTO_TFM_RES_BAD_FLAGS 0x01000000 + +/* + * Miscellaneous stuff. 
+ */ +#define CRYPTO_MAX_ALG_NAME 64 + +/* + * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual + * declaration) is used to ensure that the crypto_tfm context structure is + * aligned correctly for the given architecture so that there are no alignment + * faults for C data types. In particular, this is required on platforms such + * as arm where pointers are 32-bit aligned but there are data types such as + * u64 which require 64-bit alignment. + */ +#define CRYPTO_MINALIGN ARCH_KMALLOC_MINALIGN + +#define CRYPTO_MINALIGN_ATTR __attribute__ ((__aligned__(CRYPTO_MINALIGN))) + +struct scatterlist; +struct crypto_blkcipher; +struct crypto_tfm; +struct crypto_type; +struct skcipher_givcrypt_request; + +struct blkcipher_desc { + struct crypto_blkcipher *tfm; + void *info; + u32 flags; +}; + +struct cipher_desc { + struct crypto_tfm *tfm; + void (*crfn)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + unsigned int (*prfn)(const struct cipher_desc *desc, u8 *dst, + const u8 *src, unsigned int nbytes); + void *info; +}; + +struct blkcipher_alg { + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, + struct scatterlist *dst, struct scatterlist *src, + unsigned int nbytes); + + const char *geniv; + + unsigned int min_keysize; + unsigned int max_keysize; + unsigned int ivsize; +}; + +struct cipher_alg { + unsigned int cia_min_keysize; + unsigned int cia_max_keysize; + int (*cia_setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + void (*cia_encrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cia_decrypt)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +}; + +struct compress_alg { + int (*coa_compress)(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen); + int (*coa_decompress)(struct crypto_tfm *tfm, const u8 *src, + unsigned int slen, u8 *dst, unsigned int *dlen); +}; + + +#define cra_blkcipher cra_u.blkcipher +#define cra_cipher cra_u.cipher +#define cra_compress cra_u.compress + +struct crypto_alg { + struct list_head cra_list; + struct list_head cra_users; + + u32 cra_flags; + unsigned int cra_blocksize; + unsigned int cra_ctxsize; + unsigned int cra_alignmask; + + int cra_priority; + atomic_t cra_refcnt; + + char cra_name[CRYPTO_MAX_ALG_NAME]; + char cra_driver_name[CRYPTO_MAX_ALG_NAME]; + + const struct crypto_type *cra_type; + + union { + struct blkcipher_alg blkcipher; + struct cipher_alg cipher; + struct compress_alg compress; + } cra_u; + + int (*cra_init)(struct crypto_tfm *tfm); + void (*cra_exit)(struct crypto_tfm *tfm); + void (*cra_destroy)(struct crypto_alg *alg); + + struct module *cra_module; +} CRYPTO_MINALIGN_ATTR; + +/* + * Algorithm registration interface. + */ +int crypto_register_alg(struct crypto_alg *alg); +int crypto_unregister_alg(struct crypto_alg *alg); +int crypto_register_algs(struct crypto_alg *algs, int count); +int crypto_unregister_algs(struct crypto_alg *algs, int count); + +/* + * Algorithm query interface. + */ +int crypto_has_alg(const char *name, u32 type, u32 mask); + +/* + * Transforms: user-instantiated objects which encapsulate algorithms + * and core processing logic. Managed via crypto_alloc_*() and + * crypto_free_*(), as well as the various helpers below. 
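+ *
+ * For example (illustrative only; error handling via IS_ERR() omitted):
+ *
+ *	struct crypto_tfm *tfm = crypto_alloc_base("aes", 0, 0);
+ *	...
+ *	crypto_free_tfm(tfm);
+ *
+ * Most users go through a typed wrapper such as crypto_alloc_blkcipher()
+ * or crypto_alloc_cipher() below rather than crypto_alloc_base() directly.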
+ */ + +struct blkcipher_tfm { + void *iv; + int (*setkey)(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen); + int (*encrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); + int (*decrypt)(struct blkcipher_desc *desc, struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes); +}; + +struct cipher_tfm { + int (*cit_setkey)(struct crypto_tfm *tfm, + const u8 *key, unsigned int keylen); + void (*cit_encrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); + void (*cit_decrypt_one)(struct crypto_tfm *tfm, u8 *dst, const u8 *src); +}; + +struct compress_tfm { + int (*cot_compress)(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen); + int (*cot_decompress)(struct crypto_tfm *tfm, + const u8 *src, unsigned int slen, + u8 *dst, unsigned int *dlen); +}; + +#define crt_blkcipher crt_u.blkcipher +#define crt_cipher crt_u.cipher +#define crt_compress crt_u.compress + +struct crypto_tfm { + + u32 crt_flags; + + union { + struct blkcipher_tfm blkcipher; + struct cipher_tfm cipher; + struct compress_tfm compress; + } crt_u; + + void (*exit)(struct crypto_tfm *tfm); + + struct crypto_alg *__crt_alg; + + void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; +}; + +struct crypto_blkcipher { + struct crypto_tfm base; +}; + +struct crypto_cipher { + struct crypto_tfm base; +}; + +struct crypto_comp { + struct crypto_tfm base; +}; + +enum { + CRYPTOA_UNSPEC, + CRYPTOA_ALG, + CRYPTOA_TYPE, + CRYPTOA_U32, + __CRYPTOA_MAX, +}; + +#define CRYPTOA_MAX (__CRYPTOA_MAX - 1) + +/* Maximum number of (rtattr) parameters for each template. */ +#define CRYPTO_MAX_ATTRS 32 + +struct crypto_attr_alg { + char name[CRYPTO_MAX_ALG_NAME]; +}; + +struct crypto_attr_type { + u32 type; + u32 mask; +}; + +struct crypto_attr_u32 { + u32 num; +}; + +/* + * Transform user interface. + */ + +struct crypto_tfm *crypto_alloc_base(const char *alg_name, u32 type, u32 mask); +void crypto_destroy_tfm(void *mem, struct crypto_tfm *tfm); + +static inline void crypto_free_tfm(struct crypto_tfm *tfm) +{ + return crypto_destroy_tfm(tfm, tfm); +} + +int alg_test(const char *driver, const char *alg, u32 type, u32 mask); + +/* + * Transform helpers which query the underlying algorithm. 
+ */ +static inline const char *crypto_tfm_alg_name(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_name; +} + +static inline const char *crypto_tfm_alg_driver_name(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_driver_name; +} + +static inline int crypto_tfm_alg_priority(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_priority; +} + +static inline u32 crypto_tfm_alg_type(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_TYPE_MASK; +} + +static inline unsigned int crypto_tfm_alg_blocksize(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_blocksize; +} + +static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_alignmask; +} + +static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) +{ + return tfm->crt_flags; +} + +static inline void crypto_tfm_set_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags |= flags; +} + +static inline void crypto_tfm_clear_flags(struct crypto_tfm *tfm, u32 flags) +{ + tfm->crt_flags &= ~flags; +} + +static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) +{ + return tfm->__crt_ctx; +} + +static inline unsigned int crypto_tfm_ctx_alignment(void) +{ + struct crypto_tfm *tfm; + return __alignof__(tfm->__crt_ctx); +} + +static inline u32 crypto_skcipher_type(u32 type) +{ + type &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + return type; +} + +static inline u32 crypto_skcipher_mask(u32 mask) +{ + mask &= ~(CRYPTO_ALG_TYPE_MASK | CRYPTO_ALG_GENIV); + mask |= CRYPTO_ALG_TYPE_BLKCIPHER_MASK; + return mask; +} + +/** + * DOC: Synchronous Block Cipher API + * + * The synchronous block cipher API is used with the ciphers of type + * CRYPTO_ALG_TYPE_BLKCIPHER (listed as type "blkcipher" in /proc/crypto) + * + * Synchronous calls, have a context in the tfm. But since a single tfm can be + * used in multiple calls and in parallel, this info should not be changeable + * (unless a lock is used). This applies, for example, to the symmetric key. + * However, the IV is changeable, so there is an iv field in blkcipher_tfm + * structure for synchronous blkcipher api. So, its the only state info that can + * be kept for synchronous calls without using a big lock across a tfm. + * + * The block cipher API allows the use of a complete cipher, i.e. a cipher + * consisting of a template (a block chaining mode) and a single block cipher + * primitive (e.g. AES). + * + * The plaintext data buffer and the ciphertext data buffer are pointed to + * by using scatter/gather lists. The cipher operation is performed + * on all segments of the provided scatter/gather lists. + * + * The kernel crypto API supports a cipher operation "in-place" which means that + * the caller may provide the same scatter/gather list for the plaintext and + * cipher text. After the completion of the cipher operation, the plaintext + * data is replaced with the ciphertext data in case of an encryption and vice + * versa for a decryption. The caller must ensure that the scatter/gather lists + * for the output data point to sufficiently large buffers, i.e. multiples of + * the block size of the cipher. 
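+ *
+ * A minimal usage sketch (illustrative; key, iv, buf and len are supplied
+ * by the caller, error handling is omitted, and sg_init_one() comes from
+ * the scatterlist API):
+ *
+ *	struct crypto_blkcipher *tfm = crypto_alloc_blkcipher("cbc(aes)", 0, 0);
+ *	struct blkcipher_desc desc = { .tfm = tfm, .flags = CRYPTO_TFM_REQ_MAY_SLEEP };
+ *	struct scatterlist sg;
+ *
+ *	crypto_blkcipher_setkey(tfm, key, 16);
+ *	crypto_blkcipher_set_iv(tfm, iv, crypto_blkcipher_ivsize(tfm));
+ *	sg_init_one(&sg, buf, len);
+ *	crypto_blkcipher_encrypt(&desc, &sg, &sg, len);
+ *	crypto_free_blkcipher(tfm);
+ *
+ * where len is a multiple of crypto_blkcipher_blocksize(tfm) and the
+ * operation is performed in place.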
+ */ + +static inline struct crypto_blkcipher *__crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + return (struct crypto_blkcipher *)tfm; +} + +static inline struct crypto_blkcipher *crypto_blkcipher_cast( + struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_BLKCIPHER); + return __crypto_blkcipher_cast(tfm); +} + +/** + * crypto_alloc_blkcipher() - allocate synchronous block cipher handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * blkcipher cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Allocate a cipher handle for a block cipher. The returned struct + * crypto_blkcipher is the cipher handle that is required for any subsequent + * API invocation for that block cipher. + * + * Return: allocated cipher handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. + */ +static inline struct crypto_blkcipher *crypto_alloc_blkcipher( + const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_blkcipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_blkcipher_tfm( + struct crypto_blkcipher *tfm) +{ + return &tfm->base; +} + +/** + * crypto_free_blkcipher() - zeroize and free the block cipher handle + * @tfm: cipher handle to be freed + */ +static inline void crypto_free_blkcipher(struct crypto_blkcipher *tfm) +{ + crypto_free_tfm(crypto_blkcipher_tfm(tfm)); +} + +/** + * crypto_has_blkcipher() - Search for the availability of a block cipher + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * block cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Return: true when the block cipher is known to the kernel crypto API; false + * otherwise + */ +static inline int crypto_has_blkcipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_BLKCIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +/** + * crypto_blkcipher_name() - return the name / cra_name from the cipher handle + * @tfm: cipher handle + * + * Return: The character string holding the name of the cipher + */ +static inline const char *crypto_blkcipher_name(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_name(crypto_blkcipher_tfm(tfm)); +} + +static inline struct blkcipher_tfm *crypto_blkcipher_crt( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->crt_blkcipher; +} + +static inline struct blkcipher_alg *crypto_blkcipher_alg( + struct crypto_blkcipher *tfm) +{ + return &crypto_blkcipher_tfm(tfm)->__crt_alg->cra_blkcipher; +} + +/** + * crypto_blkcipher_ivsize() - obtain IV size + * @tfm: cipher handle + * + * The size of the IV for the block cipher referenced by the cipher handle is + * returned. This IV size may be zero if the cipher does not need an IV. + * + * Return: IV size in bytes + */ +static inline unsigned int crypto_blkcipher_ivsize(struct crypto_blkcipher *tfm) +{ + return crypto_blkcipher_alg(tfm)->ivsize; +} + +/** + * crypto_blkcipher_blocksize() - obtain block size of cipher + * @tfm: cipher handle + * + * The block size for the block cipher referenced with the cipher handle is + * returned. 
The caller may use that information to allocate appropriate + * memory for the data returned by the encryption or decryption operation. + * + * Return: block size of cipher + */ +static inline unsigned int crypto_blkcipher_blocksize( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_blkcipher_tfm(tfm)); +} + +static inline unsigned int crypto_blkcipher_alignmask( + struct crypto_blkcipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_blkcipher_tfm(tfm)); +} + +static inline u32 crypto_blkcipher_get_flags(struct crypto_blkcipher *tfm) +{ + return crypto_tfm_get_flags(crypto_blkcipher_tfm(tfm)); +} + +static inline void crypto_blkcipher_set_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_blkcipher_tfm(tfm), flags); +} + +static inline void crypto_blkcipher_clear_flags(struct crypto_blkcipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_blkcipher_tfm(tfm), flags); +} + +/** + * crypto_blkcipher_setkey() - set key for cipher + * @tfm: cipher handle + * @key: buffer holding the key + * @keylen: length of the key in bytes + * + * The caller provided key is set for the block cipher referenced by the cipher + * handle. + * + * Note, the key length determines the cipher type. Many block ciphers implement + * different cipher modes depending on the key size, such as AES-128 vs AES-192 + * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 + * is performed. + * + * Return: 0 if the setting of the key was successful; < 0 if an error occurred + */ +static inline int crypto_blkcipher_setkey(struct crypto_blkcipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_blkcipher_crt(tfm)->setkey(crypto_blkcipher_tfm(tfm), + key, keylen); +} + +/** + * crypto_blkcipher_encrypt() - encrypt plaintext + * @desc: reference to the block cipher handle with meta data + * @dst: scatter/gather list that is filled by the cipher operation with the + * ciphertext + * @src: scatter/gather list that holds the plaintext + * @nbytes: number of bytes of the plaintext to encrypt. + * + * Encrypt plaintext data using the IV set by the caller with a preceding + * call of crypto_blkcipher_set_iv. + * + * The blkcipher_desc data structure must be filled by the caller and can + * reside on the stack. The caller must fill desc as follows: desc.tfm is filled + * with the block cipher handle; desc.flags is filled with either + * CRYPTO_TFM_REQ_MAY_SLEEP or 0. + * + * Return: 0 if the cipher operation was successful; < 0 if an error occurred + */ +static inline int crypto_blkcipher_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +/** + * crypto_blkcipher_encrypt_iv() - encrypt plaintext with dedicated IV + * @desc: reference to the block cipher handle with meta data + * @dst: scatter/gather list that is filled by the cipher operation with the + * ciphertext + * @src: scatter/gather list that holds the plaintext + * @nbytes: number of bytes of the plaintext to encrypt. + * + * Encrypt plaintext data with the use of an IV that is solely used for this + * cipher operation. Any previously set IV is not used. + * + * The blkcipher_desc data structure must be filled by the caller and can + * reside on the stack. 
The caller must fill desc as follows: desc.tfm is filled + * with the block cipher handle; desc.info is filled with the IV to be used for + * the current operation; desc.flags is filled with either + * CRYPTO_TFM_REQ_MAY_SLEEP or 0. + * + * Return: 0 if the cipher operation was successful; < 0 if an error occurred + */ +static inline int crypto_blkcipher_encrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->encrypt(desc, dst, src, nbytes); +} + +/** + * crypto_blkcipher_decrypt() - decrypt ciphertext + * @desc: reference to the block cipher handle with meta data + * @dst: scatter/gather list that is filled by the cipher operation with the + * plaintext + * @src: scatter/gather list that holds the ciphertext + * @nbytes: number of bytes of the ciphertext to decrypt. + * + * Decrypt ciphertext data using the IV set by the caller with a preceding + * call of crypto_blkcipher_set_iv. + * + * The blkcipher_desc data structure must be filled by the caller as documented + * for the crypto_blkcipher_encrypt call above. + * + * Return: 0 if the cipher operation was successful; < 0 if an error occurred + * + */ +static inline int crypto_blkcipher_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + desc->info = crypto_blkcipher_crt(desc->tfm)->iv; + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +/** + * crypto_blkcipher_decrypt_iv() - decrypt ciphertext with dedicated IV + * @desc: reference to the block cipher handle with meta data + * @dst: scatter/gather list that is filled by the cipher operation with the + * plaintext + * @src: scatter/gather list that holds the ciphertext + * @nbytes: number of bytes of the ciphertext to decrypt. + * + * Decrypt ciphertext data with the use of an IV that is solely used for this + * cipher operation. Any previously set IV is not used. + * + * The blkcipher_desc data structure must be filled by the caller as documented + * for the crypto_blkcipher_encrypt_iv call above. + * + * Return: 0 if the cipher operation was successful; < 0 if an error occurred + */ +static inline int crypto_blkcipher_decrypt_iv(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes) +{ + return crypto_blkcipher_crt(desc->tfm)->decrypt(desc, dst, src, nbytes); +} + +/** + * crypto_blkcipher_set_iv() - set IV for cipher + * @tfm: cipher handle + * @src: buffer holding the IV + * @len: length of the IV in bytes + * + * The caller provided IV is set for the block cipher referenced by the cipher + * handle. + */ +static inline void crypto_blkcipher_set_iv(struct crypto_blkcipher *tfm, + const u8 *src, unsigned int len) +{ + memcpy(crypto_blkcipher_crt(tfm)->iv, src, len); +} + +/** + * crypto_blkcipher_get_iv() - obtain IV from cipher + * @tfm: cipher handle + * @dst: buffer filled with the IV + * @len: length of the buffer dst + * + * The caller can obtain the IV set for the block cipher referenced by the + * cipher handle and store it into the user-provided buffer. If the buffer + * has an insufficient space, the IV is truncated to fit the buffer. 
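+ *
+ * For example, with a blkcipher handle tfm the current IV could
+ * (illustratively) be read back with
+ *
+ *	u8 iv[16];
+ *
+ *	crypto_blkcipher_get_iv(tfm, iv, crypto_blkcipher_ivsize(tfm));
+ *
+ * assuming the cipher's IV is no larger than 16 bytes.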
+ */ +static inline void crypto_blkcipher_get_iv(struct crypto_blkcipher *tfm, + u8 *dst, unsigned int len) +{ + memcpy(dst, crypto_blkcipher_crt(tfm)->iv, len); +} + +/** + * DOC: Single Block Cipher API + * + * The single block cipher API is used with the ciphers of type + * CRYPTO_ALG_TYPE_CIPHER (listed as type "cipher" in /proc/crypto). + * + * Using the single block cipher API calls, operations with the basic cipher + * primitive can be implemented. These cipher primitives exclude any block + * chaining operations including IV handling. + * + * The purpose of this single block cipher API is to support the implementation + * of templates or other concepts that only need to perform the cipher operation + * on one block at a time. Templates invoke the underlying cipher primitive + * block-wise and process either the input or the output data of these cipher + * operations. + */ + +static inline struct crypto_cipher *__crypto_cipher_cast(struct crypto_tfm *tfm) +{ + return (struct crypto_cipher *)tfm; +} + +static inline struct crypto_cipher *crypto_cipher_cast(struct crypto_tfm *tfm) +{ + BUG_ON(crypto_tfm_alg_type(tfm) != CRYPTO_ALG_TYPE_CIPHER); + return __crypto_cipher_cast(tfm); +} + +/** + * crypto_alloc_cipher() - allocate single block cipher handle + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * single block cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Allocate a cipher handle for a single block cipher. The returned struct + * crypto_cipher is the cipher handle that is required for any subsequent API + * invocation for that single block cipher. + * + * Return: allocated cipher handle in case of success; IS_ERR() is true in case + * of an error, PTR_ERR() returns the error code. + */ +static inline struct crypto_cipher *crypto_alloc_cipher(const char *alg_name, + u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return __crypto_cipher_cast(crypto_alloc_base(alg_name, type, mask)); +} + +static inline struct crypto_tfm *crypto_cipher_tfm(struct crypto_cipher *tfm) +{ + return &tfm->base; +} + +/** + * crypto_free_cipher() - zeroize and free the single block cipher handle + * @tfm: cipher handle to be freed + */ +static inline void crypto_free_cipher(struct crypto_cipher *tfm) +{ + crypto_free_tfm(crypto_cipher_tfm(tfm)); +} + +/** + * crypto_has_cipher() - Search for the availability of a single block cipher + * @alg_name: is the cra_name / name or cra_driver_name / driver name of the + * single block cipher + * @type: specifies the type of the cipher + * @mask: specifies the mask for the cipher + * + * Return: true when the single block cipher is known to the kernel crypto API; + * false otherwise + */ +static inline int crypto_has_cipher(const char *alg_name, u32 type, u32 mask) +{ + type &= ~CRYPTO_ALG_TYPE_MASK; + type |= CRYPTO_ALG_TYPE_CIPHER; + mask |= CRYPTO_ALG_TYPE_MASK; + + return crypto_has_alg(alg_name, type, mask); +} + +static inline struct cipher_tfm *crypto_cipher_crt(struct crypto_cipher *tfm) +{ + return &crypto_cipher_tfm(tfm)->crt_cipher; +} + +/** + * crypto_cipher_blocksize() - obtain block size for cipher + * @tfm: cipher handle + * + * The block size for the single block cipher referenced with the cipher handle + * tfm is returned. 
The caller may use that information to allocate appropriate + * memory for the data returned by the encryption or decryption operation + * + * Return: block size of cipher + */ +static inline unsigned int crypto_cipher_blocksize(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_blocksize(crypto_cipher_tfm(tfm)); +} + +static inline unsigned int crypto_cipher_alignmask(struct crypto_cipher *tfm) +{ + return crypto_tfm_alg_alignmask(crypto_cipher_tfm(tfm)); +} + +static inline u32 crypto_cipher_get_flags(struct crypto_cipher *tfm) +{ + return crypto_tfm_get_flags(crypto_cipher_tfm(tfm)); +} + +static inline void crypto_cipher_set_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_set_flags(crypto_cipher_tfm(tfm), flags); +} + +static inline void crypto_cipher_clear_flags(struct crypto_cipher *tfm, + u32 flags) +{ + crypto_tfm_clear_flags(crypto_cipher_tfm(tfm), flags); +} + +/** + * crypto_cipher_setkey() - set key for cipher + * @tfm: cipher handle + * @key: buffer holding the key + * @keylen: length of the key in bytes + * + * The caller provided key is set for the single block cipher referenced by the + * cipher handle. + * + * Note, the key length determines the cipher type. Many block ciphers implement + * different cipher modes depending on the key size, such as AES-128 vs AES-192 + * vs. AES-256. When providing a 16 byte key for an AES cipher handle, AES-128 + * is performed. + * + * Return: 0 if the setting of the key was successful; < 0 if an error occurred + */ +static inline int crypto_cipher_setkey(struct crypto_cipher *tfm, + const u8 *key, unsigned int keylen) +{ + return crypto_cipher_crt(tfm)->cit_setkey(crypto_cipher_tfm(tfm), + key, keylen); +} + +/** + * crypto_cipher_encrypt_one() - encrypt one block of plaintext + * @tfm: cipher handle + * @dst: points to the buffer that will be filled with the ciphertext + * @src: buffer holding the plaintext to be encrypted + * + * Invoke the encryption operation of one block. The caller must ensure that + * the plaintext and ciphertext buffers are at least one block in size. + */ +static inline void crypto_cipher_encrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_encrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + +/** + * crypto_cipher_decrypt_one() - decrypt one block of ciphertext + * @tfm: cipher handle + * @dst: points to the buffer that will be filled with the plaintext + * @src: buffer holding the ciphertext to be decrypted + * + * Invoke the decryption operation of one block. The caller must ensure that + * the plaintext and ciphertext buffers are at least one block in size. 
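+ *
+ * A minimal sketch of the single block cipher API (hedged: "aes", the
+ * 16 byte key and the omitted IS_ERR()/setkey error checks are
+ * illustrative assumptions only):
+ *
+ *	struct crypto_cipher *tfm = crypto_alloc_cipher("aes", 0, 0);
+ *	u8 key[16], in[16], out[16];
+ *
+ *	crypto_cipher_setkey(tfm, key, sizeof(key));
+ *	crypto_cipher_encrypt_one(tfm, out, in);
+ *	crypto_cipher_decrypt_one(tfm, in, out);
+ *	crypto_free_cipher(tfm);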
+ */ +static inline void crypto_cipher_decrypt_one(struct crypto_cipher *tfm, + u8 *dst, const u8 *src) +{ + crypto_cipher_crt(tfm)->cit_decrypt_one(crypto_cipher_tfm(tfm), + dst, src); +} + +#endif /* _LINUX_CRYPTO_H */ + diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h new file mode 100644 index 0000000..8dfcb83 --- /dev/null +++ b/include/linux/cryptohash.h @@ -0,0 +1,20 @@ +#ifndef __CRYPTOHASH_H +#define __CRYPTOHASH_H + +#include <linux/types.h> + +#define SHA_DIGEST_WORDS 5 +#define SHA_MESSAGE_BYTES (512 /*bits*/ / 8) +#define SHA_WORKSPACE_WORDS 16 + +void sha_init(__u32 *buf); +void sha_transform(__u32 *digest, const char *data, __u32 *W); + +#define MD5_DIGEST_WORDS 4 +#define MD5_MESSAGE_BYTES 64 + +void md5_transform(__u32 *hash, __u32 const *in); + +__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]); + +#endif diff --git a/include/linux/ctype.h b/include/linux/ctype.h new file mode 100644 index 0000000..26b7de5 --- /dev/null +++ b/include/linux/ctype.h @@ -0,0 +1,2 @@ + +#include <ctype.h> diff --git a/include/linux/dcache.h b/include/linux/dcache.h new file mode 100644 index 0000000..b569b2c --- /dev/null +++ b/include/linux/dcache.h @@ -0,0 +1,31 @@ +#ifndef __LINUX_DCACHE_H +#define __LINUX_DCACHE_H + +struct super_block; +struct inode; + +/* The hash is always the low bits of hash_len */ +#ifdef __LITTLE_ENDIAN + #define HASH_LEN_DECLARE u32 hash; u32 len +#else + #define HASH_LEN_DECLARE u32 len; u32 hash +#endif + +struct qstr { + union { + struct { + HASH_LEN_DECLARE; + }; + u64 hash_len; + }; + const unsigned char *name; +}; + +#define QSTR_INIT(n,l) { { { .len = l } }, .name = n } + +struct dentry { + struct super_block *d_sb; + struct inode *d_inode; +}; + +#endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h new file mode 100644 index 0000000..4db5b3f --- /dev/null +++ b/include/linux/debugfs.h @@ -0,0 +1,243 @@ +/* + * debugfs.h - a tiny little debug file system + * + * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004 IBM Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * debugfs is for people to use instead of /proc or /sys. + * See Documentation/DocBook/filesystems for more details. 
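+ *
+ * In this build every helper below is a stub: the create functions return
+ * ERR_PTR(-ENODEV) and the remove functions do nothing, so code that
+ * registers debugfs files optionally still compiles. A hedged caller-side
+ * sketch ("foo" and foo_val are illustrative names, not part of this
+ * header):
+ *
+ *	struct dentry *d = debugfs_create_u32("foo", 0444, NULL, &foo_val);
+ *	if (IS_ERR_OR_NULL(d))
+ *		;	/* debugfs unavailable here - nothing to undo */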
+ */ + +#ifndef _DEBUGFS_H_ +#define _DEBUGFS_H_ + +#include <linux/fs.h> +#include <linux/seq_file.h> + +#include <linux/types.h> +#include <linux/compiler.h> + +struct device; +struct file_operations; +struct vfsmount; +struct srcu_struct; + +struct debugfs_blob_wrapper { + void *data; + unsigned long size; +}; + +struct debugfs_reg32 { + char *name; + unsigned long offset; +}; + +struct debugfs_regset32 { + const struct debugfs_reg32 *regs; + int nregs; + void __iomem *base; +}; + +extern struct dentry *arch_debugfs_dir; + +extern struct srcu_struct debugfs_srcu; + +#include <linux/err.h> + +static inline struct dentry *debugfs_create_file(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_file_size(const char *name, umode_t mode, + struct dentry *parent, void *data, + const struct file_operations *fops, + loff_t file_size) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_dir(const char *name, + struct dentry *parent) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_symlink(const char *name, + struct dentry *parent, + const char *dest) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_automount(const char *name, + struct dentry *parent, + struct vfsmount *(*f)(void *), + void *data) +{ + return ERR_PTR(-ENODEV); +} + +static inline void debugfs_remove(struct dentry *dentry) +{ } + +static inline void debugfs_remove_recursive(struct dentry *dentry) +{ } + +static inline int debugfs_use_file_start(const struct dentry *dentry, + int *srcu_idx) + __acquires(&debugfs_srcu) +{ + return 0; +} + +static inline void debugfs_use_file_finish(int srcu_idx) + __releases(&debugfs_srcu) +{ } + +#define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ + static const struct file_operations __fops = { 0 } + +static inline struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry, + struct dentry *new_dir, char *new_name) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_u8(const char *name, umode_t mode, + struct dentry *parent, + u8 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_u16(const char *name, umode_t mode, + struct dentry *parent, + u16 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_u32(const char *name, umode_t mode, + struct dentry *parent, + u32 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_u64(const char *name, umode_t mode, + struct dentry *parent, + u64 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_x8(const char *name, umode_t mode, + struct dentry *parent, + u8 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_x16(const char *name, umode_t mode, + struct dentry *parent, + u16 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_x32(const char *name, umode_t mode, + struct dentry *parent, + u32 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_x64(const char *name, umode_t mode, + struct dentry *parent, + u64 *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_size_t(const char *name, umode_t mode, + struct dentry *parent, + size_t *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct 
dentry *debugfs_create_atomic_t(const char *name, umode_t mode, + struct dentry *parent, atomic_t *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, + struct dentry *parent, + bool *value) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, + struct dentry *parent, + struct debugfs_blob_wrapper *blob) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_regset32(const char *name, + umode_t mode, struct dentry *parent, + struct debugfs_regset32 *regset) +{ + return ERR_PTR(-ENODEV); +} + +static inline void debugfs_print_regs32(struct seq_file *s, const struct debugfs_reg32 *regs, + int nregs, void __iomem *base, char *prefix) +{ +} + +static inline bool debugfs_initialized(void) +{ + return false; +} + +static inline struct dentry *debugfs_create_u32_array(const char *name, umode_t mode, + struct dentry *parent, + u32 *array, u32 elements) +{ + return ERR_PTR(-ENODEV); +} + +static inline struct dentry *debugfs_create_devm_seqfile(struct device *dev, + const char *name, + struct dentry *parent, + int (*read_fn)(struct seq_file *s, + void *data)) +{ + return ERR_PTR(-ENODEV); +} + +static inline ssize_t debugfs_read_file_bool(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +static inline ssize_t debugfs_write_file_bool(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + +#endif diff --git a/include/linux/device.h b/include/linux/device.h new file mode 100644 index 0000000..2b2b849 --- /dev/null +++ b/include/linux/device.h @@ -0,0 +1,40 @@ +#ifndef _DEVICE_H_ +#define _DEVICE_H_ + +#include <linux/slab.h> +#include <linux/types.h> + +struct module; + +struct class { +}; + +static inline void class_destroy(struct class *class) +{ + kfree(class); +} + +static inline struct class * __must_check class_create(struct module *owner, + const char *name) +{ + return kzalloc(sizeof(struct class), GFP_KERNEL); +} + +struct device { +}; + +static inline void device_unregister(struct device *dev) +{ + kfree(dev); +} + +static inline void device_destroy(struct class *cls, dev_t devt) {} + +static inline struct device *device_create(struct class *cls, struct device *parent, + dev_t devt, void *drvdata, + const char *fmt, ...) +{ + return kzalloc(sizeof(struct device), GFP_KERNEL); +} + +#endif /* _DEVICE_H_ */ diff --git a/include/linux/dynamic_fault.h b/include/linux/dynamic_fault.h new file mode 100644 index 0000000..dd215dc --- /dev/null +++ b/include/linux/dynamic_fault.h @@ -0,0 +1,7 @@ +#ifndef __TOOLS_LINUX_DYNAMIC_FAULT_H +#define __TOOLS_LINUX_DYNAMIC_FAULT_H + +#define dynamic_fault(_class) 0 +#define race_fault() 0 + +#endif /* __TOOLS_LINUX_DYNAMIC_FAULT_H */ diff --git a/include/linux/err.h b/include/linux/err.h new file mode 100644 index 0000000..e94bdff --- /dev/null +++ b/include/linux/err.h @@ -0,0 +1,68 @@ +#ifndef __TOOLS_LINUX_ERR_H +#define __TOOLS_LINUX_ERR_H + +#include <linux/compiler.h> +#include <linux/types.h> + +#include <asm/errno.h> + +/* + * Original kernel header comment: + * + * Kernel pointers have redundant information, so we can use a + * scheme where we can return either an error code or a normal + * pointer with the same return value. + * + * This should be a per-architecture thing, to allow different + * error and pointer decisions. 
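+ *
+ * A typical caller-side pattern (hedged sketch; foo_alloc() is a
+ * hypothetical constructor that returns either a valid pointer or an
+ * ERR_PTR()-encoded errno):
+ *
+ *	struct foo *f = foo_alloc();
+ *	if (IS_ERR(f))
+ *		return PTR_ERR(f);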
+ * + * Userspace note: + * The same principle works for userspace, because 'error' pointers + * fall down to the unused hole far from user space, as described + * in Documentation/x86/x86_64/mm.txt for x86_64 arch: + * + * 0000000000000000 - 00007fffffffffff (=47 bits) user space, different per mm hole caused by [48:63] sign extension + * ffffffffffe00000 - ffffffffffffffff (=2 MB) unused hole + * + * It should be the same case for other architectures, because + * this code is used in generic kernel code. + */ +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error_) +{ + return (void *) error_; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline bool __must_check IS_ERR_OR_NULL(__force const void *ptr) +{ + return unlikely(!ptr) || IS_ERR_VALUE((unsigned long)ptr); +} + +static inline void * __must_check ERR_CAST(__force const void *ptr) +{ + /* cast away the const */ + return (void *) ptr; +} + +static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#endif /* _LINUX_ERR_H */ diff --git a/include/linux/export.h b/include/linux/export.h new file mode 100644 index 0000000..af9da96 --- /dev/null +++ b/include/linux/export.h @@ -0,0 +1,13 @@ +#ifndef _TOOLS_LINUX_EXPORT_H_ +#define _TOOLS_LINUX_EXPORT_H_ + +#define EXPORT_SYMBOL(sym) +#define EXPORT_SYMBOL_GPL(sym) +#define EXPORT_SYMBOL_GPL_FUTURE(sym) +#define EXPORT_UNUSED_SYMBOL(sym) +#define EXPORT_UNUSED_SYMBOL_GPL(sym) + +#define THIS_MODULE ((struct module *)0) +#define KBUILD_MODNAME + +#endif diff --git a/include/linux/freezer.h b/include/linux/freezer.h new file mode 100644 index 0000000..2b76d8c --- /dev/null +++ b/include/linux/freezer.h @@ -0,0 +1,7 @@ +#ifndef __TOOLS_LINUX_FREEZER_H +#define __TOOLS_LINUX_FREEZER_H + +#define try_to_freeze() +#define set_freezable() + +#endif /* __TOOLS_LINUX_FREEZER_H */ diff --git a/include/linux/generic-radix-tree.h b/include/linux/generic-radix-tree.h new file mode 100644 index 0000000..1a951e9 --- /dev/null +++ b/include/linux/generic-radix-tree.h @@ -0,0 +1,137 @@ +#ifndef _LINUX_GENERIC_RADIX_TREE_H +#define _LINUX_GENERIC_RADIX_TREE_H + +/* + * Generic radix trees/sparse arrays: + * + * A generic radix tree has all nodes of size PAGE_SIZE - both leaves and + * interior nodes. + */ + +#include <linux/page.h> +#include <linux/bug.h> +#include <linux/kernel.h> +#include <linux/log2.h> + +struct genradix_node; + +struct __genradix { + struct genradix_node *root; + size_t depth; +}; + +/* + * NOTE: currently, sizeof(_type) must be a power of two and not larger than + * PAGE_SIZE: + */ + +#define __GENRADIX_INITIALIZER \ + { \ + .tree = { \ + .root = NULL, \ + .depth = 0, \ + } \ + } + +/* + * We use a 0 size array to stash the type we're storing without taking any + * space at runtime - then the various accessor macros can use typeof() to get + * to it for casts/sizeof - we also force the alignment so that storing a type + * with a ridiculous alignment doesn't blow up the alignment or size of the + * genradix. 
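+ *
+ * A hedged usage sketch (struct foo and the index i are illustrative; a
+ * NULL return from genradix_ptr_alloc() is assumed to mean the allocation
+ * failed):
+ *
+ *	DEFINE_GENRADIX(my_radix, struct foo);
+ *
+ *	struct foo *f = genradix_ptr_alloc(&my_radix, i, GFP_KERNEL);
+ *	if (!f)
+ *		return -ENOMEM;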
+ */ + +#define DECLARE_GENRADIX_TYPE(_name, _type) \ +struct _name { \ + struct __genradix tree; \ + _type type[0] __aligned(1); \ +} + +#define DECLARE_GENRADIX(_name, _type) \ +struct { \ + struct __genradix tree; \ + _type type[0] __aligned(1); \ +} _name + +#define DEFINE_GENRADIX(_name, _type) \ + DECLARE_GENRADIX(_name, _type) = __GENRADIX_INITIALIZER + +#define genradix_init(_radix) \ +do { \ + *(_radix) = (typeof(*_radix)) __GENRADIX_INITIALIZER; \ +} while (0) + +void __genradix_free(struct __genradix *); + +#define genradix_free(_radix) __genradix_free(&(_radix)->tree) + +static inline size_t __idx_to_offset(size_t idx, size_t obj_size) +{ + BUILD_BUG_ON(obj_size > PAGE_SIZE); + + if (!is_power_of_2(obj_size)) { + size_t objs_per_page = PAGE_SIZE / obj_size; + + return (idx / objs_per_page) * PAGE_SIZE + + (idx % objs_per_page) * obj_size; + } else { + return idx * obj_size; + } +} + +#define __genradix_cast(_radix) (typeof((_radix)->type[0]) *) +#define __genradix_obj_size(_radix) sizeof((_radix)->type[0]) +#define __genradix_idx_to_offset(_radix, _idx) \ + __idx_to_offset(_idx, __genradix_obj_size(_radix)) + +void *__genradix_ptr(struct __genradix *, size_t); + +/* Returns a pointer to element at @_idx */ +#define genradix_ptr(_radix, _idx) \ + (__genradix_cast(_radix) \ + __genradix_ptr(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx))) + +void *__genradix_ptr_alloc(struct __genradix *, size_t, gfp_t); + +/* Returns a pointer to element at @_idx, allocating it if necessary */ +#define genradix_ptr_alloc(_radix, _idx, _gfp) \ + (__genradix_cast(_radix) \ + __genradix_ptr_alloc(&(_radix)->tree, \ + __genradix_idx_to_offset(_radix, _idx), \ + _gfp)) + +struct genradix_iter { + size_t offset; + size_t pos; +}; + +static inline void genradix_iter_init(struct genradix_iter *iter) +{ + iter->offset = 0; + iter->pos = 0; +} + +void *__genradix_iter_peek(struct genradix_iter *, struct __genradix *, size_t); + +#define genradix_iter_peek(_iter, _radix) \ + (__genradix_cast(_radix) \ + __genradix_iter_peek(_iter, &(_radix)->tree, \ + PAGE_SIZE / __genradix_obj_size(_radix))) + +static inline void __genradix_iter_advance(struct genradix_iter *iter, + size_t obj_size) +{ + iter->offset += obj_size; + + if (!is_power_of_2(obj_size) && + (iter->offset & (PAGE_SIZE - 1)) + obj_size > PAGE_SIZE) + iter->offset = round_up(iter->offset, PAGE_SIZE); + + iter->pos++; +} + +#define genradix_iter_advance(_iter, _radix) \ + __genradix_iter_advance(_iter, __genradix_obj_size(_radix)) + +#endif /* _LINUX_GENERIC_RADIX_TREE_H */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/include/linux/genhd.h diff --git a/include/linux/gfp.h b/include/linux/gfp.h new file mode 100644 index 0000000..3830bc2 --- /dev/null +++ b/include/linux/gfp.h @@ -0,0 +1 @@ +#include <linux/slab.h> diff --git a/include/linux/hash.h b/include/linux/hash.h new file mode 100644 index 0000000..ad6fa21 --- /dev/null +++ b/include/linux/hash.h @@ -0,0 +1,104 @@ +#ifndef _LINUX_HASH_H +#define _LINUX_HASH_H +/* Fast hashing routine for ints, longs and pointers. + (C) 2002 Nadia Yvette Chambers, IBM */ + +#include <asm/types.h> +#include <linux/compiler.h> + +/* + * The "GOLDEN_RATIO_PRIME" is used in ifs/btrfs/brtfs_inode.h and + * fs/inode.c. It's not actually prime any more (the previous primes + * were actively bad for hashing), but the name remains. 
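+ *
+ * As a hedged illustration of the helpers below: folding a pointer into a
+ * 1024-bucket (2^10) hash table is just
+ *
+ *	u32 bucket = hash_ptr(ptr, 10);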
+ */ +#if BITS_PER_LONG == 32 +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32 +#define hash_long(val, bits) hash_32(val, bits) +#elif BITS_PER_LONG == 64 +#define hash_long(val, bits) hash_64(val, bits) +#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64 +#else +#error Wordsize not 32 or 64 +#endif + +/* + * This hash multiplies the input by a large odd number and takes the + * high bits. Since multiplication propagates changes to the most + * significant end only, it is essential that the high bits of the + * product be used for the hash value. + * + * Chuck Lever verified the effectiveness of this technique: + * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf + * + * Although a random odd number will do, it turns out that the golden + * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice + * properties. (See Knuth vol 3, section 6.4, exercise 9.) + * + * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2, + * which is very slightly easier to multiply by and makes no + * difference to the hash distribution. + */ +#define GOLDEN_RATIO_32 0x61C88647 +#define GOLDEN_RATIO_64 0x61C8864680B583EBull + +#ifdef CONFIG_HAVE_ARCH_HASH +/* This header may use the GOLDEN_RATIO_xx constants */ +#include <asm/hash.h> +#endif + +/* + * The _generic versions exist only so lib/test_hash.c can compare + * the arch-optimized versions with the generic. + * + * Note that if you change these, any <asm/hash.h> that aren't updated + * to match need to have their HAVE_ARCH_* define values updated so the + * self-test will not false-positive. + */ +#ifndef HAVE_ARCH__HASH_32 +#define __hash_32 __hash_32_generic +#endif +static inline u32 __hash_32_generic(u32 val) +{ + return val * GOLDEN_RATIO_32; +} + +#ifndef HAVE_ARCH_HASH_32 +#define hash_32 hash_32_generic +#endif +static inline u32 hash_32_generic(u32 val, unsigned int bits) +{ + /* High bits are more random, so use them. */ + return __hash_32(val) >> (32 - bits); +} + +#ifndef HAVE_ARCH_HASH_64 +#define hash_64 hash_64_generic +#endif +static __always_inline u32 hash_64_generic(u64 val, unsigned int bits) +{ +#if BITS_PER_LONG == 64 + /* 64x64-bit multiply is efficient on all 64-bit processors */ + return val * GOLDEN_RATIO_64 >> (64 - bits); +#else + /* Hash 64 bits using only 32x32-bit multiply. */ + return hash_32((u32)val ^ __hash_32(val >> 32), bits); +#endif +} + +static inline u32 hash_ptr(const void *ptr, unsigned int bits) +{ + return hash_long((unsigned long)ptr, bits); +} + +/* This really should be called fold32_ptr; it does no hashing to speak of. */ +static inline u32 hash32_ptr(const void *ptr) +{ + unsigned long val = (unsigned long)ptr; + +#if BITS_PER_LONG == 64 + val ^= (val >> 32); +#endif + return (u32)val; +} + +#endif /* _LINUX_HASH_H */ diff --git a/include/linux/idr.h b/include/linux/idr.h new file mode 100644 index 0000000..6f92825 --- /dev/null +++ b/include/linux/idr.h @@ -0,0 +1,208 @@ +/* + * include/linux/idr.h + * + * 2002-10-18 written by Jim Houston jim.houston@ccur.com + * Copyright (C) 2002 by Concurrent Computer Corporation + * Distributed under the GNU GPL license version 2. + * + * Small id to pointer translation service avoiding fixed sized + * tables. + */ + +#ifndef __IDR_H__ +#define __IDR_H__ + +#include <linux/types.h> +#include <linux/bitmap.h> +#include <linux/bitops.h> +#include <linux/preempt.h> +#include <linux/rcupdate.h> +#include <linux/spinlock.h> + +/* + * We want shallower trees and thus more bits covered at each layer. 
8 + * bits gives us large enough first layer for most use cases and maximum + * tree depth of 4. Each idr_layer is slightly larger than 2k on 64bit and + * 1k on 32bit. + */ +#define IDR_BITS 8 +#define IDR_SIZE (1 << IDR_BITS) +#define IDR_MASK ((1 << IDR_BITS)-1) + +struct idr_layer { + int prefix; /* the ID prefix of this idr_layer */ + int layer; /* distance from leaf */ + struct idr_layer __rcu *ary[1<<IDR_BITS]; + int count; /* When zero, we can release it */ + union { + /* A zero bit means "space here" */ + DECLARE_BITMAP(bitmap, IDR_SIZE); + struct rcu_head rcu_head; + }; +}; + +struct idr { + struct idr_layer __rcu *hint; /* the last layer allocated from */ + struct idr_layer __rcu *top; + int layers; /* only valid w/o concurrent changes */ + int cur; /* current pos for cyclic allocation */ + spinlock_t lock; + int id_free_cnt; + struct idr_layer *id_free; +}; + +#define IDR_INIT(name) \ +{ \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ +} +#define DEFINE_IDR(name) struct idr name = IDR_INIT(name) + +/** + * DOC: idr sync + * idr synchronization (stolen from radix-tree.h) + * + * idr_find() is able to be called locklessly, using RCU. The caller must + * ensure calls to this function are made within rcu_read_lock() regions. + * Other readers (lock-free or otherwise) and modifications may be running + * concurrently. + * + * It is still required that the caller manage the synchronization and + * lifetimes of the items. So if RCU lock-free lookups are used, typically + * this would mean that the items have their own locks, or are amenable to + * lock-free access; and that the items are freed by RCU (or only freed after + * having been deleted from the idr tree *and* a synchronize_rcu() grace + * period). + */ + +/* + * This is what we export. + */ + +void *idr_find_slowpath(struct idr *idp, int id); +void idr_preload(gfp_t gfp_mask); + +static inline int idr_alloc(struct idr *idp, void *ptr, int start, int end, gfp_t gfp_mask) +{ + return 0; +} + +static inline void idr_remove(struct idr *idp, int id) {} + +int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp_mask); +int idr_for_each(struct idr *idp, + int (*fn)(int id, void *p, void *data), void *data); +void *idr_get_next(struct idr *idp, int *nextid); +void *idr_replace(struct idr *idp, void *ptr, int id); +void idr_destroy(struct idr *idp); +void idr_init(struct idr *idp); +bool idr_is_empty(struct idr *idp); + +/** + * idr_preload_end - end preload section started with idr_preload() + * + * Each idr_preload() should be matched with an invocation of this + * function. See idr_preload() for details. + */ +static inline void idr_preload_end(void) +{ + preempt_enable(); +} + +/** + * idr_find - return pointer for given id + * @idr: idr handle + * @id: lookup key + * + * Return the pointer given the id it has been registered with. A %NULL + * return indicates that @id is not valid or you passed %NULL in + * idr_get_new(). + * + * This function can be called under rcu_read_lock(), given that the leaf + * pointers lifetimes are correctly managed. 
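+ *
+ * A hedged lookup sketch (my_idr is an illustrative, already populated
+ * idr; object lifetime rules remain the caller's responsibility as noted
+ * above):
+ *
+ *	rcu_read_lock();
+ *	void *p = idr_find(&my_idr, id);
+ *	rcu_read_unlock();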
+ */ +static inline void *idr_find(struct idr *idr, int id) +{ + struct idr_layer *hint = rcu_dereference_raw(idr->hint); + + if (hint && (id & ~IDR_MASK) == hint->prefix) + return rcu_dereference_raw(hint->ary[id & IDR_MASK]); + + return idr_find_slowpath(idr, id); +} + +/** + * idr_for_each_entry - iterate over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + * + * @entry and @id do not need to be initialized before the loop, and + * after normal terminatinon @entry is left with the value NULL. This + * is convenient for a "not found" value. + */ +#define idr_for_each_entry(idp, entry, id) \ + for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) + +/** + * idr_for_each_entry - continue iteration over an idr's elements of a given type + * @idp: idr handle + * @entry: the type * to use as cursor + * @id: id entry's key + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define idr_for_each_entry_continue(idp, entry, id) \ + for ((entry) = idr_get_next((idp), &(id)); \ + entry; \ + ++id, (entry) = idr_get_next((idp), &(id))) + +/* + * IDA - IDR based id allocator, use when translation from id to + * pointer isn't necessary. + * + * IDA_BITMAP_LONGS is calculated to be one less to accommodate + * ida_bitmap->nr_busy so that the whole struct fits in 128 bytes. + */ +#define IDA_CHUNK_SIZE 128 /* 128 bytes per chunk */ +#define IDA_BITMAP_LONGS (IDA_CHUNK_SIZE / sizeof(long) - 1) +#define IDA_BITMAP_BITS (IDA_BITMAP_LONGS * sizeof(long) * 8) + +struct ida_bitmap { + long nr_busy; + unsigned long bitmap[IDA_BITMAP_LONGS]; +}; + +struct ida { + struct idr idr; + struct ida_bitmap *free_bitmap; +}; + +#define IDA_INIT(name) { .idr = IDR_INIT((name).idr), .free_bitmap = NULL, } +#define DEFINE_IDA(name) struct ida name = IDA_INIT(name) + +int ida_pre_get(struct ida *ida, gfp_t gfp_mask); +int ida_get_new_above(struct ida *ida, int starting_id, int *p_id); +void ida_remove(struct ida *ida, int id); +void ida_destroy(struct ida *ida); +void ida_init(struct ida *ida); + +int ida_simple_get(struct ida *ida, unsigned int start, unsigned int end, + gfp_t gfp_mask); +void ida_simple_remove(struct ida *ida, unsigned int id); + +/** + * ida_get_new - allocate new ID + * @ida: idr handle + * @p_id: pointer to the allocated handle + * + * Simple wrapper around ida_get_new_above() w/ @starting_id of zero. + */ +static inline int ida_get_new(struct ida *ida, int *p_id) +{ + return ida_get_new_above(ida, 0, p_id); +} + +void __init idr_init_cache(void); + +#endif /* __IDR_H__ */ diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h new file mode 100644 index 0000000..822c64a --- /dev/null +++ b/include/linux/ioprio.h @@ -0,0 +1,46 @@ +#ifndef IOPRIO_H +#define IOPRIO_H + +/* + * Gives us 8 prio classes with 13-bits of data for each class + */ +#define IOPRIO_BITS (16) +#define IOPRIO_CLASS_SHIFT (13) +#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) + +#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) +#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) +#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) + +#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) + +/* + * These are the io priority groups as implemented by CFQ. RT is the realtime + * class, it always gets premium service. BE is the best-effort scheduling + * class, the default for any process. 
IDLE is the idle scheduling class, it + * is only served when no one else is using the disk. + */ +enum { + IOPRIO_CLASS_NONE, + IOPRIO_CLASS_RT, + IOPRIO_CLASS_BE, + IOPRIO_CLASS_IDLE, +}; + +/* + * 8 best effort priority levels are supported + */ +#define IOPRIO_BE_NR (8) + +enum { + IOPRIO_WHO_PROCESS = 1, + IOPRIO_WHO_PGRP, + IOPRIO_WHO_USER, +}; + +/* + * Fallback BE priority + */ +#define IOPRIO_NORM (4) + +#endif diff --git a/include/linux/jhash.h b/include/linux/jhash.h new file mode 100644 index 0000000..348c6f4 --- /dev/null +++ b/include/linux/jhash.h @@ -0,0 +1,175 @@ +#ifndef _LINUX_JHASH_H +#define _LINUX_JHASH_H + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu) + * + * I've modified Bob's hash to be useful in the Linux kernel, and + * any bugs present are my fault. + * Jozsef + */ +#include <linux/bitops.h> +#include <linux/unaligned/packed_struct.h> + +/* Best hash sizes are of power of two */ +#define jhash_size(n) ((u32)1<<(n)) +/* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */ +#define jhash_mask(n) (jhash_size(n)-1) + +/* __jhash_mix -- mix 3 32-bit values reversibly. */ +#define __jhash_mix(a, b, c) \ +{ \ + a -= c; a ^= rol32(c, 4); c += b; \ + b -= a; b ^= rol32(a, 6); a += c; \ + c -= b; c ^= rol32(b, 8); b += a; \ + a -= c; a ^= rol32(c, 16); c += b; \ + b -= a; b ^= rol32(a, 19); a += c; \ + c -= b; c ^= rol32(b, 4); b += a; \ +} + +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */ +#define __jhash_final(a, b, c) \ +{ \ + c ^= b; c -= rol32(b, 14); \ + a ^= c; a -= rol32(c, 11); \ + b ^= a; b -= rol32(a, 25); \ + c ^= b; c -= rol32(b, 16); \ + a ^= c; a -= rol32(c, 4); \ + b ^= a; b -= rol32(a, 14); \ + c ^= b; c -= rol32(b, 24); \ +} + +/* An arbitrary initial parameter */ +#define JHASH_INITVAL 0xdeadbeef + +/* jhash - hash an arbitrary key + * @k: sequence of bytes as key + * @length: the length of the key + * @initval: the previous hash, or an arbitray value + * + * The generic version, hashes an arbitrary sequence of bytes. + * No alignment or length assumptions are made about the input key. + * + * Returns the hash value of the key. The result depends on endianness. 
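+ *
+ * Example (hedged; buf and buf_len are arbitrary caller data):
+ *
+ *	u32 h = jhash(buf, buf_len, 0);
+ *	u32 bucket = h & jhash_mask(10);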
+ */ +static inline u32 jhash(const void *key, u32 length, u32 initval) +{ + u32 a, b, c; + const u8 *k = key; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + length + initval; + + /* All but the last block: affect some 32 bits of (a,b,c) */ + while (length > 12) { + a += __get_unaligned_cpu32(k); + b += __get_unaligned_cpu32(k + 4); + c += __get_unaligned_cpu32(k + 8); + __jhash_mix(a, b, c); + length -= 12; + k += 12; + } + /* Last block: affect all 32 bits of (c) */ + /* All the case statements fall through */ + switch (length) { + case 12: c += (u32)k[11]<<24; + case 11: c += (u32)k[10]<<16; + case 10: c += (u32)k[9]<<8; + case 9: c += k[8]; + case 8: b += (u32)k[7]<<24; + case 7: b += (u32)k[6]<<16; + case 6: b += (u32)k[5]<<8; + case 5: b += k[4]; + case 4: a += (u32)k[3]<<24; + case 3: a += (u32)k[2]<<16; + case 2: a += (u32)k[1]<<8; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + +/* jhash2 - hash an array of u32's + * @k: the key which must be an array of u32's + * @length: the number of u32's in the key + * @initval: the previous hash, or an arbitray value + * + * Returns the hash value of the key. + */ +static inline u32 jhash2(const u32 *k, u32 length, u32 initval) +{ + u32 a, b, c; + + /* Set up the internal state */ + a = b = c = JHASH_INITVAL + (length<<2) + initval; + + /* Handle most of the key */ + while (length > 3) { + a += k[0]; + b += k[1]; + c += k[2]; + __jhash_mix(a, b, c); + length -= 3; + k += 3; + } + + /* Handle the last 3 u32's: all the case statements fall through */ + switch (length) { + case 3: c += k[2]; + case 2: b += k[1]; + case 1: a += k[0]; + __jhash_final(a, b, c); + case 0: /* Nothing left to add */ + break; + } + + return c; +} + + +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */ +static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +{ + a += initval; + b += initval; + c += initval; + + __jhash_final(a, b, c); + + return c; +} + +static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) +{ + return __jhash_nwords(a, b, c, initval + JHASH_INITVAL + (3 << 2)); +} + +static inline u32 jhash_2words(u32 a, u32 b, u32 initval) +{ + return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); +} + +static inline u32 jhash_1word(u32 a, u32 initval) +{ + return __jhash_nwords(a, 0, 0, initval + JHASH_INITVAL + (1 << 2)); +} + +#endif /* _LINUX_JHASH_H */ diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h new file mode 100644 index 0000000..00abaee --- /dev/null +++ b/include/linux/jiffies.h @@ -0,0 +1,451 @@ +#ifndef _LINUX_JIFFIES_H +#define _LINUX_JIFFIES_H + +#include <linux/kernel.h> +#include <linux/time64.h> +#include <linux/typecheck.h> +#include <linux/types.h> + +#define HZ 100 + +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +/* + * The following defines establish the engineering parameters of the PLL + * model. The HZ variable establishes the timer interrupt frequency, 100 Hz + * for the SunOS kernel, 256 Hz for the Ultrix kernel and 1024 Hz for the + * OSF/1 kernel. The SHIFT_HZ define expresses the same value as the + * nearest power of two in order to avoid hardware multiply operations. 
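+ *
+ * For example, with the HZ of 100 defined above, the ladder below selects
+ * SHIFT_HZ = 7, since 96 <= 100 < 192 and 2^7 = 128 is the nearest power
+ * of two.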
+ */ +#if HZ >= 12 && HZ < 24 +# define SHIFT_HZ 4 +#elif HZ >= 24 && HZ < 48 +# define SHIFT_HZ 5 +#elif HZ >= 48 && HZ < 96 +# define SHIFT_HZ 6 +#elif HZ >= 96 && HZ < 192 +# define SHIFT_HZ 7 +#elif HZ >= 192 && HZ < 384 +# define SHIFT_HZ 8 +#elif HZ >= 384 && HZ < 768 +# define SHIFT_HZ 9 +#elif HZ >= 768 && HZ < 1536 +# define SHIFT_HZ 10 +#elif HZ >= 1536 && HZ < 3072 +# define SHIFT_HZ 11 +#elif HZ >= 3072 && HZ < 6144 +# define SHIFT_HZ 12 +#elif HZ >= 6144 && HZ < 12288 +# define SHIFT_HZ 13 +#else +# error Invalid value of HZ. +#endif + +/* Suppose we want to divide two numbers NOM and DEN: NOM/DEN, then we can + * improve accuracy by shifting LSH bits, hence calculating: + * (NOM << LSH) / DEN + * This however means trouble for large NOM, because (NOM << LSH) may no + * longer fit in 32 bits. The following way of calculating this gives us + * some slack, under the following conditions: + * - (NOM / DEN) fits in (32 - LSH) bits. + * - (NOM % DEN) fits in (32 - LSH) bits. + */ +#define SH_DIV(NOM,DEN,LSH) ( (((NOM) / (DEN)) << (LSH)) \ + + ((((NOM) % (DEN)) << (LSH)) + (DEN) / 2) / (DEN)) + +/* LATCH is used in the interval timer and ftape setup. */ +#define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ + +extern int register_refined_jiffies(long clock_tick_rate); + +/* TICK_NSEC is the time between ticks in nsec assuming SHIFTED_HZ */ +#define TICK_NSEC ((NSEC_PER_SEC+HZ/2)/HZ) + +/* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ +#define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) + +static inline u64 local_clock(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return ((s64) ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec; +} + +extern unsigned long clock_t_to_jiffies(unsigned long x); +extern u64 jiffies_64_to_clock_t(u64 x); +extern u64 nsec_to_clock_t(u64 x); +extern u64 nsecs_to_jiffies64(u64 n); +extern unsigned long nsecs_to_jiffies(u64 n); + +static inline u64 get_jiffies_64(void) +{ + return nsecs_to_jiffies64(local_clock()); +} + +#define jiffies_64 get_jiffies_64() +#define jiffies ((unsigned long) get_jiffies_64()) + +/* + * These inlines deal with timer wrapping correctly. You are + * strongly encouraged to use them + * 1. Because people otherwise forget + * 2. Because if the timer wrap changes in future you won't have to + * alter your driver code. + * + * time_after(a,b) returns true if the time a is after time b. + * + * Do this with "<0" and ">=0" to only test the sign of the result. A + * good compiler would generate better code (and a really good compiler + * wouldn't care). Gcc is currently neither. + */ +#define time_after(a,b) \ + (typecheck(unsigned long, a) && \ + typecheck(unsigned long, b) && \ + ((long)((b) - (a)) < 0)) +#define time_before(a,b) time_after(b,a) + +#define time_after_eq(a,b) \ + (typecheck(unsigned long, a) && \ + typecheck(unsigned long, b) && \ + ((long)((a) - (b)) >= 0)) +#define time_before_eq(a,b) time_after_eq(b,a) + +/* + * Calculate whether a is in the range of [b, c]. + */ +#define time_in_range(a,b,c) \ + (time_after_eq(a,b) && \ + time_before_eq(a,c)) + +/* + * Calculate whether a is in the range of [b, c). + */ +#define time_in_range_open(a,b,c) \ + (time_after_eq(a,b) && \ + time_before(a,c)) + +/* Same as above, but does so with platform independent 64bit types. + * These must be used when utilizing jiffies_64 (i.e. 
return value of + * get_jiffies_64() */ +#define time_after64(a,b) \ + (typecheck(__u64, a) && \ + typecheck(__u64, b) && \ + ((__s64)((b) - (a)) < 0)) +#define time_before64(a,b) time_after64(b,a) + +#define time_after_eq64(a,b) \ + (typecheck(__u64, a) && \ + typecheck(__u64, b) && \ + ((__s64)((a) - (b)) >= 0)) +#define time_before_eq64(a,b) time_after_eq64(b,a) + +#define time_in_range64(a, b, c) \ + (time_after_eq64(a, b) && \ + time_before_eq64(a, c)) + +/* + * These four macros compare jiffies and 'a' for convenience. + */ + +/* time_is_before_jiffies(a) return true if a is before jiffies */ +#define time_is_before_jiffies(a) time_after(jiffies, a) + +/* time_is_after_jiffies(a) return true if a is after jiffies */ +#define time_is_after_jiffies(a) time_before(jiffies, a) + +/* time_is_before_eq_jiffies(a) return true if a is before or equal to jiffies*/ +#define time_is_before_eq_jiffies(a) time_after_eq(jiffies, a) + +/* time_is_after_eq_jiffies(a) return true if a is after or equal to jiffies*/ +#define time_is_after_eq_jiffies(a) time_before_eq(jiffies, a) + +/* + * Have the 32 bit jiffies value wrap 5 minutes after boot + * so jiffies wrap bugs show up earlier. + */ +#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) + +/* + * Change timeval to jiffies, trying to avoid the + * most obvious overflows.. + * + * And some not so obvious. + * + * Note that we don't want to return LONG_MAX, because + * for various timeout reasons we often end up having + * to wait "jiffies+1" in order to guarantee that we wait + * at _least_ "jiffies" - so "jiffies+1" had better still + * be positive. + */ +#define MAX_JIFFY_OFFSET ((LONG_MAX >> 1)-1) + +extern unsigned long preset_lpj; + +/* + * We want to do realistic conversions of time so we need to use the same + * values the update wall clock code uses as the jiffies size. This value + * is: TICK_NSEC (which is defined in timex.h). This + * is a constant and is in nanoseconds. We will use scaled math + * with a set of scales defined here as SEC_JIFFIE_SC, USEC_JIFFIE_SC and + * NSEC_JIFFIE_SC. Note that these defines contain nothing but + * constants and so are computed at compile time. SHIFT_HZ (computed in + * timex.h) adjusts the scaling for different HZ values. + + * Scaled math??? What is that? + * + * Scaled math is a way to do integer math on values that would, + * otherwise, either overflow, underflow, or cause undesired div + * instructions to appear in the execution path. In short, we "scale" + * up the operands so they take more bits (more precision, less + * underflow), do the desired operation and then "scale" the result back + * by the same amount. If we do the scaling by shifting we avoid the + * costly mpy and the dastardly div instructions. + + * Suppose, for example, we want to convert from seconds to jiffies + * where jiffies is defined in nanoseconds as NSEC_PER_JIFFIE. The + * simple math is: jiff = (sec * NSEC_PER_SEC) / NSEC_PER_JIFFIE; We + * observe that (NSEC_PER_SEC / NSEC_PER_JIFFIE) is a constant which we + * might calculate at compile time, however, the result will only have + * about 3-4 bits of precision (less for smaller values of HZ). 
+ * + * So, we scale as follows: + * jiff = (sec) * (NSEC_PER_SEC / NSEC_PER_JIFFIE); + * jiff = ((sec) * ((NSEC_PER_SEC * SCALE)/ NSEC_PER_JIFFIE)) / SCALE; + * Then we make SCALE a power of two so: + * jiff = ((sec) * ((NSEC_PER_SEC << SCALE)/ NSEC_PER_JIFFIE)) >> SCALE; + * Now we define: + * #define SEC_CONV = ((NSEC_PER_SEC << SCALE)/ NSEC_PER_JIFFIE)) + * jiff = (sec * SEC_CONV) >> SCALE; + * + * Often the math we use will expand beyond 32-bits so we tell C how to + * do this and pass the 64-bit result of the mpy through the ">> SCALE" + * which should take the result back to 32-bits. We want this expansion + * to capture as much precision as possible. At the same time we don't + * want to overflow so we pick the SCALE to avoid this. In this file, + * that means using a different scale for each range of HZ values (as + * defined in timex.h). + * + * For those who want to know, gcc will give a 64-bit result from a "*" + * operator if the result is a long long AND at least one of the + * operands is cast to long long (usually just prior to the "*" so as + * not to confuse it into thinking it really has a 64-bit operand, + * which, buy the way, it can do, but it takes more code and at least 2 + * mpys). + + * We also need to be aware that one second in nanoseconds is only a + * couple of bits away from overflowing a 32-bit word, so we MUST use + * 64-bits to get the full range time in nanoseconds. + + */ + +/* + * Here are the scales we will use. One for seconds, nanoseconds and + * microseconds. + * + * Within the limits of cpp we do a rough cut at the SEC_JIFFIE_SC and + * check if the sign bit is set. If not, we bump the shift count by 1. + * (Gets an extra bit of precision where we can use it.) + * We know it is set for HZ = 1024 and HZ = 100 not for 1000. + * Haven't tested others. + + * Limits of cpp (for #if expressions) only long (no long long), but + * then we only need the most signicant bit. + */ + +#define SEC_JIFFIE_SC (31 - SHIFT_HZ) +#if !((((NSEC_PER_SEC << 2) / TICK_NSEC) << (SEC_JIFFIE_SC - 2)) & 0x80000000) +#undef SEC_JIFFIE_SC +#define SEC_JIFFIE_SC (32 - SHIFT_HZ) +#endif +#define NSEC_JIFFIE_SC (SEC_JIFFIE_SC + 29) +#define SEC_CONVERSION ((unsigned long)((((u64)NSEC_PER_SEC << SEC_JIFFIE_SC) +\ + TICK_NSEC -1) / (u64)TICK_NSEC)) + +#define NSEC_CONVERSION ((unsigned long)((((u64)1 << NSEC_JIFFIE_SC) +\ + TICK_NSEC -1) / (u64)TICK_NSEC)) +/* + * The maximum jiffie value is (MAX_INT >> 1). Here we translate that + * into seconds. The 64-bit case will overflow if we are not careful, + * so use the messy SH_DIV macro to do it. Still all constants. 
+ */ +#if BITS_PER_LONG < 64 +# define MAX_SEC_IN_JIFFIES \ + (long)((u64)((u64)MAX_JIFFY_OFFSET * TICK_NSEC) / NSEC_PER_SEC) +#else /* take care of overflow on 64 bits machines */ +# define MAX_SEC_IN_JIFFIES \ + (SH_DIV((MAX_JIFFY_OFFSET >> SEC_JIFFIE_SC) * TICK_NSEC, NSEC_PER_SEC, 1) - 1) + +#endif + +/* + * Convert various time units to each other: + */ +extern unsigned int jiffies_to_msecs(const unsigned long j); +extern unsigned int jiffies_to_usecs(const unsigned long j); + +static inline u64 jiffies_to_nsecs(const unsigned long j) +{ + return (u64)jiffies_to_usecs(j) * NSEC_PER_USEC; +} + +extern unsigned long __msecs_to_jiffies(const unsigned int m); +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) +/* + * HZ is equal to or smaller than 1000, and 1000 is a nice round + * multiple of HZ, divide with the factor between them, but round + * upwards: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +} +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) +/* + * HZ is larger than 1000, and HZ is a nice round multiple of 1000 - + * simply multiply with the factor between them. + * + * But first make sure the multiplication result cannot overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return m * (HZ / MSEC_PER_SEC); +} +#else +/* + * Generic case - multiply, round and divide. But first check that if + * we are doing a net multiplication, that we wouldn't overflow: + */ +static inline unsigned long _msecs_to_jiffies(const unsigned int m) +{ + if (HZ > MSEC_PER_SEC && m > jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + + return (MSEC_TO_HZ_MUL32 * m + MSEC_TO_HZ_ADJ32) >> MSEC_TO_HZ_SHR32; +} +#endif +/** + * msecs_to_jiffies: - convert milliseconds to jiffies + * @m: time in milliseconds + * + * conversion is done as follows: + * + * - negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET) + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows. + * for the details see __msecs_to_jiffies() + * + * msecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __msecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _msecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. 
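+ *
+ * For example, with the HZ = 100 defined in this header one jiffy is
+ * 10ms, so msecs_to_jiffies(10) == 1 and msecs_to_jiffies(25) rounds up
+ * to 3.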
+ */ +static __always_inline unsigned long msecs_to_jiffies(const unsigned int m) +{ + if (__builtin_constant_p(m)) { + if ((int)m < 0) + return MAX_JIFFY_OFFSET; + return _msecs_to_jiffies(m); + } else { + return __msecs_to_jiffies(m); + } +} + +extern unsigned long __usecs_to_jiffies(const unsigned int u); +#if !(USEC_PER_SEC % HZ) +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (u + (USEC_PER_SEC / HZ) - 1) / (USEC_PER_SEC / HZ); +} +#else +static inline unsigned long _usecs_to_jiffies(const unsigned int u) +{ + return (USEC_TO_HZ_MUL32 * u + USEC_TO_HZ_ADJ32) + >> USEC_TO_HZ_SHR32; +} +#endif + +/** + * usecs_to_jiffies: - convert microseconds to jiffies + * @u: time in microseconds + * + * conversion is done as follows: + * + * - 'too large' values [that would result in larger than + * MAX_JIFFY_OFFSET values] mean 'infinite timeout' too. + * + * - all other values are converted to jiffies by either multiplying + * the input value by a factor or dividing it with a factor and + * handling any 32-bit overflows as for msecs_to_jiffies. + * + * usecs_to_jiffies() checks for the passed in value being a constant + * via __builtin_constant_p() allowing gcc to eliminate most of the + * code, __usecs_to_jiffies() is called if the value passed does not + * allow constant folding and the actual conversion must be done at + * runtime. + * the HZ range specific helpers _usecs_to_jiffies() are called both + * directly here and from __msecs_to_jiffies() in the case where + * constant folding is not possible. + */ +static __always_inline unsigned long usecs_to_jiffies(const unsigned int u) +{ + if (__builtin_constant_p(u)) { + if (u > jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; + return _usecs_to_jiffies(u); + } else { + return __usecs_to_jiffies(u); + } +} + +extern unsigned long timespec64_to_jiffies(const struct timespec64 *value); + +extern void jiffies_to_timespec64(const unsigned long, + struct timespec64 *value); +static inline unsigned long timespec_to_jiffies(const struct timespec *value) +{ + struct timespec64 ts = timespec_to_timespec64(*value); + + return timespec64_to_jiffies(&ts); +} + +static inline void jiffies_to_timespec(const unsigned long j, + struct timespec *value) +{ + struct timespec64 ts; + + jiffies_to_timespec64(j, &ts); + *value = timespec64_to_timespec(ts); +} + +extern unsigned long timeval_to_jiffies(const struct timeval *value); +extern void jiffies_to_timeval(const unsigned long j, + struct timeval *value); + +extern clock_t jiffies_to_clock_t(unsigned long x); +static inline clock_t jiffies_delta_to_clock_t(long delta) +{ + return jiffies_to_clock_t(max(0L, delta)); +} + +#define TIMESTAMP_SIZE 30 + +#endif diff --git a/include/linux/kernel.h b/include/linux/kernel.h new file mode 100644 index 0000000..e4ffa86 --- /dev/null +++ b/include/linux/kernel.h @@ -0,0 +1,211 @@ +#ifndef __TOOLS_LINUX_KERNEL_H +#define __TOOLS_LINUX_KERNEL_H + +#include <assert.h> +#include <stdarg.h> +#include <stddef.h> +#include <stdio.h> +#include <limits.h> + +#include <linux/bug.h> +#include <linux/compiler.h> + +#define IS_ENABLED(opt) 0 +#define EXPORT_SYMBOL(sym) + +#define U8_MAX ((u8)~0U) +#define S8_MAX ((s8)(U8_MAX>>1)) +#define S8_MIN ((s8)(-S8_MAX - 1)) +#define U16_MAX ((u16)~0U) +#define S16_MAX ((s16)(U16_MAX>>1)) +#define S16_MIN ((s16)(-S16_MAX - 1)) +#define U32_MAX ((u32)~0U) +#define S32_MAX ((s32)(U32_MAX>>1)) +#define S32_MIN ((s32)(-S32_MAX - 1)) +#define U64_MAX ((u64)~0ULL) +#define S64_MAX ((s64)(U64_MAX>>1)) 
+#define S64_MIN ((s64)(-S64_MAX - 1)) + +#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1) +#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask)) + +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) + +#define __must_be_array(a) BUILD_BUG_ON_ZERO(__same_type((a), &(a)[0])) +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr)) + +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) + +#define mult_frac(x, numer, denom)( \ +{ \ + typeof(x) quot = (x) / (denom); \ + typeof(x) rem = (x) % (denom); \ + (quot * (numer)) + ((rem * (numer)) / (denom)); \ +} \ +) + +#ifndef offsetof +#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER) +#endif + +#ifndef container_of +/** + * container_of - cast a member of a structure out to the containing structure + * @ptr: the pointer to the member. + * @type: the type of the container struct this is embedded in. + * @member: the name of the member within the struct. + * + */ +#define container_of(ptr, type, member) ({ \ + const typeof(((type *)0)->member) * __mptr = (ptr); \ + (type *)((char *)__mptr - offsetof(type, member)); }) +#endif + +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + +#define roundup(x, y) \ +({ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +}) + +#define max(x, y) ({ \ + typeof(x) _max1 = (x); \ + typeof(y) _max2 = (y); \ + (void) (&_max1 == &_max2); \ + _max1 > _max2 ? _max1 : _max2; }) + +#define min(x, y) ({ \ + typeof(x) _min1 = (x); \ + typeof(y) _min2 = (y); \ + (void) (&_min1 == &_min2); \ + _min1 < _min2 ? _min1 : _min2; }) + +#define min_t(type, x, y) ({ \ + type __min1 = (x); \ + type __min2 = (y); \ + __min1 < __min2 ? __min1: __min2; }) + +#define max_t(type, x, y) ({ \ + type __max1 = (x); \ + type __max2 = (y); \ + __max1 > __max2 ? __max1: __max2; }) + +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) + +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#define _RET_IP_ (unsigned long)__builtin_return_address(0) +#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) + +#define might_sleep() + +#define NR_CPUS 32 + +#define cpu_relax() do {} while (0) +#define cpu_relax_lowlatency() do {} while (0) + +__printf(1, 2) +static inline void panic(const char *fmt, ...) +{ + va_list args; + + va_start(args, fmt); + vprintf(fmt, args); + va_end(args); + + BUG(); +} + +unsigned long simple_strtoul(const char *,char **,unsigned int); +long simple_strtol(const char *,char **,unsigned int); +unsigned long long simple_strtoull(const char *,char **,unsigned int); +long long simple_strtoll(const char *,char **,unsigned int); + +int __must_check _kstrtoul(const char *s, unsigned int base, unsigned long *res); +int __must_check _kstrtol(const char *s, unsigned int base, long *res); + +int __must_check kstrtoull(const char *s, unsigned int base, unsigned long long *res); +int __must_check kstrtoll(const char *s, unsigned int base, long long *res); + +/** + * kstrtoul - convert a string to an unsigned long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. 
If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. +*/ +static inline int __must_check kstrtoul(const char *s, unsigned int base, unsigned long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(unsigned long, unsigned long long) = 0. + */ + if (sizeof(unsigned long) == sizeof(unsigned long long) && + __alignof__(unsigned long) == __alignof__(unsigned long long)) + return kstrtoull(s, base, (unsigned long long *)res); + else + return _kstrtoul(s, base, res); +} + +/** + * kstrtol - convert a string to a long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ +static inline int __must_check kstrtol(const char *s, unsigned int base, long *res) +{ + /* + * We want to shortcut function call, but + * __builtin_types_compatible_p(long, long long) = 0. + */ + if (sizeof(long) == sizeof(long long) && + __alignof__(long) == __alignof__(long long)) + return kstrtoll(s, base, (long long *)res); + else + return _kstrtol(s, base, res); +} + +int __must_check kstrtouint(const char *s, unsigned int base, unsigned int *res); +int __must_check kstrtoint(const char *s, unsigned int base, int *res); + +/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* USER_READABLE >= GROUP_READABLE >= OTHER_READABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 4) < (((perms) >> 3) & 4)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 4) < ((perms) & 4)) + \ + /* USER_WRITABLE >= GROUP_WRITABLE */ \ + BUILD_BUG_ON_ZERO((((perms) >> 6) & 2) < (((perms) >> 3) & 2)) + \ + /* OTHER_WRITABLE? Generally considered a bad idea. */ \ + BUILD_BUG_ON_ZERO((perms) & 2) + \ + (perms)) + +#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK) + +#endif diff --git a/include/linux/kobject.h b/include/linux/kobject.h new file mode 100644 index 0000000..2ec53f8 --- /dev/null +++ b/include/linux/kobject.h @@ -0,0 +1,142 @@ +/* + * kobject.h - generic kernel object infrastructure. + * + * Copyright (c) 2002-2003 Patrick Mochel + * Copyright (c) 2002-2003 Open Source Development Labs + * Copyright (c) 2006-2008 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (c) 2006-2008 Novell Inc. 
+ * + * This file is released under the GPLv2. + * + * Please read Documentation/kobject.txt before using the kobject + * interface, ESPECIALLY the parts about reference counts and object + * destructors. + */ + +#ifndef _KOBJECT_H_ +#define _KOBJECT_H_ + +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/kref.h> +#include <linux/sysfs.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <linux/workqueue.h> + +struct kset; + +struct kobj_type { + void (*release)(struct kobject *kobj); + const struct sysfs_ops *sysfs_ops; + struct attribute **default_attrs; + const struct kobj_ns_type_operations *(*child_ns_type)(struct kobject *kobj); + const void *(*namespace)(struct kobject *kobj); +}; + +struct kobj_uevent_env { +}; + +struct kobj_attribute { + struct attribute attr; + ssize_t (*show)(struct kobject *kobj, struct kobj_attribute *attr, + char *buf); + ssize_t (*store)(struct kobject *kobj, struct kobj_attribute *attr, + const char *buf, size_t count); +}; + +struct kobject { + struct kobject *parent; + struct kset *kset; + struct kobj_type *ktype; + struct kernfs_node *sd; /* sysfs directory entry */ + struct kref kref; + unsigned int state_initialized:1; + unsigned int state_in_sysfs:1; + unsigned int state_add_uevent_sent:1; + unsigned int state_remove_uevent_sent:1; + unsigned int uevent_suppress:1; +}; + +struct kset { + struct kobject kobj; +}; + +static inline struct kobj_type *get_ktype(struct kobject *kobj) +{ + return kobj->ktype; +} + +#define kobject_add(...) 0 + +static inline void kobject_init(struct kobject *kobj, struct kobj_type *ktype) +{ + memset(kobj, 0, sizeof(*kobj)); + + kref_init(&kobj->kref); + kobj->ktype = ktype; + kobj->state_initialized = 1; +} + +static inline void kobject_del(struct kobject *kobj); + +static inline void kobject_cleanup(struct kobject *kobj) +{ + struct kobj_type *t = get_ktype(kobj); + + /* remove from sysfs if the caller did not do it */ + if (kobj->state_in_sysfs) + kobject_del(kobj); + + if (t && t->release) + t->release(kobj); +} + +static inline void kobject_release(struct kref *kref) +{ + struct kobject *kobj = container_of(kref, struct kobject, kref); + + kobject_cleanup(kobj); +} + +static inline void kobject_put(struct kobject *kobj) +{ + BUG_ON(!kobj); + BUG_ON(!kobj->state_initialized); + + kref_put(&kobj->kref, kobject_release); +} + +static inline void kobject_del(struct kobject *kobj) +{ + struct kernfs_node *sd; + + if (!kobj) + return; + + sd = kobj->sd; + kobj->state_in_sysfs = 0; +#if 0 + kobj_kset_leave(kobj); +#endif + kobject_put(kobj->parent); + kobj->parent = NULL; +} + +static inline struct kobject *kobject_get(struct kobject *kobj) +{ + BUG_ON(!kobj); + BUG_ON(!kobj->state_initialized); + + kref_get(&kobj->kref); + return kobj; +} + +static inline void kset_unregister(struct kset *kset) {} + +#define kset_create_and_add(_name, _u, _parent) \ + ((struct kset *) kzalloc(sizeof(struct kset), GFP_KERNEL)) + +#endif /* _KOBJECT_H_ */ diff --git a/include/linux/kref.h b/include/linux/kref.h new file mode 100644 index 0000000..e15828f --- /dev/null +++ b/include/linux/kref.h @@ -0,0 +1,138 @@ +/* + * kref.h - library routines for handling generic reference counted objects + * + * Copyright (C) 2004 Greg Kroah-Hartman <greg@kroah.com> + * Copyright (C) 2004 IBM Corp. 
+ * + * based on kobject.h which was: + * Copyright (C) 2002-2003 Patrick Mochel <mochel@osdl.org> + * Copyright (C) 2002-2003 Open Source Development Labs + * + * This file is released under the GPLv2. + * + */ + +#ifndef _KREF_H_ +#define _KREF_H_ + +#include <linux/bug.h> +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/mutex.h> + +struct kref { + atomic_t refcount; +}; + +/** + * kref_init - initialize object. + * @kref: object in question. + */ +static inline void kref_init(struct kref *kref) +{ + atomic_set(&kref->refcount, 1); +} + +/** + * kref_get - increment refcount for object. + * @kref: object. + */ +static inline void kref_get(struct kref *kref) +{ + /* If refcount was 0 before incrementing then we have a race + * condition when this kref is freeing by some other thread right now. + * In this case one should use kref_get_unless_zero() + */ + WARN_ON_ONCE(atomic_inc_return(&kref->refcount) < 2); +} + +/** + * kref_sub - subtract a number of refcounts for object. + * @kref: object. + * @count: Number of recounts to subtract. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. If the caller does pass kfree to this + * function, you will be publicly mocked mercilessly by the kref + * maintainer, and anyone else who happens to notice it. You have + * been warned. + * + * Subtract @count from the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +static inline int kref_sub(struct kref *kref, unsigned int count, + void (*release)(struct kref *kref)) +{ + WARN_ON(release == NULL); + + if (atomic_sub_and_test((int) count, &kref->refcount)) { + release(kref); + return 1; + } + return 0; +} + +/** + * kref_put - decrement refcount for object. + * @kref: object. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. If the caller does pass kfree to this + * function, you will be publicly mocked mercilessly by the kref + * maintainer, and anyone else who happens to notice it. You have + * been warned. + * + * Decrement the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. + */ +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) +{ + return kref_sub(kref, 1, release); +} + +static inline int kref_put_mutex(struct kref *kref, + void (*release)(struct kref *kref), + struct mutex *lock) +{ + WARN_ON(release == NULL); + if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { + mutex_lock(lock); + if (unlikely(!atomic_dec_and_test(&kref->refcount))) { + mutex_unlock(lock); + return 0; + } + release(kref); + return 1; + } + return 0; +} + +/** + * kref_get_unless_zero - Increment refcount for object unless it is zero. + * @kref: object. + * + * Return non-zero if the increment succeeded. Otherwise return 0. 
+ * + * This function is intended to simplify locking around refcounting for + * objects that can be looked up from a lookup structure, and which are + * removed from that lookup structure in the object destructor. + * Operations on such objects require at least a read lock around + * lookup + kref_get, and a write lock around kref_put + remove from lookup + * structure. Furthermore, RCU implementations become extremely tricky. + * With a lookup followed by a kref_get_unless_zero *with return value check* + * locking in the kref_put path can be deferred to the actual removal from + * the lookup structure and RCU lookups become trivial. + */ +static inline int __must_check kref_get_unless_zero(struct kref *kref) +{ + return atomic_add_unless(&kref->refcount, 1, 0); +} +#endif /* _KREF_H_ */ diff --git a/include/linux/kthread.h b/include/linux/kthread.h new file mode 100644 index 0000000..3a8cf10 --- /dev/null +++ b/include/linux/kthread.h @@ -0,0 +1,118 @@ +#ifndef _LINUX_KTHREAD_H +#define _LINUX_KTHREAD_H + +/* Simple interface for creating and stopping kernel threads without mess. */ +#include <linux/err.h> +#include <linux/lockdep.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +__printf(3, 4) +struct task_struct *kthread_create(int (*threadfn)(void *data), + void *data, + const char namefmt[], ...); + + +struct task_struct *kthread_create_on_cpu(int (*threadfn)(void *data), + void *data, + unsigned int cpu, + const char *namefmt); + +/** + * kthread_run - create and wake a thread. + * @threadfn: the function to run until signal_pending(current). + * @data: data ptr for @threadfn. + * @namefmt: printf-style name for the thread. + * + * Description: Convenient wrapper for kthread_create() followed by + * wake_up_process(). Returns the kthread or ERR_PTR(-ENOMEM). + */ +#define kthread_run(threadfn, data, namefmt, ...) \ +({ \ + struct task_struct *__k \ + = kthread_create(threadfn, data, namefmt, ## __VA_ARGS__); \ + if (!IS_ERR(__k)) \ + wake_up_process(__k); \ + __k; \ +}) + +int kthread_stop(struct task_struct *k); +bool kthread_should_stop(void); +bool kthread_should_park(void); +bool kthread_freezable_should_stop(bool *was_frozen); +void *kthread_data(struct task_struct *k); +void *probe_kthread_data(struct task_struct *k); +int kthread_park(struct task_struct *k); +void kthread_unpark(struct task_struct *k); +void kthread_parkme(void); + +int kthreadd(void *unused); +extern struct task_struct *kthreadd_task; +extern int tsk_fork_get_node(struct task_struct *tsk); + +/* + * Simple work processor based on kthread. + * + * This provides easier way to make use of kthreads. A kthread_work + * can be queued and flushed using queue/flush_kthread_work() + * respectively. Queued kthread_works are processed by a kthread + * running kthread_worker_fn(). 
+ */ +struct kthread_work; +typedef void (*kthread_work_func_t)(struct kthread_work *work); + +struct kthread_worker { + spinlock_t lock; + struct list_head work_list; + struct task_struct *task; + struct kthread_work *current_work; +}; + +struct kthread_work { + struct list_head node; + kthread_work_func_t func; + struct kthread_worker *worker; +}; + +#define KTHREAD_WORKER_INIT(worker) { \ + .lock = __SPIN_LOCK_UNLOCKED((worker).lock), \ + .work_list = LIST_HEAD_INIT((worker).work_list), \ + } + +#define KTHREAD_WORK_INIT(work, fn) { \ + .node = LIST_HEAD_INIT((work).node), \ + .func = (fn), \ + } + +#define DEFINE_KTHREAD_WORKER(worker) \ + struct kthread_worker worker = KTHREAD_WORKER_INIT(worker) + +#define DEFINE_KTHREAD_WORK(work, fn) \ + struct kthread_work work = KTHREAD_WORK_INIT(work, fn) + +#define DEFINE_KTHREAD_WORKER_ONSTACK(worker) DEFINE_KTHREAD_WORKER(worker) + +extern void __init_kthread_worker(struct kthread_worker *worker, + const char *name, struct lock_class_key *key); + +#define init_kthread_worker(worker) \ + do { \ + static struct lock_class_key __key; \ + __init_kthread_worker((worker), "("#worker")->lock", &__key); \ + } while (0) + +#define init_kthread_work(work, fn) \ + do { \ + memset((work), 0, sizeof(struct kthread_work)); \ + INIT_LIST_HEAD(&(work)->node); \ + (work)->func = (fn); \ + } while (0) + +int kthread_worker_fn(void *worker_ptr); + +bool queue_kthread_work(struct kthread_worker *worker, + struct kthread_work *work); +void flush_kthread_work(struct kthread_work *work); +void flush_kthread_worker(struct kthread_worker *worker); + +#endif /* _LINUX_KTHREAD_H */ diff --git a/include/linux/lglock.h b/include/linux/lglock.h new file mode 100644 index 0000000..a9108bc --- /dev/null +++ b/include/linux/lglock.h @@ -0,0 +1,18 @@ +#ifndef __TOOLS_LINUX_LGLOCK_H +#define __TOOLS_LINUX_LGLOCK_H + +#include <pthread.h> + +struct lglock { + pthread_mutex_t lock; +}; + +#define lg_lock_free(l) do {} while (0) +#define lg_lock_init(l) pthread_mutex_init(&(l)->lock, NULL) + +#define lg_local_lock(l) pthread_mutex_lock(&(l)->lock) +#define lg_local_unlock(l) pthread_mutex_unlock(&(l)->lock) +#define lg_global_lock(l) pthread_mutex_lock(&(l)->lock) +#define lg_global_unlock(l) pthread_mutex_unlock(&(l)->lock) + +#endif /* __TOOLS_LINUX_LGLOCK_H */ diff --git a/include/linux/list.h b/include/linux/list.h new file mode 100644 index 0000000..1da4238 --- /dev/null +++ b/include/linux/list.h @@ -0,0 +1,771 @@ +#ifndef __TOOLS_LINUX_LIST_H +#define __TOOLS_LINUX_LIST_H + +#include <linux/types.h> +#include <linux/poison.h> +#include <linux/kernel.h> +#include <linux/compiler.h> + +/* + * Simple doubly linked list implementation. + * + * Some of the internal functions ("__xxx") are useful when + * manipulating whole lists rather than single entries, as + * sometimes we already know the next/prev entries and we can + * generate better code by using them directly rather than + * using the generic single-entry routines. + */ + +#define LIST_HEAD_INIT(name) { &(name), &(name) } + +#define LIST_HEAD(name) \ + struct list_head name = LIST_HEAD_INIT(name) + +static inline void INIT_LIST_HEAD(struct list_head *list) +{ + list->next = list; + list->prev = list; +} + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! 
+ */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next) +{ + next->prev = new; + new->next = next; + new->prev = prev; + prev->next = new; +} +#else +extern void __list_add(struct list_head *new, + struct list_head *prev, + struct list_head *next); +#endif + +/** + * list_add - add a new entry + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + */ +static inline void list_add(struct list_head *new, struct list_head *head) +{ + __list_add(new, head, head->next); +} + + +/** + * list_add_tail - add a new entry + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + */ +static inline void list_add_tail(struct list_head *new, struct list_head *head) +{ + __list_add(new, head->prev, head); +} + +/* + * Delete a list entry by making the prev/next entries + * point to each other. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +static inline void __list_del(struct list_head * prev, struct list_head * next) +{ + next->prev = prev; + WRITE_ONCE(prev->next, next); +} + +/** + * list_del - deletes entry from list. + * @entry: the element to delete from the list. + * Note: list_empty() on entry does not return true after this, the entry is + * in an undefined state. + */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_del_entry(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); +} + +static inline void list_del(struct list_head *entry) +{ + __list_del(entry->prev, entry->next); + entry->next = LIST_POISON1; + entry->prev = LIST_POISON2; +} +#else +extern void __list_del_entry(struct list_head *entry); +extern void list_del(struct list_head *entry); +#endif + +/** + * list_replace - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * If @old was empty, it will be overwritten. + */ +static inline void list_replace(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +static inline void list_replace_init(struct list_head *old, + struct list_head *new) +{ + list_replace(old, new); + INIT_LIST_HEAD(old); +} + +/** + * list_del_init - deletes entry from list and reinitialize it. + * @entry: the element to delete from the list. 
+ */ +static inline void list_del_init(struct list_head *entry) +{ + __list_del_entry(entry); + INIT_LIST_HEAD(entry); +} + +/** + * list_move - delete from one list and add as another's head + * @list: the entry to move + * @head: the head that will precede our entry + */ +static inline void list_move(struct list_head *list, struct list_head *head) +{ + __list_del_entry(list); + list_add(list, head); +} + +/** + * list_move_tail - delete from one list and add as another's tail + * @list: the entry to move + * @head: the head that will follow our entry + */ +static inline void list_move_tail(struct list_head *list, + struct list_head *head) +{ + __list_del_entry(list); + list_add_tail(list, head); +} + +/** + * list_is_last - tests whether @list is the last entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_last(const struct list_head *list, + const struct list_head *head) +{ + return list->next == head; +} + +/** + * list_empty - tests whether a list is empty + * @head: the list to test. + */ +static inline int list_empty(const struct list_head *head) +{ + return head->next == head; +} + +/** + * list_empty_careful - tests whether a list is empty and not being modified + * @head: the list to test + * + * Description: + * tests whether a list is empty _and_ checks that no other CPU might be + * in the process of modifying either member (next or prev) + * + * NOTE: using list_empty_careful() without synchronization + * can only be safe if the only activity that can happen + * to the list entry is list_del_init(). Eg. it cannot be used + * if another CPU could re-list_add() it. + */ +static inline int list_empty_careful(const struct list_head *head) +{ + struct list_head *next = head->next; + return (next == head) && (next == head->prev); +} + +/** + * list_rotate_left - rotate the list to the left + * @head: the head of the list + */ +static inline void list_rotate_left(struct list_head *head) +{ + struct list_head *first; + + if (!list_empty(head)) { + first = head->next; + list_move_tail(first, head); + } +} + +/** + * list_is_singular - tests whether a list has just one entry. + * @head: the list to test. + */ +static inline int list_is_singular(const struct list_head *head) +{ + return !list_empty(head) && (head->next == head->prev); +} + +static inline void __list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + struct list_head *new_first = entry->next; + list->next = head->next; + list->next->prev = list; + list->prev = entry; + entry->next = list; + head->next = new_first; + new_first->prev = head; +} + +/** + * list_cut_position - cut a list into two + * @list: a new list to add all removed entries + * @head: a list with entries + * @entry: an entry within head, could be the head itself + * and if so we won't cut the list + * + * This helper moves the initial part of @head, up to and + * including @entry, from @head to @list. You should + * pass on @entry an element you know is on @head. @list + * should be an empty list or a list you do not care about + * losing its data. 
+ * + */ +static inline void list_cut_position(struct list_head *list, + struct list_head *head, struct list_head *entry) +{ + if (list_empty(head)) + return; + if (list_is_singular(head) && + (head->next != entry && head != entry)) + return; + if (entry == head) + INIT_LIST_HEAD(list); + else + __list_cut_position(list, head, entry); +} + +static inline void __list_splice(const struct list_head *list, + struct list_head *prev, + struct list_head *next) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + first->prev = prev; + prev->next = first; + + last->next = next; + next->prev = last; +} + +/** + * list_splice - join two lists, this is designed for stacks + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice(const struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head, head->next); +} + +/** + * list_splice_tail - join two lists, each list being a queue + * @list: the new list to add. + * @head: the place to add it in the first list. + */ +static inline void list_splice_tail(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) + __list_splice(list, head->prev, head); +} + +/** + * list_splice_init - join two lists and reinitialise the emptied list. + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * The list at @list is reinitialised + */ +static inline void list_splice_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head, head->next); + INIT_LIST_HEAD(list); + } +} + +/** + * list_splice_tail_init - join two lists and reinitialise the emptied list + * @list: the new list to add. + * @head: the place to add it in the first list. + * + * Each of the lists is a queue. + * The list at @list is reinitialised + */ +static inline void list_splice_tail_init(struct list_head *list, + struct list_head *head) +{ + if (!list_empty(list)) { + __list_splice(list, head->prev, head); + INIT_LIST_HEAD(list); + } +} + +/** + * list_entry - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + */ +#define list_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * list_first_entry - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_first_entry(ptr, type, member) \ + list_entry((ptr)->next, type, member) + +/** + * list_last_entry - get the last element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note, that list is expected to be not empty. + */ +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +/** + * list_first_entry_or_null - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + */ +#define list_first_entry_or_null(ptr, type, member) \ + (!list_empty(ptr) ? 
list_first_entry(ptr, type, member) : NULL) + +/** + * list_next_entry - get the next element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. + */ +#define list_next_entry(pos, member) \ + list_entry((pos)->member.next, typeof(*(pos)), member) + +/** + * list_prev_entry - get the prev element in list + * @pos: the type * to cursor + * @member: the name of the list_head within the struct. + */ +#define list_prev_entry(pos, member) \ + list_entry((pos)->member.prev, typeof(*(pos)), member) + +/** + * list_for_each - iterate over a list + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + +/** + * list_for_each_prev - iterate over a list backwards + * @pos: the &struct list_head to use as a loop cursor. + * @head: the head for your list. + */ +#define list_for_each_prev(pos, head) \ + for (pos = (head)->prev; pos != (head); pos = pos->prev) + +/** + * list_for_each_safe - iterate over a list safe against removal of list entry + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) + +/** + * list_for_each_prev_safe - iterate over a list backwards safe against removal of list entry + * @pos: the &struct list_head to use as a loop cursor. + * @n: another &struct list_head to use as temporary storage + * @head: the head for your list. + */ +#define list_for_each_prev_safe(pos, n, head) \ + for (pos = (head)->prev, n = pos->prev; \ + pos != (head); \ + pos = n, n = pos->prev) + +/** + * list_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_for_each_entry(pos, head, member) \ + for (pos = list_first_entry(head, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_next_entry(pos, member)) + +/** + * list_for_each_entry_reverse - iterate backwards over list of given type. + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_for_each_entry_reverse(pos, head, member) \ + for (pos = list_last_entry(head, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_prev_entry(pos, member)) + +/** + * list_prepare_entry - prepare a pos entry for use in list_for_each_entry_continue() + * @pos: the type * to use as a start point + * @head: the head of the list + * @member: the name of the list_head within the struct. + * + * Prepares a pos entry for use as a start point in list_for_each_entry_continue(). + */ +#define list_prepare_entry(pos, head, member) \ + ((pos) ? : list_entry(head, typeof(*pos), member)) + +/** + * list_for_each_entry_continue - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. 
+ */ +#define list_for_each_entry_continue(pos, head, member) \ + for (pos = list_next_entry(pos, member); \ + &pos->member != (head); \ + pos = list_next_entry(pos, member)) + +/** + * list_for_each_entry_continue_reverse - iterate backwards from the given point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Start to iterate over list of given type backwards, continuing after + * the current position. + */ +#define list_for_each_entry_continue_reverse(pos, head, member) \ + for (pos = list_prev_entry(pos, member); \ + &pos->member != (head); \ + pos = list_prev_entry(pos, member)) + +/** + * list_for_each_entry_from - iterate over list of given type from the current point + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing from current position. + */ +#define list_for_each_entry_from(pos, head, member) \ + for (; &pos->member != (head); \ + pos = list_next_entry(pos, member)) + +/** + * list_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + */ +#define list_for_each_entry_safe(pos, n, head, member) \ + for (pos = list_first_entry(head, typeof(*pos), member), \ + n = list_next_entry(pos, member); \ + &pos->member != (head); \ + pos = n, n = list_next_entry(n, member)) + +/** + * list_for_each_entry_safe_continue - continue list iteration safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type, continuing after current point, + * safe against removal of list entry. + */ +#define list_for_each_entry_safe_continue(pos, n, head, member) \ + for (pos = list_next_entry(pos, member), \ + n = list_next_entry(pos, member); \ + &pos->member != (head); \ + pos = n, n = list_next_entry(n, member)) + +/** + * list_for_each_entry_safe_from - iterate over list from current point safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate over list of given type from current point, safe against + * removal of list entry. + */ +#define list_for_each_entry_safe_from(pos, n, head, member) \ + for (n = list_next_entry(pos, member); \ + &pos->member != (head); \ + pos = n, n = list_next_entry(n, member)) + +/** + * list_for_each_entry_safe_reverse - iterate backwards over list safe against removal + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Iterate backwards over list of given type, safe against removal + * of list entry. 
+ */ +#define list_for_each_entry_safe_reverse(pos, n, head, member) \ + for (pos = list_last_entry(head, typeof(*pos), member), \ + n = list_prev_entry(pos, member); \ + &pos->member != (head); \ + pos = n, n = list_prev_entry(n, member)) + +/** + * list_safe_reset_next - reset a stale list_for_each_entry_safe loop + * @pos: the loop cursor used in the list_for_each_entry_safe loop + * @n: temporary storage used in list_for_each_entry_safe + * @member: the name of the list_head within the struct. + * + * list_safe_reset_next is not safe to use in general if the list may be + * modified concurrently (eg. the lock is dropped in the loop body). An + * exception to this is if the cursor element (pos) is pinned in the list, + * and list_safe_reset_next is called after re-taking the lock and before + * completing the current iteration of the loop body. + */ +#define list_safe_reset_next(pos, n, member) \ + n = list_next_entry(pos, member) + +/* + * Double linked lists with a single pointer list head. + * Mostly useful for hash tables where the two pointer list head is + * too wasteful. + * You lose the ability to access the tail in O(1). + */ + +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +static inline int hlist_unhashed(const struct hlist_node *h) +{ + return !h->pprev; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + + WRITE_ONCE(*pprev, next); + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = LIST_POISON1; + n->pprev = LIST_POISON2; +} + +static inline void hlist_del_init(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + INIT_HLIST_NODE(n); + } +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +/* next must be != NULL */ +static inline void hlist_add_before(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + next->pprev = &n->next; + *(n->pprev) = n; +} + +static inline void hlist_add_behind(struct hlist_node *n, + struct hlist_node *prev) +{ + n->next = prev->next; + prev->next = n; + n->pprev = &prev->next; + + if (n->next) + n->next->pprev = &n->next; +} + +/* after that we'll appear to be on some hlist and hlist_del will work */ +static inline void hlist_add_fake(struct hlist_node *n) +{ + n->pprev = &n->next; +} + +static inline bool hlist_fake(struct hlist_node *h) +{ + return h->pprev == &h->next; +} + +/* + * Move a list from one list head to another. Fixup the pprev + * reference of the first entry if it exists. 
+ */ +static inline void hlist_move_list(struct hlist_head *old, + struct hlist_head *new) +{ + new->first = old->first; + if (new->first) + new->first->pprev = &new->first; + old->first = NULL; +} + +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each(pos, head) \ + for (pos = (head)->first; pos ; pos = pos->next) + +#define hlist_for_each_safe(pos, n, head) \ + for (pos = (head)->first; pos && ({ n = pos->next; 1; }); \ + pos = n) + +#define hlist_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? hlist_entry(____ptr, type, member) : NULL; \ + }) + +/** + * hlist_for_each_entry - iterate over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_continue - iterate over a hlist continuing after current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_continue(pos, member) \ + for (pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member);\ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_from - iterate over a hlist continuing from current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_from(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another &struct hlist_node to use as temporary storage + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member);\ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) + +/** + * list_del_range - deletes range of entries from list. + * @begin: first element in the range to delete from the list. + * @end: last element in the range to delete from the list. + * Note: list_empty on the range of entries does not return true after this, + * the entries is in an undefined state. + */ +static inline void list_del_range(struct list_head *begin, + struct list_head *end) +{ + begin->prev->next = end->next; + end->next->prev = begin->prev; +} + +/** + * list_for_each_from - iterate over a list from one of its nodes + * @pos: the &struct list_head to use as a loop cursor, from where to start + * @head: the head for your list. + */ +#define list_for_each_from(pos, head) \ + for (; pos != (head); pos = pos->next) + +#endif /* __TOOLS_LINUX_LIST_H */ diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h new file mode 100644 index 0000000..b01fe10 --- /dev/null +++ b/include/linux/list_nulls.h @@ -0,0 +1,117 @@ +#ifndef _LINUX_LIST_NULLS_H +#define _LINUX_LIST_NULLS_H + +#include <linux/poison.h> +#include <linux/const.h> + +/* + * Special version of lists, where end of list is not a NULL pointer, + * but a 'nulls' marker, which can have many different values. 
+ * (up to 2^31 different values guaranteed on all platforms) + * + * In the standard hlist, termination of a list is the NULL pointer. + * In this special 'nulls' variant, we use the fact that objects stored in + * a list are aligned on a word (4 or 8 bytes alignment). + * We therefore use the last significant bit of 'ptr' : + * Set to 1 : This is a 'nulls' end-of-list marker (ptr >> 1) + * Set to 0 : This is a pointer to some object (ptr) + */ + +struct hlist_nulls_head { + struct hlist_nulls_node *first; +}; + +struct hlist_nulls_node { + struct hlist_nulls_node *next, **pprev; +}; +#define NULLS_MARKER(value) (1UL | (((long)value) << 1)) +#define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ + ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) + +#define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) +/** + * ptr_is_a_nulls - Test if a ptr is a nulls + * @ptr: ptr to be tested + * + */ +static inline int is_a_nulls(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr & 1); +} + +/** + * get_nulls_value - Get the 'nulls' value of the end of chain + * @ptr: end of chain + * + * Should be called only if is_a_nulls(ptr); + */ +static inline unsigned long get_nulls_value(const struct hlist_nulls_node *ptr) +{ + return ((unsigned long)ptr) >> 1; +} + +static inline int hlist_nulls_unhashed(const struct hlist_nulls_node *h) +{ + return !h->pprev; +} + +static inline int hlist_nulls_empty(const struct hlist_nulls_head *h) +{ + return is_a_nulls(READ_ONCE(h->first)); +} + +static inline void hlist_nulls_add_head(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + h->first = n; + if (!is_a_nulls(first)) + first->pprev = &n->next; +} + +static inline void __hlist_nulls_del(struct hlist_nulls_node *n) +{ + struct hlist_nulls_node *next = n->next; + struct hlist_nulls_node **pprev = n->pprev; + + WRITE_ONCE(*pprev, next); + if (!is_a_nulls(next)) + next->pprev = pprev; +} + +static inline void hlist_nulls_del(struct hlist_nulls_node *n) +{ + __hlist_nulls_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_nulls_for_each_entry - iterate over list of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +/** + * hlist_nulls_for_each_entry_from - iterate over a hlist continuing from current point + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct hlist_node to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + * + */ +#define hlist_nulls_for_each_entry_from(tpos, pos, member) \ + for (; (!is_a_nulls(pos)) && \ + ({ tpos = hlist_nulls_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#endif diff --git a/include/linux/llist.h b/include/linux/llist.h new file mode 100644 index 0000000..8abc2e0 --- /dev/null +++ b/include/linux/llist.h @@ -0,0 +1,201 @@ +#ifndef __TOOLS_LINUX_LLIST_H +#define __TOOLS_LINUX_LLIST_H + +/* + * Lock-less NULL terminated single linked list + * + * If there are multiple producers and multiple consumers, llist_add + * can be used in producers and llist_del_all can be used in + * consumers. 
They can work simultaneously without lock. But + * llist_del_first can not be used here. Because llist_del_first + * depends on list->first->next does not changed if list->first is not + * changed during its operation, but llist_del_first, llist_add, + * llist_add (or llist_del_all, llist_add, llist_add) sequence in + * another consumer may violate that. + * + * If there are multiple producers and one consumer, llist_add can be + * used in producers and llist_del_all or llist_del_first can be used + * in the consumer. + * + * This can be summarized as follow: + * + * | add | del_first | del_all + * add | - | - | - + * del_first | | L | L + * del_all | | | - + * + * Where "-" stands for no lock is needed, while "L" stands for lock + * is needed. + * + * The list entries deleted via llist_del_all can be traversed with + * traversing function such as llist_for_each etc. But the list + * entries can not be traversed safely before deleted from the list. + * The order of deleted entries is from the newest to the oldest added + * one. If you want to traverse from the oldest to the newest, you + * must reverse the order by yourself before traversing. + * + * The basic atomic operation of this list is cmpxchg on long. On + * architectures that don't have NMI-safe cmpxchg implementation, the + * list can NOT be used in NMI handlers. So code that uses the list in + * an NMI handler should depend on CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG. + * + * Copyright 2010,2011 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/atomic.h> +#include <linux/kernel.h> + +struct llist_head { + struct llist_node *first; +}; + +struct llist_node { + struct llist_node *next; +}; + +#define LLIST_HEAD_INIT(name) { NULL } +#define LLIST_HEAD(name) struct llist_head name = LLIST_HEAD_INIT(name) + +/** + * init_llist_head - initialize lock-less list head + * @head: the head for your lock-less list + */ +static inline void init_llist_head(struct llist_head *list) +{ + list->first = NULL; +} + +/** + * llist_entry - get the struct of this entry + * @ptr: the &struct llist_node pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the llist_node within the struct. + */ +#define llist_entry(ptr, type, member) \ + container_of(ptr, type, member) + +/** + * llist_for_each - iterate over some deleted entries of a lock-less list + * @pos: the &struct llist_node to use as a loop cursor + * @node: the first entry of deleted list entries + * + * In general, some entries of the lock-less list can be traversed + * safely only after being deleted from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. 
If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each(pos, node) \ + for ((pos) = (node); pos; (pos) = (pos)->next) + +/** + * llist_for_each_entry - iterate over some deleted entries of lock-less list of given type + * @pos: the type * to use as a loop cursor. + * @node: the fist entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry(pos, node, member) \ + for ((pos) = llist_entry((node), typeof(*(pos)), member); \ + &(pos)->member != NULL; \ + (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) + +/** + * llist_for_each_entry_safe - iterate over some deleted entries of lock-less list of given type + * safe against removal of list entry + * @pos: the type * to use as a loop cursor. + * @n: another type * to use as temporary storage + * @node: the first entry of deleted list entries. + * @member: the name of the llist_node with the struct. + * + * In general, some entries of the lock-less list can be traversed + * safely only after being removed from list, so start with an entry + * instead of list head. + * + * If being used on entries deleted from lock-less list directly, the + * traverse order is from the newest to the oldest added entry. If + * you want to traverse from the oldest to the newest, you must + * reverse the order by yourself before traversing. + */ +#define llist_for_each_entry_safe(pos, n, node, member) \ + for (pos = llist_entry((node), typeof(*pos), member); \ + &pos->member != NULL && \ + (n = llist_entry(pos->member.next, typeof(*n), member), true); \ + pos = n) + +/** + * llist_empty - tests whether a lock-less list is empty + * @head: the list to test + * + * Not guaranteed to be accurate or up to date. Just a quick way to + * test whether the list is empty without deleting something from the + * list. + */ +static inline bool llist_empty(const struct llist_head *head) +{ + return ACCESS_ONCE(head->first) == NULL; +} + +static inline struct llist_node *llist_next(struct llist_node *node) +{ + return node->next; +} + +extern bool llist_add_batch(struct llist_node *new_first, + struct llist_node *new_last, + struct llist_head *head); +/** + * llist_add - add a new entry + * @new: new entry to be added + * @head: the head for your lock-less list + * + * Returns true if the list was empty prior to adding this entry. + */ +static inline bool llist_add(struct llist_node *new, struct llist_head *head) +{ + return llist_add_batch(new, new, head); +} + +/** + * llist_del_all - delete all entries from lock-less list + * @head: the head of lock-less list to delete all entries + * + * If list is empty, return NULL, otherwise, delete all entries and + * return the pointer to the first entry. The order of entries + * deleted is from the newest to the oldest added one. 
+ */ +static inline struct llist_node *llist_del_all(struct llist_head *head) +{ + return xchg(&head->first, NULL); +} + +extern struct llist_node *llist_del_first(struct llist_head *head); + +struct llist_node *llist_reverse_order(struct llist_node *head); + +#endif /* __TOOLS_LINUX_LLIST_H */ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h new file mode 100644 index 0000000..d95d8da --- /dev/null +++ b/include/linux/lockdep.h @@ -0,0 +1,55 @@ +#ifndef __TOOLS_LINUX_LOCKDEP_H +#define __TOOLS_LINUX_LOCKDEP_H + +struct lock_class_key {}; +struct task_struct; + +# define lock_acquire(l, s, t, r, c, n, i) do { } while (0) +# define lock_release(l, n, i) do { } while (0) +# define lock_set_class(l, n, k, s, i) do { } while (0) +# define lock_set_subclass(l, s, i) do { } while (0) +# define lockdep_set_current_reclaim_state(g) do { } while (0) +# define lockdep_clear_current_reclaim_state() do { } while (0) +# define lockdep_trace_alloc(g) do { } while (0) +# define lockdep_info() do { } while (0) +# define lockdep_init_map(lock, name, key, sub) \ + do { (void)(name); (void)(key); } while (0) +# define lockdep_set_class(lock, key) do { (void)(key); } while (0) +# define lockdep_set_class_and_name(lock, key, name) \ + do { (void)(key); (void)(name); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + +#define lockdep_set_novalidate_class(lock) do { } while (0) + +#define lockdep_assert_held(l) do { (void)(l); } while (0) +#define lockdep_assert_held_once(l) do { (void)(l); } while (0) + +#define lock_acquire_shared(l, s, t, n, i) + +#define lockdep_acquire_shared(lock) + +#define lock_contended(lockdep_map, ip) do {} while (0) +#define lock_acquired(lockdep_map, ip) do {} while (0) + +static inline void debug_show_all_locks(void) +{ +} + +static inline void debug_show_held_locks(struct task_struct *task) +{ +} + +static inline void +debug_check_no_locks_freed(const void *from, unsigned long len) +{ +} + +static inline void +debug_check_no_locks_held(void) +{ +} + +#endif /* __TOOLS_LINUX_LOCKDEP_H */ + diff --git a/include/linux/log2.h b/include/linux/log2.h new file mode 100644 index 0000000..395cda2 --- /dev/null +++ b/include/linux/log2.h @@ -0,0 +1,187 @@ +/* Integer base 2 logarithm calculation + * + * Copyright (C) 2006 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _TOOLS_LINUX_LOG2_H +#define _TOOLS_LINUX_LOG2_H + +#include <linux/bitops.h> + +/* + * deal with unrepresentable constant logarithms + */ +extern __attribute__((const, noreturn)) +int ____ilog2_NaN(void); + +/* + * non-constant log of base 2 calculators + * - the arch may override these in asm/bitops.h if they can be implemented + * more efficiently than using fls() and fls64() + * - the arch is not required to handle n==0 if implementing the fallback + */ +static inline __attribute__((const)) +int __ilog2_u32(u32 n) +{ + return fls(n) - 1; +} + +static inline __attribute__((const)) +int __ilog2_u64(u64 n) +{ + return fls64(n) - 1; +} + +/* + * Determine whether some value is a power of two, where zero is + * *not* considered a power of two. 
+ */ + +static inline __attribute__((const)) +bool is_power_of_2(unsigned long n) +{ + return (n != 0 && ((n & (n - 1)) == 0)); +} + +/* + * round up to nearest power of two + */ +static inline __attribute__((const)) +unsigned long __roundup_pow_of_two(unsigned long n) +{ + return 1UL << fls_long(n - 1); +} + +/* + * round down to nearest power of two + */ +static inline __attribute__((const)) +unsigned long __rounddown_pow_of_two(unsigned long n) +{ + return 1UL << (fls_long(n) - 1); +} + +/** + * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value + * @n - parameter + * + * constant-capable log of base 2 calculation + * - this can be used to initialise global variables from constant data, hence + * the massive ternary operator construction + * + * selects the appropriately-sized optimised version depending on sizeof(n) + */ +#define ilog2(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n) < 1 ? ____ilog2_NaN() : \ + (n) & (1ULL << 63) ? 63 : \ + (n) & (1ULL << 62) ? 62 : \ + (n) & (1ULL << 61) ? 61 : \ + (n) & (1ULL << 60) ? 60 : \ + (n) & (1ULL << 59) ? 59 : \ + (n) & (1ULL << 58) ? 58 : \ + (n) & (1ULL << 57) ? 57 : \ + (n) & (1ULL << 56) ? 56 : \ + (n) & (1ULL << 55) ? 55 : \ + (n) & (1ULL << 54) ? 54 : \ + (n) & (1ULL << 53) ? 53 : \ + (n) & (1ULL << 52) ? 52 : \ + (n) & (1ULL << 51) ? 51 : \ + (n) & (1ULL << 50) ? 50 : \ + (n) & (1ULL << 49) ? 49 : \ + (n) & (1ULL << 48) ? 48 : \ + (n) & (1ULL << 47) ? 47 : \ + (n) & (1ULL << 46) ? 46 : \ + (n) & (1ULL << 45) ? 45 : \ + (n) & (1ULL << 44) ? 44 : \ + (n) & (1ULL << 43) ? 43 : \ + (n) & (1ULL << 42) ? 42 : \ + (n) & (1ULL << 41) ? 41 : \ + (n) & (1ULL << 40) ? 40 : \ + (n) & (1ULL << 39) ? 39 : \ + (n) & (1ULL << 38) ? 38 : \ + (n) & (1ULL << 37) ? 37 : \ + (n) & (1ULL << 36) ? 36 : \ + (n) & (1ULL << 35) ? 35 : \ + (n) & (1ULL << 34) ? 34 : \ + (n) & (1ULL << 33) ? 33 : \ + (n) & (1ULL << 32) ? 32 : \ + (n) & (1ULL << 31) ? 31 : \ + (n) & (1ULL << 30) ? 30 : \ + (n) & (1ULL << 29) ? 29 : \ + (n) & (1ULL << 28) ? 28 : \ + (n) & (1ULL << 27) ? 27 : \ + (n) & (1ULL << 26) ? 26 : \ + (n) & (1ULL << 25) ? 25 : \ + (n) & (1ULL << 24) ? 24 : \ + (n) & (1ULL << 23) ? 23 : \ + (n) & (1ULL << 22) ? 22 : \ + (n) & (1ULL << 21) ? 21 : \ + (n) & (1ULL << 20) ? 20 : \ + (n) & (1ULL << 19) ? 19 : \ + (n) & (1ULL << 18) ? 18 : \ + (n) & (1ULL << 17) ? 17 : \ + (n) & (1ULL << 16) ? 16 : \ + (n) & (1ULL << 15) ? 15 : \ + (n) & (1ULL << 14) ? 14 : \ + (n) & (1ULL << 13) ? 13 : \ + (n) & (1ULL << 12) ? 12 : \ + (n) & (1ULL << 11) ? 11 : \ + (n) & (1ULL << 10) ? 10 : \ + (n) & (1ULL << 9) ? 9 : \ + (n) & (1ULL << 8) ? 8 : \ + (n) & (1ULL << 7) ? 7 : \ + (n) & (1ULL << 6) ? 6 : \ + (n) & (1ULL << 5) ? 5 : \ + (n) & (1ULL << 4) ? 4 : \ + (n) & (1ULL << 3) ? 3 : \ + (n) & (1ULL << 2) ? 2 : \ + (n) & (1ULL << 1) ? 1 : \ + (n) & (1ULL << 0) ? 0 : \ + ____ilog2_NaN() \ + ) : \ + (sizeof(n) <= 4) ? \ + __ilog2_u32(n) : \ + __ilog2_u64(n) \ + ) + +/** + * roundup_pow_of_two - round the given value up to nearest power of two + * @n - parameter + * + * round the given value up to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define roundup_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (n == 1) ? 
1 : \ + (1UL << (ilog2((n) - 1) + 1)) \ + ) : \ + __roundup_pow_of_two(n) \ + ) + +/** + * rounddown_pow_of_two - round the given value down to nearest power of two + * @n - parameter + * + * round the given value down to the nearest power of two + * - the result is undefined when n == 0 + * - this can be used to initialise global variables from constant data + */ +#define rounddown_pow_of_two(n) \ +( \ + __builtin_constant_p(n) ? ( \ + (1UL << ilog2(n))) : \ + __rounddown_pow_of_two(n) \ + ) + +#endif /* _TOOLS_LINUX_LOG2_H */ diff --git a/include/linux/lz4.h b/include/linux/lz4.h new file mode 100644 index 0000000..6b784c5 --- /dev/null +++ b/include/linux/lz4.h @@ -0,0 +1,87 @@ +#ifndef __LZ4_H__ +#define __LZ4_H__ +/* + * LZ4 Kernel Interface + * + * Copyright (C) 2013, LG Electronics, Kyungsik Lee <kyungsik.lee@lge.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define LZ4_MEM_COMPRESS (16384) +#define LZ4HC_MEM_COMPRESS (262144 + (2 * sizeof(unsigned char *))) + +/* + * lz4_compressbound() + * Provides the maximum size that LZ4 may output in a "worst case" scenario + * (input data not compressible) + */ +static inline size_t lz4_compressbound(size_t isize) +{ + return isize + (isize / 255) + 16; +} + +/* + * lz4_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + + /* + * lz4hc_compress() + * src : source address of the original data + * src_len : size of the original data + * dst : output buffer address of the compressed data + * This requires 'dst' of size LZ4_COMPRESSBOUND. + * dst_len : is the output size, which is returned after compress done + * workmem : address of the working memory. + * This requires 'workmem' of size LZ4HC_MEM_COMPRESS. + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer and workmem must be already allocated with + * the defined size. + */ +int lz4hc_compress(const unsigned char *src, size_t src_len, + unsigned char *dst, size_t *dst_len, void *wrkmem); + +/* + * lz4_decompress() + * src : source address of the compressed data + * src_len : is the input size, whcih is returned after decompress done + * dest : output buffer address of the decompressed data + * actual_dest_len: is the size of uncompressed data, supposing it's known + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. 
+ * slightly faster than lz4_decompress_unknownoutputsize() + */ +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len); + +/* + * lz4_decompress_unknownoutputsize() + * src : source address of the compressed data + * src_len : is the input size, therefore the compressed size + * dest : output buffer address of the decompressed data + * dest_len: is the max size of the destination buffer, which is + * returned with actual size of decompressed data after + * decompress done + * return : Success if return 0 + * Error if return (< 0) + * note : Destination buffer must be already allocated. + */ +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len); +#endif diff --git a/include/linux/math64.h b/include/linux/math64.h new file mode 100644 index 0000000..5eb6f06 --- /dev/null +++ b/include/linux/math64.h @@ -0,0 +1,85 @@ +#ifndef _LINUX_MATH64_H +#define _LINUX_MATH64_H + +#include <linux/types.h> + +#define do_div(n,base) ({ \ + u32 __base = (base); \ + u32 __rem; \ + __rem = ((u64)(n)) % __base; \ + (n) = ((u64)(n)) / __base; \ + __rem; \ + }) + +#define div64_long(x, y) div64_s64((x), (y)) +#define div64_ul(x, y) div64_u64((x), (y)) + +/** + * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder + * + * This is commonly provided by 32bit archs to provide an optimized 64bit + * divide. + */ +static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div_s64_rem - signed 64bit divide with 32bit divisor with remainder + */ +static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + */ +static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + *remainder = dividend % divisor; + return dividend / divisor; +} + +/** + * div64_u64 - unsigned 64bit divide with 64bit divisor + */ +static inline u64 div64_u64(u64 dividend, u64 divisor) +{ + return dividend / divisor; +} + +/** + * div64_s64 - signed 64bit divide with 64bit divisor + */ +static inline s64 div64_s64(s64 dividend, s64 divisor) +{ + return dividend / divisor; +} + +/** + * div_u64 - unsigned 64bit divide with 32bit divisor + * + * This is the most common 64bit divide and should be used if possible, + * as many 32bit archs can optimize this variant better than a full 64bit + * divide. 
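+ *
+ * A minimal usage sketch (the values below are made up for illustration):
+ *
+ *	u64 ns = 1000000123;
+ *	u32 rem;
+ *	u64 us = div_u64(ns, 1000);			us == 1000000
+ *	u64 ms = div_u64_rem(ns, 1000000, &rem);	ms == 1000, rem == 123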
+ */ +static inline u64 div_u64(u64 dividend, u32 divisor) +{ + u32 remainder; + return div_u64_rem(dividend, divisor, &remainder); +} + +/** + * div_s64 - signed 64bit divide with 32bit divisor + */ +static inline s64 div_s64(s64 dividend, s32 divisor) +{ + s32 remainder; + return div_s64_rem(dividend, divisor, &remainder); +} + +#endif /* _LINUX_MATH64_H */ diff --git a/include/linux/mempool.h b/include/linux/mempool.h new file mode 100644 index 0000000..c2789f9 --- /dev/null +++ b/include/linux/mempool.h @@ -0,0 +1,78 @@ +/* + * memory buffer pool support + */ +#ifndef _LINUX_MEMPOOL_H +#define _LINUX_MEMPOOL_H + +#include <linux/compiler.h> +#include <linux/bug.h> +#include <linux/slab.h> + +struct kmem_cache; + +typedef struct mempool_s { + size_t elem_size; +} mempool_t; + +extern int mempool_resize(mempool_t *pool, int new_min_nr); + +static inline void mempool_free(void *element, mempool_t *pool) +{ + free(element); +} + +static inline void *mempool_alloc(mempool_t *pool, gfp_t gfp_mask) __malloc +{ + BUG_ON(!pool->elem_size); + return kmalloc(pool->elem_size, gfp_mask); +} + +static inline void mempool_exit(mempool_t *pool) {} + +static inline void mempool_destroy(mempool_t *pool) +{ + free(pool); +} + +static inline int +mempool_init_slab_pool(mempool_t *pool, int min_nr, struct kmem_cache *kc) +{ + pool->elem_size = 0; + return 0; +} + +static inline mempool_t * +mempool_create_slab_pool(int min_nr, struct kmem_cache *kc) +{ + mempool_t *pool = malloc(sizeof(*pool)); + pool->elem_size = 0; + return pool; +} + +static inline int mempool_init_kmalloc_pool(mempool_t *pool, int min_nr, size_t size) +{ + pool->elem_size = size; + return 0; +} + +static inline mempool_t *mempool_create_kmalloc_pool(int min_nr, size_t size) +{ + mempool_t *pool = malloc(sizeof(*pool)); + pool->elem_size = size; + return pool; +} + +static inline int mempool_init_page_pool(mempool_t *pool, int min_nr, int order) +{ + pool->elem_size = PAGE_SIZE << order; + return 0; +} + +static inline mempool_t *mempool_create_page_pool(int min_nr, int order) +{ + mempool_t *pool = malloc(sizeof(*pool)); + pool->elem_size = PAGE_SIZE << order; + return pool; +} + +#endif /* _LINUX_MEMPOOL_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h new file mode 100644 index 0000000..3830bc2 --- /dev/null +++ b/include/linux/mm.h @@ -0,0 +1 @@ +#include <linux/slab.h> diff --git a/include/linux/module.h b/include/linux/module.h new file mode 100644 index 0000000..3d988c1 --- /dev/null +++ b/include/linux/module.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_MODULE_H +#define _LINUX_MODULE_H + +#include <linux/stat.h> +#include <linux/compiler.h> +#include <linux/moduleparam.h> +#include <linux/export.h> + +struct module; + +#define module_init(initfn) \ + __attribute__((constructor(109))) \ + static void __call_##initfn(void) { BUG_ON(initfn()); } + +#if 0 +#define module_exit(exitfn) \ + __attribute__((destructor(109))) \ + static void __call_##exitfn(void) { exitfn(); } +#endif + +#define module_exit(exitfn) \ + __attribute__((unused)) \ + static void __call_##exitfn(void) { exitfn(); } + +#define MODULE_INFO(tag, info) +#define MODULE_ALIAS(_alias) +#define MODULE_SOFTDEP(_softdep) +#define MODULE_LICENSE(_license) +#define MODULE_AUTHOR(_author) +#define MODULE_DESCRIPTION(_description) +#define MODULE_VERSION(_version) + +static inline void __module_get(struct module *module) +{ +} + +static inline int try_module_get(struct module *module) +{ + return 1; +} + +static inline void module_put(struct module *module) +{ +} + +#endif 
/* _LINUX_MODULE_H */ diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h new file mode 100644 index 0000000..6002673 --- /dev/null +++ b/include/linux/moduleparam.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_MODULE_PARAMS_H +#define _LINUX_MODULE_PARAMS_H + +#define module_param_named(name, value, type, perm) +#define MODULE_PARM_DESC(_parm, desc) + +#endif /* _LINUX_MODULE_PARAMS_H */ diff --git a/include/linux/mutex.h b/include/linux/mutex.h new file mode 100644 index 0000000..964bd33 --- /dev/null +++ b/include/linux/mutex.h @@ -0,0 +1,15 @@ +#ifndef __TOOLS_LINUX_MUTEX_H +#define __TOOLS_LINUX_MUTEX_H + +#include <pthread.h> + +struct mutex { + pthread_mutex_t lock; +}; + +#define mutex_init(l) pthread_mutex_init(&(l)->lock, NULL) +#define mutex_lock(l) pthread_mutex_lock(&(l)->lock) +#define mutex_trylock(l) (!pthread_mutex_trylock(&(l)->lock)) +#define mutex_unlock(l) pthread_mutex_unlock(&(l)->lock) + +#endif /* __TOOLS_LINUX_MUTEX_H */ diff --git a/include/linux/notifier.h b/include/linux/notifier.h new file mode 100644 index 0000000..29bd2e1 --- /dev/null +++ b/include/linux/notifier.h @@ -0,0 +1,197 @@ +/* + * Routines to manage notifier chains for passing status changes to any + * interested routines. We need this instead of hard coded call lists so + * that modules can poke their nose into the innards. The network devices + * needed them so here they are for the rest of you. + * + * Alan Cox <Alan.Cox@linux.org> + */ + +#ifndef _LINUX_NOTIFIER_H +#define _LINUX_NOTIFIER_H +#include <linux/errno.h> +#include <linux/mutex.h> +#include <linux/rwsem.h> +//#include <linux/srcu.h> + +/* + * Notifier chains are of four types: + * + * Atomic notifier chains: Chain callbacks run in interrupt/atomic + * context. Callouts are not allowed to block. + * Blocking notifier chains: Chain callbacks run in process context. + * Callouts are allowed to block. + * Raw notifier chains: There are no restrictions on callbacks, + * registration, or unregistration. All locking and protection + * must be provided by the caller. + * SRCU notifier chains: A variant of blocking notifier chains, with + * the same restrictions. + * + * atomic_notifier_chain_register() may be called from an atomic context, + * but blocking_notifier_chain_register() and srcu_notifier_chain_register() + * must be called from a process context. Ditto for the corresponding + * _unregister() routines. + * + * atomic_notifier_chain_unregister(), blocking_notifier_chain_unregister(), + * and srcu_notifier_chain_unregister() _must not_ be called from within + * the call chain. + * + * SRCU notifier chains are an alternative form of blocking notifier chains. + * They use SRCU (Sleepable Read-Copy Update) instead of rw-semaphores for + * protection of the chain links. This means there is _very_ low overhead + * in srcu_notifier_call_chain(): no cache bounces and no memory barriers. + * As compensation, srcu_notifier_chain_unregister() is rather expensive. + * SRCU notifier chains should be used when the chain will be called very + * often but notifier_blocks will seldom be removed. Also, SRCU notifier + * chains are slightly more difficult to use because they require special + * runtime initialization. 
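+ *
+ * A minimal usage sketch (the chain, callback and variable names here are
+ * hypothetical; a blocking chain head would be declared with
+ * BLOCKING_NOTIFIER_HEAD(my_chain)):
+ *
+ *	static int my_callback(struct notifier_block *nb,
+ *			       unsigned long action, void *data)
+ *	{
+ *		return NOTIFY_OK;
+ *	}
+ *
+ *	static struct notifier_block my_nb = { .notifier_call = my_callback };
+ *
+ *	blocking_notifier_chain_register(&my_chain, &my_nb);
+ *	blocking_notifier_call_chain(&my_chain, action, data);
+ *	blocking_notifier_chain_unregister(&my_chain, &my_nb);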
+ */ + +struct notifier_block; + +typedef int (*notifier_fn_t)(struct notifier_block *nb, + unsigned long action, void *data); + +struct notifier_block { + notifier_fn_t notifier_call; + struct notifier_block __rcu *next; + int priority; +}; + +struct atomic_notifier_head { + spinlock_t lock; + struct notifier_block __rcu *head; +}; + +struct blocking_notifier_head { + struct rw_semaphore rwsem; + struct notifier_block __rcu *head; +}; + +struct raw_notifier_head { + struct notifier_block __rcu *head; +}; + +#define ATOMIC_INIT_NOTIFIER_HEAD(name) do { \ + spin_lock_init(&(name)->lock); \ + (name)->head = NULL; \ + } while (0) +#define BLOCKING_INIT_NOTIFIER_HEAD(name) do { \ + init_rwsem(&(name)->rwsem); \ + (name)->head = NULL; \ + } while (0) +#define RAW_INIT_NOTIFIER_HEAD(name) do { \ + (name)->head = NULL; \ + } while (0) + +#define ATOMIC_NOTIFIER_INIT(name) { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .head = NULL } +#define BLOCKING_NOTIFIER_INIT(name) { \ + .rwsem = __RWSEM_INITIALIZER((name).rwsem), \ + .head = NULL } +#define RAW_NOTIFIER_INIT(name) { \ + .head = NULL } + +#define ATOMIC_NOTIFIER_HEAD(name) \ + struct atomic_notifier_head name = \ + ATOMIC_NOTIFIER_INIT(name) +#define BLOCKING_NOTIFIER_HEAD(name) \ + struct blocking_notifier_head name = \ + BLOCKING_NOTIFIER_INIT(name) +#define RAW_NOTIFIER_HEAD(name) \ + struct raw_notifier_head name = \ + RAW_NOTIFIER_INIT(name) + +#define NOTIFY_DONE 0x0000 /* Don't care */ +#define NOTIFY_OK 0x0001 /* Suits me */ +#define NOTIFY_STOP_MASK 0x8000 /* Don't call further */ +#define NOTIFY_BAD (NOTIFY_STOP_MASK|0x0002) + /* Bad/Veto action */ +/* + * Clean way to return from the notifier and stop further calls. + */ +#define NOTIFY_STOP (NOTIFY_OK|NOTIFY_STOP_MASK) + +#ifdef __KERNEL__ + +extern int atomic_notifier_chain_register(struct atomic_notifier_head *nh, + struct notifier_block *nb); +extern int blocking_notifier_chain_register(struct blocking_notifier_head *nh, + struct notifier_block *nb); +extern int raw_notifier_chain_register(struct raw_notifier_head *nh, + struct notifier_block *nb); + +extern int blocking_notifier_chain_cond_register( + struct blocking_notifier_head *nh, + struct notifier_block *nb); + +extern int atomic_notifier_chain_unregister(struct atomic_notifier_head *nh, + struct notifier_block *nb); +extern int blocking_notifier_chain_unregister(struct blocking_notifier_head *nh, + struct notifier_block *nb); +extern int raw_notifier_chain_unregister(struct raw_notifier_head *nh, + struct notifier_block *nb); + +extern int atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v); +extern int __atomic_notifier_call_chain(struct atomic_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); +extern int blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v); +extern int __blocking_notifier_call_chain(struct blocking_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); +extern int raw_notifier_call_chain(struct raw_notifier_head *nh, + unsigned long val, void *v); +extern int __raw_notifier_call_chain(struct raw_notifier_head *nh, + unsigned long val, void *v, int nr_to_call, int *nr_calls); + +/* Encapsulate (negative) errno value (in particular, NOTIFY_BAD <=> EPERM). */ +static inline int notifier_from_errno(int err) +{ + if (err) + return NOTIFY_STOP_MASK | (NOTIFY_OK - err); + + return NOTIFY_OK; +} + +/* Restore (negative) errno value from notify return value. 
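+ * As a worked example (assuming EBUSY == 16 and NOTIFY_OK == 1):
+ *	notifier_from_errno(-EBUSY) == (NOTIFY_STOP_MASK | 17)
+ *	notifier_to_errno(NOTIFY_STOP_MASK | 17) == -EBUSY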
*/ +static inline int notifier_to_errno(int ret) +{ + ret &= ~NOTIFY_STOP_MASK; + return ret > NOTIFY_OK ? NOTIFY_OK - ret : 0; +} + +/* + * Declared notifiers so far. I can imagine quite a few more chains + * over time (eg laptop power reset chains, reboot chain (to clean + * device units up), device [un]mount chain, module load/unload chain, + * low memory chain, screenblank chain (for plug in modular screenblankers) + * VC switch chains (for loadable kernel svgalib VC switch helpers) etc... + */ + +/* CPU notfiers are defined in include/linux/cpu.h. */ + +/* netdevice notifiers are defined in include/linux/netdevice.h */ + +/* reboot notifiers are defined in include/linux/reboot.h. */ + +/* Hibernation and suspend events are defined in include/linux/suspend.h. */ + +/* Virtual Terminal events are defined in include/linux/vt.h. */ + +#define NETLINK_URELEASE 0x0001 /* Unicast netlink socket released */ + +/* Console keyboard events. + * Note: KBD_KEYCODE is always sent before KBD_UNBOUND_KEYCODE, KBD_UNICODE and + * KBD_KEYSYM. */ +#define KBD_KEYCODE 0x0001 /* Keyboard keycode, called before any other */ +#define KBD_UNBOUND_KEYCODE 0x0002 /* Keyboard keycode which is not bound to any other */ +#define KBD_UNICODE 0x0003 /* Keyboard unicode */ +#define KBD_KEYSYM 0x0004 /* Keyboard keysym */ +#define KBD_POST_KEYSYM 0x0005 /* Called after keyboard keysym interpretation */ + +extern struct blocking_notifier_head reboot_notifier_list; + +#endif /* __KERNEL__ */ +#endif /* _LINUX_NOTIFIER_H */ diff --git a/include/linux/osq_lock.h b/include/linux/osq_lock.h new file mode 100644 index 0000000..bde9f0d --- /dev/null +++ b/include/linux/osq_lock.h @@ -0,0 +1,44 @@ +#ifndef __LINUX_OSQ_LOCK_H +#define __LINUX_OSQ_LOCK_H + +/* + * An MCS like lock especially tailored for optimistic spinning for sleeping + * lock implementations (mutex, rwsem, etc). + */ +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; + +struct optimistic_spin_queue { + /* + * Stores an encoded value of the CPU # of the tail node in the queue. + * If the queue is empty, then it's set to OSQ_UNLOCKED_VAL. + */ + atomic_t tail; +}; + +#define OSQ_UNLOCKED_VAL (0) + +/* Init macro and function. 
*/ +#define OSQ_LOCK_UNLOCKED { ATOMIC_INIT(OSQ_UNLOCKED_VAL) } + +static inline void osq_lock_init(struct optimistic_spin_queue *lock) +{ + atomic_set(&lock->tail, OSQ_UNLOCKED_VAL); +} + +static inline bool osq_lock(struct optimistic_spin_queue *lock) +{ + return false; +} + +static inline void osq_unlock(struct optimistic_spin_queue *lock) {} + +static inline bool osq_is_locked(struct optimistic_spin_queue *lock) +{ + return atomic_read(&lock->tail) != OSQ_UNLOCKED_VAL; +} + +#endif diff --git a/include/linux/page.h b/include/linux/page.h new file mode 100644 index 0000000..c99d9de --- /dev/null +++ b/include/linux/page.h @@ -0,0 +1,18 @@ +#ifndef _LINUX_PAGE_H +#define _LINUX_PAGE_H + +#include <sys/user.h> + +struct page; + +#define virt_to_page(kaddr) ((struct page *) (kaddr)) +#define page_address(kaddr) ((void *) (kaddr)) + +#define kmap_atomic(page) page_address(page) +#define kunmap_atomic(addr) do {} while (0) + +static const char zero_page[PAGE_SIZE]; + +#define ZERO_PAGE(o) ((struct page *) &zero_page[0]) + +#endif /* _LINUX_PAGE_H */ diff --git a/include/linux/path.h b/include/linux/path.h new file mode 100644 index 0000000..d137218 --- /dev/null +++ b/include/linux/path.h @@ -0,0 +1,20 @@ +#ifndef _LINUX_PATH_H +#define _LINUX_PATH_H + +struct dentry; +struct vfsmount; + +struct path { + struct vfsmount *mnt; + struct dentry *dentry; +}; + +extern void path_get(const struct path *); +extern void path_put(const struct path *); + +static inline int path_equal(const struct path *path1, const struct path *path2) +{ + return path1->mnt == path2->mnt && path1->dentry == path2->dentry; +} + +#endif /* _LINUX_PATH_H */ diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h new file mode 100644 index 0000000..5a98618 --- /dev/null +++ b/include/linux/percpu-refcount.h @@ -0,0 +1,183 @@ +#ifndef __TOOLS_LINUX_PERCPU_REFCOUNT_H +#define __TOOLS_LINUX_PERCPU_REFCOUNT_H + +#include <linux/atomic.h> +#include <linux/kernel.h> +#include <linux/percpu.h> + +struct percpu_ref; +typedef void (percpu_ref_func_t)(struct percpu_ref *); + +/* flags set in the lower bits of percpu_ref->percpu_count_ptr */ +enum { + __PERCPU_REF_ATOMIC = 1LU << 0, /* operating in atomic mode */ + __PERCPU_REF_DEAD = 1LU << 1, /* (being) killed */ + __PERCPU_REF_ATOMIC_DEAD = __PERCPU_REF_ATOMIC | __PERCPU_REF_DEAD, + + __PERCPU_REF_FLAG_BITS = 2, +}; + +/* @flags for percpu_ref_init() */ +enum { + /* + * Start w/ ref == 1 in atomic mode. Can be switched to percpu + * operation using percpu_ref_switch_to_percpu(). If initialized + * with this flag, the ref will stay in atomic mode until + * percpu_ref_switch_to_percpu() is invoked on it. + */ + PERCPU_REF_INIT_ATOMIC = 1 << 0, + + /* + * Start dead w/ ref == 0 in atomic mode. Must be revived with + * percpu_ref_reinit() before used. Implies INIT_ATOMIC. 
+ */ + PERCPU_REF_INIT_DEAD = 1 << 1, +}; + +struct percpu_ref { + atomic_long_t count; + percpu_ref_func_t *release; + percpu_ref_func_t *confirm_switch; +}; + +static inline void percpu_ref_exit(struct percpu_ref *ref) {} + +static inline int __must_check percpu_ref_init(struct percpu_ref *ref, + percpu_ref_func_t *release, unsigned int flags, + gfp_t gfp) +{ + unsigned long start_count = 0; + + if (!(flags & PERCPU_REF_INIT_DEAD)) + start_count++; + + atomic_long_set(&ref->count, start_count); + + ref->release = release; + return 0; +} + +static inline void percpu_ref_switch_to_atomic(struct percpu_ref *ref, + percpu_ref_func_t *confirm_switch) {} + +static inline void percpu_ref_switch_to_percpu(struct percpu_ref *ref) {} + +static inline void percpu_ref_reinit(struct percpu_ref *ref) {} + +/** + * percpu_ref_get_many - increment a percpu refcount + * @ref: percpu_ref to get + * @nr: number of references to get + * + * Analogous to atomic_long_add(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr) +{ + atomic_long_add(nr, &ref->count); +} + +/** + * percpu_ref_get - increment a percpu refcount + * @ref: percpu_ref to get + * + * Analagous to atomic_long_inc(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_get(struct percpu_ref *ref) +{ + percpu_ref_get_many(ref, 1); +} + +/** + * percpu_ref_tryget - try to increment a percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless its count already reached zero. + * Returns %true on success; %false on failure. + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_tryget(struct percpu_ref *ref) +{ + return atomic_long_inc_not_zero(&ref->count); +} + +/** + * percpu_ref_tryget_live - try to increment a live percpu refcount + * @ref: percpu_ref to try-get + * + * Increment a percpu refcount unless it has already been killed. Returns + * %true on success; %false on failure. + * + * Completion of percpu_ref_kill() in itself doesn't guarantee that this + * function will fail. For such guarantee, percpu_ref_kill_and_confirm() + * should be used. After the confirm_kill callback is invoked, it's + * guaranteed that no new reference will be given out by + * percpu_ref_tryget_live(). + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_tryget_live(struct percpu_ref *ref) +{ + return atomic_long_inc_not_zero(&ref->count); +} + +/** + * percpu_ref_put_many - decrement a percpu refcount + * @ref: percpu_ref to put + * @nr: number of references to put + * + * Decrement the refcount, and if 0, call the release function (which was passed + * to percpu_ref_init()) + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr) +{ + if (unlikely(atomic_long_sub_and_test(nr, &ref->count))) + ref->release(ref); +} + +/** + * percpu_ref_put - decrement a percpu refcount + * @ref: percpu_ref to put + * + * Decrement the refcount, and if 0, call the release function (which was passed + * to percpu_ref_init()) + * + * This function is safe to call as long as @ref is between init and exit. 
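+ *
+ * A typical lifetime sketch (obj and my_release are hypothetical; my_release
+ * must have the percpu_ref_func_t signature):
+ *
+ *	percpu_ref_init(&obj->ref, my_release, 0, GFP_KERNEL);
+ *	if (percpu_ref_tryget(&obj->ref)) {
+ *		... use obj ...
+ *		percpu_ref_put(&obj->ref);
+ *	}
+ *	percpu_ref_kill(&obj->ref);	drops the initial ref; my_release()
+ *					runs once the count reaches zero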
+ */ +static inline void percpu_ref_put(struct percpu_ref *ref) +{ + percpu_ref_put_many(ref, 1); +} + +/** + * percpu_ref_kill - drop the initial ref + * @ref: percpu_ref to kill + * + * Must be used to drop the initial ref on a percpu refcount; must be called + * precisely once before shutdown. + */ +static inline void percpu_ref_kill(struct percpu_ref *ref) +{ + percpu_ref_put(ref); +} + +/** + * percpu_ref_is_zero - test whether a percpu refcount reached zero + * @ref: percpu_ref to test + * + * Returns %true if @ref reached zero. + * + * This function is safe to call as long as @ref is between init and exit. + */ +static inline bool percpu_ref_is_zero(struct percpu_ref *ref) +{ + return !atomic_long_read(&ref->count); +} + +#endif /* __TOOLS_LINUX_PERCPU_REFCOUNT_H */ diff --git a/include/linux/percpu.h b/include/linux/percpu.h new file mode 100644 index 0000000..ad24977 --- /dev/null +++ b/include/linux/percpu.h @@ -0,0 +1,189 @@ +#ifndef __TOOLS_LINUX_PERCPU_H +#define __TOOLS_LINUX_PERCPU_H + +#define __percpu + +#define free_percpu(percpu) free(percpu) + +#define __alloc_percpu_gfp(size, align, gfp) calloc(1, size) +#define __alloc_percpu(size, align) calloc(1, size) + +#define alloc_percpu_gfp(type, gfp) \ + (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ + __alignof__(type), gfp) +#define alloc_percpu(type) \ + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) + +#define __verify_pcpu_ptr(ptr) + +#define per_cpu_ptr(ptr, cpu) (ptr) +#define raw_cpu_ptr(ptr) (ptr) +#define this_cpu_ptr(ptr) raw_cpu_ptr(ptr) + +#define __pcpu_size_call_return(stem, variable) \ +({ \ + typeof(variable) pscr_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr_ret__ = stem##1(variable); break; \ + case 2: pscr_ret__ = stem##2(variable); break; \ + case 4: pscr_ret__ = stem##4(variable); break; \ + case 8: pscr_ret__ = stem##8(variable); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr_ret__; \ +}) + +#define __pcpu_size_call_return2(stem, variable, ...) \ +({ \ + typeof(variable) pscr2_ret__; \ + __verify_pcpu_ptr(&(variable)); \ + switch(sizeof(variable)) { \ + case 1: pscr2_ret__ = stem##1(variable, __VA_ARGS__); break; \ + case 2: pscr2_ret__ = stem##2(variable, __VA_ARGS__); break; \ + case 4: pscr2_ret__ = stem##4(variable, __VA_ARGS__); break; \ + case 8: pscr2_ret__ = stem##8(variable, __VA_ARGS__); break; \ + default: \ + __bad_size_call_parameter(); break; \ + } \ + pscr2_ret__; \ +}) + +/* + * Special handling for cmpxchg_double. cmpxchg_double is passed two + * percpu variables. The first has to be aligned to a double word + * boundary and the second has to follow directly thereafter. + * We enforce this on all architectures even if they don't support + * a double cmpxchg instruction, since it's a cheap requirement, and it + * avoids breaking the requirement for architectures with the instruction. + */ +#define __pcpu_double_call_return_bool(stem, pcp1, pcp2, ...) 
\
+({ \
+ bool pdcrb_ret__; \
+ __verify_pcpu_ptr(&(pcp1)); \
+ BUILD_BUG_ON(sizeof(pcp1) != sizeof(pcp2)); \
+ VM_BUG_ON((unsigned long)(&(pcp1)) % (2 * sizeof(pcp1))); \
+ VM_BUG_ON((unsigned long)(&(pcp2)) != \
+ (unsigned long)(&(pcp1)) + sizeof(pcp1)); \
+ switch(sizeof(pcp1)) { \
+ case 1: pdcrb_ret__ = stem##1(pcp1, pcp2, __VA_ARGS__); break; \
+ case 2: pdcrb_ret__ = stem##2(pcp1, pcp2, __VA_ARGS__); break; \
+ case 4: pdcrb_ret__ = stem##4(pcp1, pcp2, __VA_ARGS__); break; \
+ case 8: pdcrb_ret__ = stem##8(pcp1, pcp2, __VA_ARGS__); break; \
+ default: \
+ __bad_size_call_parameter(); break; \
+ } \
+ pdcrb_ret__; \
+})
+
+#define __pcpu_size_call(stem, variable, ...) \
+do { \
+ __verify_pcpu_ptr(&(variable)); \
+ switch(sizeof(variable)) { \
+ case 1: stem##1(variable, __VA_ARGS__);break; \
+ case 2: stem##2(variable, __VA_ARGS__);break; \
+ case 4: stem##4(variable, __VA_ARGS__);break; \
+ case 8: stem##8(variable, __VA_ARGS__);break; \
+ default: \
+ __bad_size_call_parameter();break; \
+ } \
+} while (0)
+
+#define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, pcp)
+#define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, pcp, val)
+#define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, pcp, val)
+#define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, pcp, val)
+#define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, pcp, val)
+#define raw_cpu_add_return(pcp, val) __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)
+#define raw_cpu_xchg(pcp, nval) __pcpu_size_call_return2(raw_cpu_xchg_, pcp, nval)
+#define raw_cpu_cmpxchg(pcp, oval, nval) \
+ __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)
+#define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+ __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2)
+
+#define raw_cpu_sub(pcp, val) raw_cpu_add(pcp, -(val))
+#define raw_cpu_inc(pcp) raw_cpu_add(pcp, 1)
+#define raw_cpu_dec(pcp) raw_cpu_sub(pcp, 1)
+#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1)
+#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1)
+
+#define __this_cpu_read(pcp) \
+({ \
+ raw_cpu_read(pcp); \
+})
+
+#define __this_cpu_write(pcp, val) \
+({ \
+ raw_cpu_write(pcp, val); \
+})
+
+#define __this_cpu_add(pcp, val) \
+({ \
+ raw_cpu_add(pcp, val); \
+})
+
+#define __this_cpu_and(pcp, val) \
+({ \
+ raw_cpu_and(pcp, val); \
+})
+
+#define __this_cpu_or(pcp, val) \
+({ \
+ raw_cpu_or(pcp, val); \
+})
+
+#define __this_cpu_add_return(pcp, val) \
+({ \
+ raw_cpu_add_return(pcp, val); \
+})
+
+#define __this_cpu_xchg(pcp, nval) \
+({ \
+ raw_cpu_xchg(pcp, nval); \
+})
+
+#define __this_cpu_cmpxchg(pcp, oval, nval) \
+({ \
+ raw_cpu_cmpxchg(pcp, oval, nval); \
+})
+
+#define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \
+({ \
+ raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2); \
+})
+
+#define __this_cpu_sub(pcp, val) __this_cpu_add(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
+#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
+#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val))
+#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1)
+#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1)
+
+#define this_cpu_read(pcp) ((pcp))
+#define this_cpu_write(pcp, val) ((pcp) = val)
+#define this_cpu_add(pcp, val) ((pcp) += val)
+#define this_cpu_and(pcp,
val) ((pcp) &= val) +#define this_cpu_or(pcp, val) ((pcp) |= val) +#define this_cpu_add_return(pcp, val) ((pcp) += val) +#define this_cpu_xchg(pcp, nval) \ +({ \ + typeof(pcp) _r = (pcp); \ + (pcp) = (nval); \ + _r; \ +}) + +#define this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(this_cpu_cmpxchg_, pcp, oval, nval) +#define this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(this_cpu_cmpxchg_double_, pcp1, pcp2, oval1, oval2, nval1, nval2) + +#define this_cpu_sub(pcp, val) this_cpu_add(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc(pcp) this_cpu_add(pcp, 1) +#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1) +#define this_cpu_sub_return(pcp, val) this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define this_cpu_inc_return(pcp) this_cpu_add_return(pcp, 1) +#define this_cpu_dec_return(pcp) this_cpu_add_return(pcp, -1) + +#endif /* __TOOLS_LINUX_PERCPU_H */ diff --git a/include/linux/poison.h b/include/linux/poison.h new file mode 100644 index 0000000..51334ed --- /dev/null +++ b/include/linux/poison.h @@ -0,0 +1,90 @@ +#ifndef _LINUX_POISON_H +#define _LINUX_POISON_H + +/********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + +/* + * These are non-NULL pointers that will result in page faults + * under normal circumstances, used to verify that nobody uses + * non-initialized list entries. + */ +#define LIST_POISON1 ((void *) 0x100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x200 + POISON_POINTER_DELTA) + +/********** include/linux/timer.h **********/ +/* + * Magic number "tsta" to indicate a static timer initializer + * for the object debugging code. + */ +#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA) + +/********** mm/debug-pagealloc.c **********/ +#ifdef CONFIG_PAGE_POISONING_ZERO +#define PAGE_POISON 0x00 +#else +#define PAGE_POISON 0xaa +#endif + +/********** mm/page_alloc.c ************/ + +#define TAIL_MAPPING ((void *) 0x400 + POISON_POINTER_DELTA) + +/********** mm/slab.c **********/ +/* + * Magic nums for obj red zoning. + * Placed in the first word before and the first word after an obj. + */ +#define RED_INACTIVE 0x09F911029D74E35BULL /* when obj is inactive */ +#define RED_ACTIVE 0xD84156C5635688C0ULL /* when obj is active */ + +#define SLUB_RED_INACTIVE 0xbb +#define SLUB_RED_ACTIVE 0xcc + +/* ...and for poisoning */ +#define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ +#define POISON_FREE 0x6b /* for use-after-free poisoning */ +#define POISON_END 0xa5 /* end-byte of poisoning */ + +/********** arch/$ARCH/mm/init.c **********/ +#define POISON_FREE_INITMEM 0xcc + +/********** arch/ia64/hp/common/sba_iommu.c **********/ +/* + * arch/ia64/hp/common/sba_iommu.c uses a 16-byte poison string with a + * value of "SBAIOMMU POISON\0" for spill-over poisoning. 
+ */ + +/********** fs/jbd/journal.c **********/ +#define JBD_POISON_FREE 0x5b +#define JBD2_POISON_FREE 0x5c + +/********** drivers/base/dmapool.c **********/ +#define POOL_POISON_FREED 0xa7 /* !inuse */ +#define POOL_POISON_ALLOCATED 0xa9 /* !initted */ + +/********** drivers/atm/ **********/ +#define ATM_POISON_FREE 0x12 +#define ATM_POISON 0xdeadbeef + +/********** kernel/mutexes **********/ +#define MUTEX_DEBUG_INIT 0x11 +#define MUTEX_DEBUG_FREE 0x22 + +/********** lib/flex_array.c **********/ +#define FLEX_ARRAY_FREE 0x6c /* for use-after-free poisoning */ + +/********** security/ **********/ +#define KEY_DESTROY 0xbd + +#endif diff --git a/include/linux/posix_acl.h b/include/linux/posix_acl.h new file mode 100644 index 0000000..1d21bfe --- /dev/null +++ b/include/linux/posix_acl.h @@ -0,0 +1,49 @@ +/* + File: linux/posix_acl.h + + (C) 2002 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + + +#ifndef __LINUX_POSIX_ACL_H +#define __LINUX_POSIX_ACL_H + +#include <linux/bug.h> +#include <linux/slab.h> +#include <linux/rcupdate.h> + +#define ACL_UNDEFINED_ID (-1) + +/* a_type field in acl_user_posix_entry_t */ +#define ACL_TYPE_ACCESS (0x8000) +#define ACL_TYPE_DEFAULT (0x4000) + +/* e_tag entry in struct posix_acl_entry */ +#define ACL_USER_OBJ (0x01) +#define ACL_USER (0x02) +#define ACL_GROUP_OBJ (0x04) +#define ACL_GROUP (0x08) +#define ACL_MASK (0x10) +#define ACL_OTHER (0x20) + +/* permissions in the e_perm field */ +#define ACL_READ (0x04) +#define ACL_WRITE (0x02) +#define ACL_EXECUTE (0x01) + +struct posix_acl_entry { + short e_tag; + unsigned short e_perm; + union { + uid_t e_uid; + gid_t e_gid; + }; +}; + +struct posix_acl { + struct rcu_head a_rcu; + unsigned int a_count; + struct posix_acl_entry a_entries[0]; +}; + +#endif /* __LINUX_POSIX_ACL_H */ diff --git a/include/linux/posix_acl_xattr.h b/include/linux/posix_acl_xattr.h new file mode 100644 index 0000000..65beeb1 --- /dev/null +++ b/include/linux/posix_acl_xattr.h @@ -0,0 +1,34 @@ +/* + File: linux/posix_acl_xattr.h + + Extended attribute system call representation of Access Control Lists. 
+ + Copyright (C) 2000 by Andreas Gruenbacher <a.gruenbacher@computer.org> + Copyright (C) 2002 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com> + */ +#ifndef _POSIX_ACL_XATTR_H +#define _POSIX_ACL_XATTR_H + +#include <uapi/linux/xattr.h> + +/* Supported ACL a_version fields */ +#define POSIX_ACL_XATTR_VERSION 0x0002 + +/* An undefined entry e_id value */ +#define ACL_UNDEFINED_ID (-1) + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} posix_acl_xattr_entry; + +typedef struct { + __le32 a_version; + posix_acl_xattr_entry a_entries[0]; +} posix_acl_xattr_header; + +extern const struct xattr_handler posix_acl_access_xattr_handler; +extern const struct xattr_handler posix_acl_default_xattr_handler; + +#endif /* _POSIX_ACL_XATTR_H */ diff --git a/include/linux/preempt.h b/include/linux/preempt.h new file mode 100644 index 0000000..0618601 --- /dev/null +++ b/include/linux/preempt.h @@ -0,0 +1,15 @@ +#ifndef __LINUX_PREEMPT_H +#define __LINUX_PREEMPT_H + +#define preempt_disable() barrier() +#define sched_preempt_enable_no_resched() barrier() +#define preempt_enable_no_resched() barrier() +#define preempt_enable() barrier() +#define preempt_check_resched() do { } while (0) + +#define preempt_disable_notrace() barrier() +#define preempt_enable_no_resched_notrace() barrier() +#define preempt_enable_notrace() barrier() +#define preemptible() 0 + +#endif /* __LINUX_PREEMPT_H */ diff --git a/include/linux/prefetch.h b/include/linux/prefetch.h new file mode 100644 index 0000000..13cb826 --- /dev/null +++ b/include/linux/prefetch.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_PREFETCH_H +#define _LINUX_PREFETCH_H + +#define prefetch(p) \ + ({ __maybe_unused typeof(p) __var = (p); }) + +#endif /* _LINUX_PREFETCH_H */ diff --git a/include/linux/printk.h b/include/linux/printk.h new file mode 100644 index 0000000..4e29af4 --- /dev/null +++ b/include/linux/printk.h @@ -0,0 +1,205 @@ +#ifndef __TOOLS_LINUX_PRINTK_H +#define __TOOLS_LINUX_PRINTK_H + +#ifndef pr_fmt +#define pr_fmt(fmt) fmt +#endif + +#include <stdarg.h> +#include <stdio.h> + +#define KERN_EMERG "" +#define KERN_ALERT "" +#define KERN_CRIT "" +#define KERN_ERR "" +#define KERN_WARNING "" +#define KERN_NOTICE "" +#define KERN_INFO "" +#define KERN_DEBUG "" +#define KERN_DEFAULT "" +#define KERN_CONT "" + +static inline int vscnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int i = vsnprintf(buf, size, fmt, args); + ssize_t ssize = size; + + return (i >= ssize) ? (ssize - 1) : i; +} + +static inline int scnprintf(char * buf, size_t size, const char * fmt, ...) +{ + ssize_t ssize = size; + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + + return (i >= ssize) ? (ssize - 1) : i; +} + +#define printk(...) printf(__VA_ARGS__) + +#define no_printk(fmt, ...) \ +({ \ + do { \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ + } while (0); \ + 0; \ +}) + +#define pr_emerg(fmt, ...) \ + printk(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert(fmt, ...) \ + printk(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit(fmt, ...) \ + printk(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err(fmt, ...) \ + printk(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warning(fmt, ...) \ + printk(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn pr_warning +#define pr_notice(fmt, ...) \ + printk(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info(fmt, ...) 
\ + printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* + * Like KERN_CONT, pr_cont() should only be used when continuing + * a line with no newline ('\n') enclosed. Otherwise it defaults + * back to KERN_DEFAULT. + */ +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) + +/* pr_devel() should produce zero code unless DEBUG is defined */ +#ifdef DEBUG +#define pr_devel(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + + +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(CONFIG_DYNAMIC_DEBUG) +#include <linux/dynamic_debug.h> + +/* dynamic_pr_debug() uses pr_fmt() internally so we don't need it here */ +#define pr_debug(fmt, ...) \ + dynamic_pr_debug(fmt, ##__VA_ARGS__) +#elif defined(DEBUG) +#define pr_debug(fmt, ...) \ + printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* + * Print a one-time message (analogous to WARN_ONCE() et al): + */ + +#define printk_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + bool __ret_print_once = !__print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk(fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_print_once); \ +}) +#define printk_deferred_once(fmt, ...) \ +({ \ + static bool __print_once __read_mostly; \ + bool __ret_print_once = !__print_once; \ + \ + if (!__print_once) { \ + __print_once = true; \ + printk_deferred(fmt, ##__VA_ARGS__); \ + } \ + unlikely(__ret_print_once); \ +}) + +#define pr_emerg_once(fmt, ...) \ + printk_once(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert_once(fmt, ...) \ + printk_once(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit_once(fmt, ...) \ + printk_once(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err_once(fmt, ...) \ + printk_once(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn_once(fmt, ...) \ + printk_once(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice_once(fmt, ...) \ + printk_once(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_once(fmt, ...) \ + printk_once(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +#define pr_cont_once(fmt, ...) \ + printk_once(KERN_CONT pr_fmt(fmt), ##__VA_ARGS__) + +#if defined(DEBUG) +#define pr_devel_once(fmt, ...) \ + printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_once(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* If you are writing a driver, please use dev_dbg instead */ +#if defined(DEBUG) +#define pr_debug_once(fmt, ...) \ + printk_once(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_debug_once(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif + +/* + * ratelimited messages with local ratelimit_state, + * no local ratelimit_state used in the !PRINTK case + */ +#ifdef CONFIG_PRINTK +#define printk_ratelimited(fmt, ...) \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + \ + if (__ratelimit(&_rs)) \ + printk(fmt, ##__VA_ARGS__); \ +}) +#else +#define printk_ratelimited(fmt, ...) \ + no_printk(fmt, ##__VA_ARGS__) +#endif + +#define pr_emerg_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_EMERG pr_fmt(fmt), ##__VA_ARGS__) +#define pr_alert_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ALERT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_crit_ratelimited(fmt, ...) 
\ + printk_ratelimited(KERN_CRIT pr_fmt(fmt), ##__VA_ARGS__) +#define pr_err_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_ERR pr_fmt(fmt), ##__VA_ARGS__) +#define pr_warn_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_WARNING pr_fmt(fmt), ##__VA_ARGS__) +#define pr_notice_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_NOTICE pr_fmt(fmt), ##__VA_ARGS__) +#define pr_info_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) +/* no pr_cont_ratelimited, don't do that... */ + +#if defined(DEBUG) +#define pr_devel_ratelimited(fmt, ...) \ + printk_ratelimited(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#else +#define pr_devel_ratelimited(fmt, ...) \ + no_printk(KERN_DEBUG pr_fmt(fmt), ##__VA_ARGS__) +#endif +#endif /* __TOOLS_LINUX_PRINTK_H */ diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h new file mode 100644 index 0000000..ae5cc6d --- /dev/null +++ b/include/linux/radix-tree.h @@ -0,0 +1,14 @@ +#ifndef _LINUX_RADIX_TREE_H +#define _LINUX_RADIX_TREE_H + +struct radix_tree_root { +}; + +#define INIT_RADIX_TREE(root, mask) do {} while (0) + +static inline void *radix_tree_lookup(struct radix_tree_root *r, unsigned long i) +{ + return NULL; +} + +#endif /* _LINUX_RADIX_TREE_H */ diff --git a/include/linux/random.h b/include/linux/random.h new file mode 100644 index 0000000..bd3dc61 --- /dev/null +++ b/include/linux/random.h @@ -0,0 +1,31 @@ +/* + * include/linux/random.h + * + * Include file for the random number generator. + */ +#ifndef _LINUX_RANDOM_H +#define _LINUX_RANDOM_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <linux/bug.h> + +static inline int getrandom(void *buf, size_t buflen, unsigned int flags) +{ + return syscall(SYS_getrandom, buf, buflen, flags); +} + +static inline void get_random_bytes(void *buf, int nbytes) +{ + BUG_ON(getrandom(buf, nbytes, 0) != nbytes); +} + +static inline int get_random_int(void) +{ + int v; + + get_random_bytes(&v, sizeof(v)); + return v; +} + +#endif /* _LINUX_RANDOM_H */ diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h new file mode 100644 index 0000000..680181d --- /dev/null +++ b/include/linux/ratelimit.h @@ -0,0 +1,109 @@ +#ifndef _LINUX_RATELIMIT_H +#define _LINUX_RATELIMIT_H + +#include <linux/printk.h> +#include <linux/sched.h> +#include <linux/spinlock.h> + +#define DEFAULT_RATELIMIT_INTERVAL (5 * HZ) +#define DEFAULT_RATELIMIT_BURST 10 + +/* issue num suppressed message on exit */ +#define RATELIMIT_MSG_ON_RELEASE 1 + +struct ratelimit_state { + raw_spinlock_t lock; /* protect the state */ + + int interval; + int burst; + int printed; + int missed; + unsigned long begin; + unsigned long flags; +}; + +#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \ + .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \ + .interval = interval_init, \ + .burst = burst_init, \ + } + +#define RATELIMIT_STATE_INIT_DISABLED \ + RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST) + +#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) \ + \ + struct ratelimit_state name = \ + RATELIMIT_STATE_INIT(name, interval_init, burst_init) \ + +static inline void ratelimit_state_init(struct ratelimit_state *rs, + int interval, int burst) +{ + memset(rs, 0, sizeof(*rs)); + + raw_spin_lock_init(&rs->lock); + rs->interval = interval; + rs->burst = burst; +} + +static inline void ratelimit_default_init(struct ratelimit_state *rs) +{ + return ratelimit_state_init(rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); +} + +static inline void 
ratelimit_state_exit(struct ratelimit_state *rs) +{ + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) + return; + + if (rs->missed) { + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", + current->comm, rs->missed); + rs->missed = 0; + } +} + +static inline void +ratelimit_set_flags(struct ratelimit_state *rs, unsigned long flags) +{ + rs->flags = flags; +} + +extern struct ratelimit_state printk_ratelimit_state; + +extern int ___ratelimit(struct ratelimit_state *rs, const char *func); +#define __ratelimit(state) ___ratelimit(state, __func__) + +#ifdef CONFIG_PRINTK + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON((condition) && __ratelimit(state)) + +#define WARN_RATELIMIT(condition, format, ...) \ +({ \ + static DEFINE_RATELIMIT_STATE(_rs, \ + DEFAULT_RATELIMIT_INTERVAL, \ + DEFAULT_RATELIMIT_BURST); \ + int rtn = !!(condition); \ + \ + if (unlikely(rtn && __ratelimit(&_rs))) \ + WARN(rtn, format, ##__VA_ARGS__); \ + \ + rtn; \ +}) + +#else + +#define WARN_ON_RATELIMIT(condition, state) \ + WARN_ON(condition) + +#define WARN_RATELIMIT(condition, format, ...) \ +({ \ + int rtn = WARN(condition, format, ##__VA_ARGS__); \ + rtn; \ +}) + +#endif + +#endif /* _LINUX_RATELIMIT_H */ diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h new file mode 100644 index 0000000..68ba8ce --- /dev/null +++ b/include/linux/rbtree.h @@ -0,0 +1,127 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. 
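+
+ A minimal sketch of such an insert core (struct my_node, its "key" field,
+ its "node" rb_node member, and the root/new variables are hypothetical):
+
+	struct rb_node **p = &root->rb_node, *parent = NULL;
+
+	while (*p) {
+		parent = *p;
+		if (new->key < rb_entry(parent, struct my_node, node)->key)
+			p = &parent->rb_left;
+		else
+			p = &parent->rb_right;
+	}
+	rb_link_node(&new->node, parent, p);
+	rb_insert_color(&new->node, root);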
+*/ + +#ifndef _LINUX_RBTREE_H +#define _LINUX_RBTREE_H + +#include <linux/kernel.h> +#include <linux/rcupdate.h> + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) (READ_ONCE((root)->rb_node) == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); +extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + rcu_assign_pointer(*rb_link, node); +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) ____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + +/** + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated + * + * @pos: the 'type *' to use as a loop cursor. + * @n: another 'type *' to use as temporary storage + * @root: 'rb_root *' of the rbtree. + * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. 
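+ *
+ * A minimal sketch, e.g. to tear down an entire tree (struct my_node and its
+ * "rb" member are hypothetical):
+ *
+ *	struct my_node *pos, *n;
+ *
+ *	rbtree_postorder_for_each_entry_safe(pos, n, &my_root, rb)
+ *		kfree(pos);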
+ */ +#define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ + for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ + pos && ({ n = rb_entry_safe(rb_next_postorder(&pos->field), \ + typeof(*pos), field); 1; }); \ + pos = n) + +#endif /* _LINUX_RBTREE_H */ diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h new file mode 100644 index 0000000..d076183 --- /dev/null +++ b/include/linux/rbtree_augmented.h @@ -0,0 +1,262 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _LINUX_RBTREE_AUGMENTED_H +#define _LINUX_RBTREE_AUGMENTED_H + +#include <linux/compiler.h> +#include <linux/rbtree.h> + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. 
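+ *
+ * Sketch of the resulting insertion sequence (parent/link come from the
+ * caller's own search loop; node's "rb" member and my_callbacks, a
+ * struct rb_augment_callbacks, are hypothetical):
+ *
+ *	<update the augmented data on each node from parent up to the root>
+ *	rb_link_node(&node->rb, parent, link);
+ *	rb_insert_augmented(&node->rb, root, &my_callbacks);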
+ */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + WRITE_ONCE(parent->rb_left, new); + else + WRITE_ONCE(parent->rb_right, new); + } else + WRITE_ONCE(root->rb_node, new); +} + +static inline void +__rb_change_child_rcu(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + rcu_assign_pointer(parent->rb_left, new); + else + rcu_assign_pointer(parent->rb_right, new); + } else + rcu_assign_pointer(root->rb_node, new); +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right; + struct rb_node *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. 
+ */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + child2 = successor->rb_right; + WRITE_ONCE(parent->rb_left, child2); + WRITE_ONCE(successor->rb_right, child); + rb_set_parent(child, successor); + + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + tmp = node->rb_left; + WRITE_ONCE(successor->rb_left, tmp); + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _LINUX_RBTREE_AUGMENTED_H */ diff --git a/include/linux/rculist.h b/include/linux/rculist.h new file mode 100644 index 0000000..8beb98d --- /dev/null +++ b/include/linux/rculist.h @@ -0,0 +1,675 @@ +#ifndef _LINUX_RCULIST_H +#define _LINUX_RCULIST_H + +#ifdef __KERNEL__ + +/* + * RCU-protected list version + */ +#include <linux/list.h> +#include <linux/rcupdate.h> + +/* + * Why is there no list_empty_rcu()? Because list_empty() serves this + * purpose. The list_empty() function fetches the RCU-protected pointer + * and compares it to the address of the list head, but neither dereferences + * this pointer itself nor provides this pointer to the caller. Therefore, + * it is not necessary to use rcu_dereference(), so that list_empty() can + * be used anywhere you would want to use a list_empty_rcu(). + */ + +/* + * INIT_LIST_HEAD_RCU - Initialize a list_head visible to RCU readers + * @list: list to be initialized + * + * You should instead use INIT_LIST_HEAD() for normal initialization and + * cleanup tasks, when readers have no access to the list being initialized. + * However, if the list being initialized is visible to readers, you + * need to keep the compiler from being too mischievous. 
+ */ +static inline void INIT_LIST_HEAD_RCU(struct list_head *list) +{ + WRITE_ONCE(list->next, list); + WRITE_ONCE(list->prev, list); +} + +/* + * return the ->next pointer of a list_head in an rcu safe + * way, we must not access it directly + */ +#define list_next_rcu(list) (*((struct list_head __rcu **)(&(list)->next))) + +/* + * Insert a new entry between two known consecutive entries. + * + * This is only for internal list manipulation where we know + * the prev/next entries already! + */ +#ifndef CONFIG_DEBUG_LIST +static inline void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next) +{ + new->next = next; + new->prev = prev; + rcu_assign_pointer(list_next_rcu(prev), new); + next->prev = new; +} +#else +void __list_add_rcu(struct list_head *new, + struct list_head *prev, struct list_head *next); +#endif + +/** + * list_add_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it after + * + * Insert a new entry after the specified head. + * This is good for implementing stacks. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_rcu(struct list_head *new, struct list_head *head) +{ + __list_add_rcu(new, head, head->next); +} + +/** + * list_add_tail_rcu - add a new entry to rcu-protected list + * @new: new entry to be added + * @head: list head to add it before + * + * Insert a new entry before the specified head. + * This is useful for implementing queues. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_add_tail_rcu() + * or list_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + */ +static inline void list_add_tail_rcu(struct list_head *new, + struct list_head *head) +{ + __list_add_rcu(new, head->prev, head); +} + +/** + * list_del_rcu - deletes entry from list without re-initialization + * @entry: the element to delete from the list. + * + * Note: list_empty() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as list_del_rcu() + * or list_add_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * list_for_each_entry_rcu(). + * + * Note that the caller is not permitted to immediately free + * the newly deleted entry. Instead, either synchronize_rcu() + * or call_rcu() must be used to defer freeing until an RCU + * grace period has elapsed. 
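+ *
+ * A hedged deletion sketch, assuming an element "p" of a hypothetical
+ * struct foo with a "list" member and an update-side lock "mylock":
+ *
+ *	spin_lock(&mylock);
+ *	list_del_rcu(&p->list);
+ *	spin_unlock(&mylock);
+ *	synchronize_rcu();
+ *	kfree(p);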
+ */ +static inline void list_del_rcu(struct list_head *entry) +{ + __list_del_entry(entry); + entry->prev = LIST_POISON2; +} + +/** + * hlist_del_init_rcu - deletes entry from hash list with re-initialization + * @n: the element to delete from the hash list. + * + * Note: list_unhashed() on the node return true after this. It is + * useful for RCU based read lockfree traversal if the writer side + * must know if the list entry is still hashed or already unhashed. + * + * In particular, it means that we can not poison the forward pointers + * that may still be used for walking the hash list and we can only + * zero the pprev pointer so list_unhashed() will return true after + * this. + * + * The caller must take whatever precautions are necessary (such as + * holding appropriate locks) to avoid racing with another + * list-mutation primitive, such as hlist_add_head_rcu() or + * hlist_del_rcu(), running on this same list. However, it is + * perfectly legal to run concurrently with the _rcu list-traversal + * primitives, such as hlist_for_each_entry_rcu(). + */ +static inline void hlist_del_init_rcu(struct hlist_node *n) +{ + if (!hlist_unhashed(n)) { + __hlist_del(n); + n->pprev = NULL; + } +} + +/** + * list_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + * Note: @old should not be empty. + */ +static inline void list_replace_rcu(struct list_head *old, + struct list_head *new) +{ + new->next = old->next; + new->prev = old->prev; + rcu_assign_pointer(list_next_rcu(new->prev), new); + new->next->prev = new; + old->prev = LIST_POISON2; +} + +/** + * __list_splice_init_rcu - join an RCU-protected list into an existing list. + * @list: the RCU-protected list to splice + * @prev: points to the last element of the existing list + * @next: points to the first element of the existing list + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + * + * The list pointed to by @prev and @next can be RCU-read traversed + * concurrently with this function. + * + * Note that this function blocks. + * + * Important note: the caller must take whatever action is necessary to prevent + * any other updates to the existing list. In principle, it is possible to + * modify the list as soon as sync() begins execution. If this sort of thing + * becomes necessary, an alternative version based on call_rcu() could be + * created. But only if -really- needed -- there is no shortage of RCU API + * members. + */ +static inline void __list_splice_init_rcu(struct list_head *list, + struct list_head *prev, + struct list_head *next, + void (*sync)(void)) +{ + struct list_head *first = list->next; + struct list_head *last = list->prev; + + /* + * "first" and "last" tracking list, so initialize it. RCU readers + * have access to this list, so we must use INIT_LIST_HEAD_RCU() + * instead of INIT_LIST_HEAD(). + */ + + INIT_LIST_HEAD_RCU(list); + + /* + * At this point, the list body still points to the source list. + * Wait for any readers to finish using the list before splicing + * the list body into the new list. Any new readers will see + * an empty list. + */ + + sync(); + + /* + * Readers are finished with the source list, so perform splice. + * The order is important if the new list is global and accessible + * to concurrent RCU readers. Note that RCU readers are not + * permitted to traverse the prev pointers without excluding + * this function. 
+ */ + + last->next = next; + rcu_assign_pointer(list_next_rcu(prev), first); + first->prev = prev; + next->prev = last; +} + +/** + * list_splice_init_rcu - splice an RCU-protected list into an existing list, + * designed for stacks. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head, head->next, sync); +} + +/** + * list_splice_tail_init_rcu - splice an RCU-protected list into an existing + * list, designed for queues. + * @list: the RCU-protected list to splice + * @head: the place in the existing list to splice the first list into + * @sync: function to sync: synchronize_rcu(), synchronize_sched(), ... + */ +static inline void list_splice_tail_init_rcu(struct list_head *list, + struct list_head *head, + void (*sync)(void)) +{ + if (!list_empty(list)) + __list_splice_init_rcu(list, head->prev, head, sync); +} + +/** + * list_entry_rcu - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_entry_rcu(ptr, type, member) \ + container_of(lockless_dereference(ptr), type, member) + +/** + * Where are list_empty_rcu() and list_first_entry_rcu()? + * + * Implementing those functions following their counterparts list_empty() and + * list_first_entry() is not advisable because they lead to subtle race + * conditions as the following snippet shows: + * + * if (!list_empty_rcu(mylist)) { + * struct foo *bar = list_first_entry_rcu(mylist, struct foo, list_member); + * do_something(bar); + * } + * + * The list may not be empty when list_empty_rcu checks it, but it may be when + * list_first_entry_rcu rereads the ->next pointer. + * + * Rereading the ->next pointer is not a problem for list_empty() and + * list_first_entry() because they would be protected by a lock that blocks + * writers. + * + * See list_first_or_null_rcu for an alternative. + */ + +/** + * list_first_or_null_rcu - get the first element from a list + * @ptr: the list head to take the element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the list is empty, it returns NULL. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_first_or_null_rcu(ptr, type, member) \ +({ \ + struct list_head *__ptr = (ptr); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ + likely(__ptr != __next) ? list_entry_rcu(__next, type, member) : NULL; \ +}) + +/** + * list_next_or_null_rcu - get the first element from a list + * @head: the head for the list. + * @ptr: the list head to take the next element from. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * Note that if the ptr is at the end of the list, NULL is returned. 
+ * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). + */ +#define list_next_or_null_rcu(head, ptr, type, member) \ +({ \ + struct list_head *__head = (head); \ + struct list_head *__ptr = (ptr); \ + struct list_head *__next = READ_ONCE(__ptr->next); \ + likely(__next != __head) ? list_entry_rcu(__next, type, \ + member) : NULL; \ +}) + +/** + * list_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as list_add_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define list_for_each_entry_rcu(pos, head, member) \ + for (pos = list_entry_rcu((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) + +/** + * list_entry_lockless - get the struct for this entry + * @ptr: the &struct list_head pointer. + * @type: the type of the struct this is embedded in. + * @member: the name of the list_head within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_entry_lockless(ptr, type, member) \ + container_of((typeof(ptr))lockless_dereference(ptr), type, member) + +/** + * list_for_each_entry_lockless - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_struct within the struct. + * + * This primitive may safely run concurrently with the _rcu list-mutation + * primitives such as list_add_rcu(), but requires some implicit RCU + * read-side guarding. One example is running within a special + * exception-time environment where preemption is disabled and where + * lockdep cannot be invoked (in which case updaters must use RCU-sched, + * as in synchronize_sched(), call_rcu_sched(), and friends). Another + * example is when items are added to the list, but never deleted. + */ +#define list_for_each_entry_lockless(pos, head, member) \ + for (pos = list_entry_lockless((head)->next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_lockless(pos->member.next, typeof(*pos), member)) + +/** + * list_for_each_entry_continue_rcu - continue iteration over list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the list_head within the struct. + * + * Continue to iterate over list of given type, continuing after + * the current position. + */ +#define list_for_each_entry_continue_rcu(pos, head, member) \ + for (pos = list_entry_rcu(pos->member.next, typeof(*pos), member); \ + &pos->member != (head); \ + pos = list_entry_rcu(pos->member.next, typeof(*pos), member)) + +/** + * hlist_del_rcu - deletes entry from hash list without re-initialization + * @n: the element to delete from the hash list. 
+ * + * Note: list_unhashed() on entry does not return true after this, + * the entry is in an undefined state. It is useful for RCU based + * lockfree traversal. + * + * In particular, it means that we can not poison the forward + * pointers that may still be used for walking the hash list. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry(). + */ +static inline void hlist_del_rcu(struct hlist_node *n) +{ + __hlist_del(n); + n->pprev = LIST_POISON2; +} + +/** + * hlist_replace_rcu - replace old entry by new one + * @old : the element to be replaced + * @new : the new element to insert + * + * The @old entry will be replaced with the @new entry atomically. + */ +static inline void hlist_replace_rcu(struct hlist_node *old, + struct hlist_node *new) +{ + struct hlist_node *next = old->next; + + new->next = next; + new->pprev = old->pprev; + rcu_assign_pointer(*(struct hlist_node __rcu **)new->pprev, new); + if (next) + new->next->pprev = &new->next; + old->pprev = LIST_POISON2; +} + +/* + * return the first or the next element in an RCU protected hlist + */ +#define hlist_first_rcu(head) (*((struct hlist_node __rcu **)(&(head)->first))) +#define hlist_next_rcu(node) (*((struct hlist_node __rcu **)(&(node)->next))) +#define hlist_pprev_rcu(node) (*((struct hlist_node __rcu **)((node)->pprev))) + +/** + * hlist_add_head_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_add_head_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *first = h->first; + + n->next = first; + n->pprev = &h->first; + rcu_assign_pointer(hlist_first_rcu(h), n); + if (first) + first->pprev = &n->next; +} + +/** + * hlist_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the specified hlist, + * while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). 
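+ *
+ * A reader/writer sketch under stated assumptions ("p", "myhead", the
+ * "node" member and do_something() are illustrative, not part of this API):
+ *
+ *	hlist_add_tail_rcu(&p->node, &myhead);	(writer, update-side lock held)
+ *
+ *	rcu_read_lock();			(reader)
+ *	hlist_for_each_entry_rcu(p, &myhead, node)
+ *		do_something(p);
+ *	rcu_read_unlock();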
+ */ +static inline void hlist_add_tail_rcu(struct hlist_node *n, + struct hlist_head *h) +{ + struct hlist_node *i, *last = NULL; + + for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_next_rcu(last), n); + } else { + hlist_add_head_rcu(n, h); + } +} + +/** + * hlist_add_before_rcu + * @n: the new element to add to the hash list. + * @next: the existing element to add the new element before. + * + * Description: + * Adds the specified element to the specified hlist + * before the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_before_rcu(struct hlist_node *n, + struct hlist_node *next) +{ + n->pprev = next->pprev; + n->next = next; + rcu_assign_pointer(hlist_pprev_rcu(n), n); + next->pprev = &n->next; +} + +/** + * hlist_add_behind_rcu + * @n: the new element to add to the hash list. + * @prev: the existing element to add the new element after. + * + * Description: + * Adds the specified element to the specified hlist + * after the specified node while permitting racing traversals. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_add_head_rcu() + * or hlist_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. + */ +static inline void hlist_add_behind_rcu(struct hlist_node *n, + struct hlist_node *prev) +{ + n->next = prev->next; + n->pprev = &prev->next; + rcu_assign_pointer(hlist_next_rcu(prev), n); + if (n->next) + n->next->pprev = &n->next; +} + +#define __hlist_for_each_rcu(pos, head) \ + for (pos = rcu_dereference(hlist_first_rcu(head)); \ + pos; \ + pos = rcu_dereference(hlist_next_rcu(pos))) + +/** + * hlist_for_each_entry_rcu - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_rcu_notrace - iterate over rcu list of given type (for tracing) + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. 
+ * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + * + * This is the same as hlist_for_each_entry_rcu() except that it does + * not do any RCU debugging or tracing. + */ +#define hlist_for_each_entry_rcu_notrace(pos, head, member) \ + for (pos = hlist_entry_safe (rcu_dereference_raw_notrace(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw_notrace(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_rcu_bh - iterate over rcu list of given type + * @pos: the type * to use as a loop cursor. + * @head: the head for your list. + * @member: the name of the hlist_node within the struct. + * + * This list-traversal primitive may safely run concurrently with + * the _rcu list-mutation primitives such as hlist_add_head_rcu() + * as long as the traversal is guarded by rcu_read_lock(). + */ +#define hlist_for_each_entry_rcu_bh(pos, head, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_first_rcu(head)),\ + typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu(\ + &(pos)->member)), typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_continue_rcu - iterate over a hlist continuing after current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_continue_rcu(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_continue_rcu_bh - iterate over a hlist continuing after current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. + */ +#define hlist_for_each_entry_continue_rcu_bh(pos, member) \ + for (pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe(rcu_dereference_bh(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) + +/** + * hlist_for_each_entry_from_rcu - iterate over a hlist continuing from current point + * @pos: the type * to use as a loop cursor. + * @member: the name of the hlist_node within the struct. 
+ */ +#define hlist_for_each_entry_from_rcu(pos, member) \ + for (; pos; \ + pos = hlist_entry_safe(rcu_dereference_raw(hlist_next_rcu( \ + &(pos)->member)), typeof(*(pos)), member)) + +#endif /* __KERNEL__ */ +#endif diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h new file mode 100644 index 0000000..c99d78a --- /dev/null +++ b/include/linux/rcupdate.h @@ -0,0 +1,16 @@ +#ifndef __TOOLS_LINUX_RCUPDATE_H +#define __TOOLS_LINUX_RCUPDATE_H + +#include <urcu.h> +#include <linux/compiler.h> + +#define rcu_dereference_check(p, c) rcu_dereference(p) +#define rcu_dereference_raw(p) rcu_dereference(p) +#define rcu_dereference_protected(p, c) rcu_dereference(p) +#define rcu_access_pointer(p) READ_ONCE(p) + +#define kfree_rcu(ptr, rcu_head) kfree(ptr) /* XXX */ + +#define RCU_INIT_POINTER(p, v) WRITE_ONCE(p, v) + +#endif /* __TOOLS_LINUX_RCUPDATE_H */ diff --git a/include/linux/reboot.h b/include/linux/reboot.h new file mode 100644 index 0000000..4c67a15 --- /dev/null +++ b/include/linux/reboot.h @@ -0,0 +1,74 @@ +#ifndef _LINUX_REBOOT_H +#define _LINUX_REBOOT_H + +#include <linux/notifier.h> + +#define SYS_DOWN 0x0001 /* Notify of system down */ +#define SYS_RESTART SYS_DOWN +#define SYS_HALT 0x0002 /* Notify of system halt */ +#define SYS_POWER_OFF 0x0003 /* Notify of system power off */ + +enum reboot_mode { + REBOOT_COLD = 0, + REBOOT_WARM, + REBOOT_HARD, + REBOOT_SOFT, + REBOOT_GPIO, +}; +extern enum reboot_mode reboot_mode; + +enum reboot_type { + BOOT_TRIPLE = 't', + BOOT_KBD = 'k', + BOOT_BIOS = 'b', + BOOT_ACPI = 'a', + BOOT_EFI = 'e', + BOOT_CF9_FORCE = 'p', + BOOT_CF9_SAFE = 'q', +}; +extern enum reboot_type reboot_type; + +extern int reboot_default; +extern int reboot_cpu; +extern int reboot_force; + + +static inline int register_reboot_notifier(struct notifier_block *n) { return 0; } +static inline int unregister_reboot_notifier(struct notifier_block *n) { return 0; } + +extern int register_restart_handler(struct notifier_block *); +extern int unregister_restart_handler(struct notifier_block *); +extern void do_kernel_restart(char *cmd); + +/* + * Architecture-specific implementations of sys_reboot commands. + */ + +extern void migrate_to_reboot_cpu(void); +extern void machine_restart(char *cmd); +extern void machine_halt(void); +extern void machine_power_off(void); + +extern void machine_shutdown(void); +struct pt_regs; +extern void machine_crash_shutdown(struct pt_regs *); + +/* + * Architecture independent implemenations of sys_reboot commands. 
+ */ + +extern void kernel_restart_prepare(char *cmd); +extern void kernel_restart(char *cmd); +extern void kernel_halt(void); +extern void kernel_power_off(void); + +extern int C_A_D; /* for sysctl */ +void ctrl_alt_del(void); + +#define POWEROFF_CMD_PATH_LEN 256 +extern char poweroff_cmd[POWEROFF_CMD_PATH_LEN]; + +extern void orderly_poweroff(bool force); +extern void orderly_reboot(void); + +#endif /* _LINUX_REBOOT_H */ diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h new file mode 100644 index 0000000..e5b35ed --- /dev/null +++ b/include/linux/rhashtable.h @@ -0,0 +1,912 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _LINUX_RHASHTABLE_H +#define _LINUX_RHASHTABLE_H + +#include <linux/atomic.h> +#include <linux/cache.h> +#include <linux/compiler.h> +#include <linux/cpumask.h> +#include <linux/err.h> +#include <linux/errno.h> +#include <linux/jhash.h> +#include <linux/list_nulls.h> +#include <linux/workqueue.h> +#include <linux/mutex.h> +#include <linux/spinlock.h> +#include <linux/rcupdate.h> + +/* + * The end of the chain is marked with a special nulls marks which has + * the following format: + * + * +-------+-----------------------------------------------------+-+ + * | Base | Hash |1| + * +-------+-----------------------------------------------------+-+ + * + * Base (4 bits) : Reserved to distinguish between multiple tables. + * Specified via &struct rhashtable_params.nulls_base. + * Hash (27 bits): Full hash (unmasked) of first element added to bucket + * 1 (1 bit) : Nulls marker (always set) + * + * The remaining bits of the next pointer remain unused for now. 
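+ *
+ * As a hedged worked example, assuming the usual NULLS_MARKER() definition
+ * from list_nulls.h ((value << 1) | 1), rht_marker() below stores
+ * ((nulls_base + hash) << 1) | 1 in the last ->next pointer, so bit 0 stays
+ * set and rht_is_a_nulls() can recognise the end of a chain.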
+ */ +#define RHT_BASE_BITS 4 +#define RHT_HASH_BITS 27 +#define RHT_BASE_SHIFT RHT_HASH_BITS + +/* Base bits plus 1 bit for nulls marker */ +#define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) + +struct rhash_head { + struct rhash_head __rcu *next; +}; + +/** + * struct bucket_table - Table of hash buckets + * @size: Number of hash buckets + * @rehash: Current bucket being rehashed + * @hash_rnd: Random seed to fold into hash + * @locks_mask: Mask to apply before accessing locks[] + * @locks: Array of spinlocks protecting individual buckets + * @walkers: List of active walkers + * @rcu: RCU structure for freeing the table + * @future_tbl: Table under construction during rehashing + * @buckets: size * hash buckets + */ +struct bucket_table { + unsigned int size; + unsigned int rehash; + u32 hash_rnd; + unsigned int locks_mask; + spinlock_t *locks; + struct list_head walkers; + struct rcu_head rcu; + + struct bucket_table __rcu *future_tbl; + + struct rhash_head __rcu *buckets[] ____cacheline_aligned_in_smp; +}; + +/** + * struct rhashtable_compare_arg - Key for the function rhashtable_compare + * @ht: Hash table + * @key: Key to compare against + */ +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed); +typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 len, u32 seed); +typedef int (*rht_obj_cmpfn_t)(struct rhashtable_compare_arg *arg, + const void *obj); + +struct rhashtable; + +/** + * struct rhashtable_params - Hash table construction parameters + * @nelem_hint: Hint on number of elements, should be 75% of desired size + * @key_len: Length of key + * @key_offset: Offset of key in struct to be hashed + * @head_offset: Offset of rhash_head in struct to be hashed + * @insecure_max_entries: Maximum number of entries (may be exceeded) + * @max_size: Maximum size while expanding + * @min_size: Minimum size while shrinking + * @nulls_base: Base value to generate nulls marker + * @insecure_elasticity: Set to true to disable chain length checks + * @automatic_shrinking: Enable automatic shrinking of tables + * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) + * @obj_hashfn: Function to hash object + * @obj_cmpfn: Function to compare key with object + */ +struct rhashtable_params { + size_t nelem_hint; + size_t key_len; + size_t key_offset; + size_t head_offset; + unsigned int insecure_max_entries; + unsigned int max_size; + unsigned int min_size; + u32 nulls_base; + bool insecure_elasticity; + bool automatic_shrinking; + size_t locks_mul; + rht_hashfn_t hashfn; + rht_obj_hashfn_t obj_hashfn; + rht_obj_cmpfn_t obj_cmpfn; +}; + +/** + * struct rhashtable - Hash table handle + * @tbl: Bucket table + * @nelems: Number of elements in table + * @key_len: Key length for hashfn + * @elasticity: Maximum chain length before rehash + * @p: Configuration parameters + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @lock: Spin lock to protect walker list + */ +struct rhashtable { + struct bucket_table __rcu *tbl; + atomic_t nelems; + unsigned int key_len; + unsigned int elasticity; + struct rhashtable_params p; + struct work_struct run_work; + struct mutex mutex; + spinlock_t lock; +}; + +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @tbl: The table that we were walking over + */ +struct 
rhashtable_walker { + struct list_head list; + struct bucket_table *tbl; +}; + +/** + * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * @ht: Table to iterate through + * @p: Current pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhashtable_walker *walker; + unsigned int slot; + unsigned int skip; +}; + +static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) +{ + return NULLS_MARKER(ht->p.nulls_base + hash); +} + +#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ + ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + +static inline bool rht_is_a_nulls(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr & 1); +} + +static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr) >> 1; +} + +static inline void *rht_obj(const struct rhashtable *ht, + const struct rhash_head *he) +{ + return (char *)he - ht->p.head_offset; +} + +static inline unsigned int rht_bucket_index(const struct bucket_table *tbl, + unsigned int hash) +{ + return (hash >> RHT_HASH_RESERVED_SPACE) & (tbl->size - 1); +} + +static inline unsigned int rht_key_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const void *key, const struct rhashtable_params params) +{ + unsigned int hash; + + /* params must be equal to ht->p if it isn't constant. */ + if (!__builtin_constant_p(params.key_len)) + hash = ht->p.hashfn(key, ht->key_len, tbl->hash_rnd); + else if (params.key_len) { + unsigned int key_len = params.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else if (key_len & (sizeof(u32) - 1)) + hash = jhash(key, key_len, tbl->hash_rnd); + else + hash = jhash2(key, key_len / sizeof(u32), + tbl->hash_rnd); + } else { + unsigned int key_len = ht->p.key_len; + + if (params.hashfn) + hash = params.hashfn(key, key_len, tbl->hash_rnd); + else + hash = jhash(key, key_len, tbl->hash_rnd); + } + + return rht_bucket_index(tbl, hash); +} + +static inline unsigned int rht_head_hashfn( + struct rhashtable *ht, const struct bucket_table *tbl, + const struct rhash_head *he, const struct rhashtable_params params) +{ + const char *ptr = rht_obj(ht, he); + + return likely(params.obj_hashfn) ? 
+	       rht_bucket_index(tbl, params.obj_hashfn(ptr, params.key_len ?:
+						       ht->p.key_len,
+						       tbl->hash_rnd)) :
+	       rht_key_hashfn(ht, tbl, ptr + params.key_offset, params);
+}
+
+/**
+ * rht_grow_above_75 - returns true if nelems > 0.75 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_75(const struct rhashtable *ht,
+				     const struct bucket_table *tbl)
+{
+	/* Expand table when exceeding 75% load */
+	return atomic_read(&ht->nelems) > (tbl->size / 4 * 3) &&
+	       (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+/**
+ * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_shrink_below_30(const struct rhashtable *ht,
+				       const struct bucket_table *tbl)
+{
+	/* Shrink table beneath 30% load */
+	return atomic_read(&ht->nelems) < (tbl->size * 3 / 10) &&
+	       tbl->size > ht->p.min_size;
+}
+
+/**
+ * rht_grow_above_100 - returns true if nelems > table-size
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_100(const struct rhashtable *ht,
+				      const struct bucket_table *tbl)
+{
+	return atomic_read(&ht->nelems) > tbl->size &&
+	       (!ht->p.max_size || tbl->size < ht->p.max_size);
+}
+
+/**
+ * rht_grow_above_max - returns true if table is above maximum
+ * @ht: hash table
+ * @tbl: current table
+ */
+static inline bool rht_grow_above_max(const struct rhashtable *ht,
+				      const struct bucket_table *tbl)
+{
+	return ht->p.insecure_max_entries &&
+	       atomic_read(&ht->nelems) >= ht->p.insecure_max_entries;
+}
+
+/* The bucket lock is selected based on the hash and protects mutations
+ * on a group of hash buckets.
+ *
+ * A maximum of tbl->size/2 bucket locks is allocated. This ensures that
+ * a single lock always covers both buckets which may both contain
+ * entries which link to the same bucket of the old table during resizing.
+ * This allows the locking to be simplified, as locking the bucket in both
+ * tables during a resize always guarantees protection.
+ *
+ * IMPORTANT: When holding the bucket lock of both the old and new table
+ * during expansions and shrinking, the old bucket lock must always be
+ * acquired first.
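+ *
+ * A minimal single-table sketch (tbl, hash and the chain mutation are
+ * assumed/illustrative), mirroring how the insert/remove helpers below
+ * take the lock:
+ *
+ *	spinlock_t *lock = rht_bucket_lock(tbl, hash);
+ *
+ *	spin_lock_bh(lock);
+ *	(mutate the chain rooted at tbl->buckets[hash])
+ *	spin_unlock_bh(lock);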
+ */ +static inline spinlock_t *rht_bucket_lock(const struct bucket_table *tbl, + unsigned int hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} + +#ifdef CONFIG_PROVE_LOCKING +int lockdep_rht_mutex_is_held(struct rhashtable *ht); +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); +#else +static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) +{ + return 1; +} + +static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, + u32 hash) +{ + return 1; +} +#endif /* CONFIG_PROVE_LOCKING */ + +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params); + +struct bucket_table *rhashtable_insert_slow(struct rhashtable *ht, + const void *key, + struct rhash_head *obj, + struct bucket_table *old_tbl); +int rhashtable_insert_rehash(struct rhashtable *ht, struct bucket_table *tbl); + +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter, + gfp_t gfp); +void rhashtable_walk_exit(struct rhashtable_iter *iter); +int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +void *rhashtable_walk_next(struct rhashtable_iter *iter); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); + +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg); +void rhashtable_destroy(struct rhashtable *ht); + +#define rht_dereference(p, ht) \ + rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) + +#define rht_dereference_rcu(p, ht) \ + rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) + +#define rht_dereference_bucket(p, tbl, hash) \ + rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) + +#define rht_dereference_bucket_rcu(p, tbl, hash) \ + rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + +#define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) + +/** + * rht_for_each_continue - continue iterating over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each_continue(pos, head, tbl, hash) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ + !rht_is_a_nulls(pos); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each - iterate over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each(pos, tbl, hash) \ + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_continue - continue iterating over hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each_entry - iterate over hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. 
+ * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + tbl, hash, member) + +/** + * rht_for_each_entry_safe - safely iterate over hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @next: the &struct rhash_head to use as next in loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive allows for the looped code to + * remove the loop cursor from the list. + */ +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL) + +/** + * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + !rht_is_a_nulls(pos); \ + pos = rcu_dereference_raw(pos->next)) + +/** + * rht_for_each_rcu - iterate over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu(pos, tbl, hash) \ + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) + +/** + * rht_for_each_entry_rcu - iterate over rcu hash chain of given type + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. 
+ * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ + tbl, hash, member) + +static inline int rhashtable_compare(struct rhashtable_compare_arg *arg, + const void *obj) +{ + struct rhashtable *ht = arg->ht; + const char *ptr = obj; + + return memcmp(ptr + ht->p.key_offset, arg->key, ht->p.key_len); +} + +/** + * rhashtable_lookup_fast - search hash table, inlined version + * @ht: hash table + * @key: the pointer to the key + * @params: hash table parameters + * + * Computes the hash value for the key and traverses the bucket chain looking + * for a entry with an identical key. The first matching entry is returned. + * + * Returns the first entry on which the compare function returned true. + */ +static inline void *rhashtable_lookup_fast( + struct rhashtable *ht, const void *key, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + const struct bucket_table *tbl; + struct rhash_head *he; + unsigned int hash; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); +restart: + hash = rht_key_hashfn(ht, tbl, key, params); + rht_for_each_rcu(he, tbl, hash) { + if (params.obj_cmpfn ? + params.obj_cmpfn(&arg, rht_obj(ht, he)) : + rhashtable_compare(&arg, rht_obj(ht, he))) + continue; + rcu_read_unlock(); + return rht_obj(ht, he); + } + + /* Ensure we see any new tables. */ + smp_rmb(); + + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(tbl)) + goto restart; + rcu_read_unlock(); + + return NULL; +} + +/* Internal function, please use rhashtable_insert_fast() instead */ +static inline int __rhashtable_insert_fast( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; + struct bucket_table *tbl, *new_tbl; + struct rhash_head *head; + spinlock_t *lock; + unsigned int elasticity; + unsigned int hash; + int err; + +restart: + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* All insertions must grab the oldest table containing + * the hashed bucket that is yet to be rehashed. + */ + for (;;) { + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + spin_lock_bh(lock); + + if (tbl->rehash <= hash) + break; + + spin_unlock_bh(lock); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } + + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (unlikely(new_tbl)) { + tbl = rhashtable_insert_slow(ht, key, obj, new_tbl); + if (!IS_ERR_OR_NULL(tbl)) + goto slow_path; + + err = PTR_ERR(tbl); + goto out; + } + + err = -E2BIG; + if (unlikely(rht_grow_above_max(ht, tbl))) + goto out; + + if (unlikely(rht_grow_above_100(ht, tbl))) { +slow_path: + spin_unlock_bh(lock); + err = rhashtable_insert_rehash(ht, tbl); + rcu_read_unlock(); + if (err) + return err; + + goto restart; + } + + err = -EEXIST; + elasticity = ht->elasticity; + rht_for_each(head, tbl, hash) { + if (key && + unlikely(!(params.obj_cmpfn ? 
+ params.obj_cmpfn(&arg, rht_obj(ht, head)) : + rhashtable_compare(&arg, rht_obj(ht, head))))) + goto out; + if (!--elasticity) + goto slow_path; + } + + err = 0; + + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + if (rht_grow_above_75(ht, tbl)) + schedule_work(&ht->run_work); + +out: + spin_unlock_bh(lock); + rcu_read_unlock(); + + return err; +} + +/** + * rhashtable_insert_fast - insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + return __rhashtable_insert_fast(ht, NULL, obj, params); +} + +/** + * rhashtable_lookup_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +static inline int rhashtable_lookup_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, + params); +} + +/** + * rhashtable_lookup_insert_key - search and insert object to hash table + * with explicit key + * @ht: hash table + * @key: key + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + * + * Returns zero on success. 
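+ *
+ * A hedged end-to-end sketch; struct test_obj, its fields and test_params
+ * are illustrative assumptions, not part of this header:
+ *
+ *	struct test_obj {
+ *		int			key;
+ *		struct rhash_head	node;
+ *	};
+ *
+ *	static const struct rhashtable_params test_params = {
+ *		.head_offset = offsetof(struct test_obj, node),
+ *		.key_offset  = offsetof(struct test_obj, key),
+ *		.key_len     = sizeof(int),
+ *	};
+ *
+ *	rhashtable_init(&ht, &test_params);
+ *	rhashtable_insert_fast(&ht, &obj->node, test_params);
+ *	obj = rhashtable_lookup_fast(&ht, &key, test_params);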
+ */ +static inline int rhashtable_lookup_insert_key( + struct rhashtable *ht, const void *key, struct rhash_head *obj, + const struct rhashtable_params params) +{ + BUG_ON(!ht->p.obj_hashfn || !key); + + return __rhashtable_insert_fast(ht, key, obj, params); +} + +/* Internal function, please use rhashtable_remove_fast() instead */ +static inline int __rhashtable_remove_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj, const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t * lock; + unsigned int hash; + int err = -ENOENT; + + hash = rht_head_hashfn(ht, tbl, obj, params); + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(*pprev, obj->next); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_remove_fast - remove object from hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Since the hash chain is single linked, the removal operation needs to + * walk the bucket chain upon removal. The removal operation is thus + * considerable slow if the hash table is not correctly sized. + * + * Will automatically shrink the table via rhashtable_expand() if the + * shrink_decision function specified at rhashtable_init() returns true. + * + * Returns zero on success, -ENOENT if the entry could not be found. + */ +static inline int rhashtable_remove_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_remove_fast(ht, tbl, obj, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + if (err) + goto out; + + atomic_dec(&ht->nelems); + if (unlikely(ht->p.automatic_shrinking && + rht_shrink_below_30(ht, tbl))) + schedule_work(&ht->run_work); + +out: + rcu_read_unlock(); + + return err; +} + +/* Internal function, please use rhashtable_replace_fast() instead */ +static inline int __rhashtable_replace_fast( + struct rhashtable *ht, struct bucket_table *tbl, + struct rhash_head *obj_old, struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct rhash_head __rcu **pprev; + struct rhash_head *he; + spinlock_t *lock; + unsigned int hash; + int err = -ENOENT; + + /* Minimally, the old and new objects must have same hash + * (which should mean identifiers are the same). 
+ */ + hash = rht_head_hashfn(ht, tbl, obj_old, params); + if (hash != rht_head_hashfn(ht, tbl, obj_new, params)) + return -EINVAL; + + lock = rht_bucket_lock(tbl, hash); + + spin_lock_bh(lock); + + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { + if (he != obj_old) { + pprev = &he->next; + continue; + } + + rcu_assign_pointer(obj_new->next, obj_old->next); + rcu_assign_pointer(*pprev, obj_new); + err = 0; + break; + } + + spin_unlock_bh(lock); + + return err; +} + +/** + * rhashtable_replace_fast - replace an object in hash table + * @ht: hash table + * @obj_old: pointer to hash head inside object being replaced + * @obj_new: pointer to hash head inside object which is new + * @params: hash table parameters + * + * Replacing an object doesn't affect the number of elements in the hash table + * or bucket, so we don't need to worry about shrinking or expanding the + * table here. + * + * Returns zero on success, -ENOENT if the entry could not be found, + * -EINVAL if hash is not the same for the old and new objects. + */ +static inline int rhashtable_replace_fast( + struct rhashtable *ht, struct rhash_head *obj_old, + struct rhash_head *obj_new, + const struct rhashtable_params params) +{ + struct bucket_table *tbl; + int err; + + rcu_read_lock(); + + tbl = rht_dereference_rcu(ht->tbl, ht); + + /* Because we have already taken (and released) the bucket + * lock in old_tbl, if we find that future_tbl is not yet + * visible then that guarantees the entry to still be in + * the old tbl if it exists. + */ + while ((err = __rhashtable_replace_fast(ht, tbl, obj_old, + obj_new, params)) && + (tbl = rht_dereference_rcu(tbl->future_tbl, ht))) + ; + + rcu_read_unlock(); + + return err; +} + +#endif /* _LINUX_RHASHTABLE_H */ diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h new file mode 100644 index 0000000..9d70e6e --- /dev/null +++ b/include/linux/rwsem.h @@ -0,0 +1,28 @@ +#ifndef __TOOLS_LINUX_RWSEM_H +#define __TOOLS_LINUX_RWSEM_H + +#include <pthread.h> + +struct rw_semaphore { + pthread_rwlock_t lock; +}; + +#define __RWSEM_INITIALIZER(name) \ + { .lock = PTHREAD_RWLOCK_INITIALIZER } + +#define DECLARE_RWSEM(name) \ + struct rw_semaphore name = __RWSEM_INITIALIZER(name) + +static inline void init_rwsem(struct rw_semaphore *lock) +{ + pthread_rwlock_init(&lock->lock, NULL); +} + +#define down_read(l) pthread_rwlock_rdlock(&(l)->lock) +#define down_read_trylock(l) (!pthread_rwlock_tryrdlock(&(l)->lock)) +#define up_read(l) pthread_rwlock_unlock(&(l)->lock) + +#define down_write(l) pthread_rwlock_wrlock(&(l)->lock) +#define up_write(l) pthread_rwlock_unlock(&(l)->lock) + +#endif /* __TOOLS_LINUX_RWSEM_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h new file mode 100644 index 0000000..0316f50 --- /dev/null +++ b/include/linux/sched.h @@ -0,0 +1,144 @@ +#ifndef __TOOLS_LINUX_SCHED_H +#define __TOOLS_LINUX_SCHED_H + +#include <pthread.h> +#include <time.h> +#include <linux/atomic.h> +#include <linux/bug.h> +#include <linux/completion.h> +#include <linux/jiffies.h> +#include <linux/time64.h> + +#define TASK_RUNNING 0 +#define TASK_INTERRUPTIBLE 1 +#define TASK_UNINTERRUPTIBLE 2 +#define __TASK_STOPPED 4 +#define __TASK_TRACED 8 +/* in tsk->exit_state */ +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) +/* in tsk->state again */ +#define TASK_DEAD 64 +#define TASK_WAKEKILL 128 +#define TASK_WAKING 256 +#define TASK_PARKED 512 +#define TASK_NOLOAD 1024 +#define TASK_NEW 2048 +#define TASK_IDLE_WORKER 4096 +#define 
TASK_STATE_MAX 8192 + +/* Convenience macros for the sake of set_task_state */ +#define TASK_KILLABLE (TASK_WAKEKILL | TASK_UNINTERRUPTIBLE) +#define TASK_STOPPED (TASK_WAKEKILL | __TASK_STOPPED) +#define TASK_TRACED (TASK_WAKEKILL | __TASK_TRACED) + +#define TASK_IDLE (TASK_UNINTERRUPTIBLE | TASK_NOLOAD) + +/* Convenience macros for the sake of wake_up */ +#define TASK_NORMAL (TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE) +#define TASK_ALL (TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED) + +#define TASK_COMM_LEN 16 + +#define PF_EXITING 0x00000004 /* getting shut down */ +#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */ +#define PF_VCPU 0x00000010 /* I'm a virtual CPU */ +#define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */ +#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */ +#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */ +#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */ +#define PF_DUMPCORE 0x00000200 /* dumped core */ +#define PF_SIGNALED 0x00000400 /* killed by a signal */ +#define PF_MEMALLOC 0x00000800 /* Allocating memory */ +#define PF_NPROC_EXCEEDED 0x00001000 /* set_user noticed that RLIMIT_NPROC was exceeded */ +#define PF_USED_MATH 0x00002000 /* if unset the fpu must be initialized before use */ +#define PF_USED_ASYNC 0x00004000 /* used async_schedule*(), used by module init */ +#define PF_NOFREEZE 0x00008000 /* this thread should not be frozen */ +#define PF_FROZEN 0x00010000 /* frozen for system suspend */ +#define PF_FSTRANS 0x00020000 /* inside a filesystem transaction */ +#define PF_KSWAPD 0x00040000 /* I am kswapd */ +#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ +#define PF_KTHREAD 0x00200000 /* I am a kernel thread */ +#define PF_RANDOMIZE 0x00400000 /* randomize virtual address space */ +#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */ +#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ +#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ +#define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ +#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ + +struct task_struct { + pthread_t thread; + + int (*thread_fn)(void *); + void *thread_data; + + pthread_mutex_t lock; + pthread_cond_t wait; + + atomic_t usage; + volatile long state; + + /* kthread: */ + unsigned long kthread_flags; + struct completion exited; + + unsigned flags; + + bool on_cpu; + char comm[TASK_COMM_LEN]; + struct bio_list *bio_list; +}; + +extern __thread struct task_struct *current; + +#define __set_task_state(tsk, state_value) \ + do { (tsk)->state = (state_value); } while (0) +#define set_task_state(tsk, state_value) \ + smp_store_mb((tsk)->state, (state_value)) +#define __set_current_state(state_value) \ + do { current->state = (state_value); } while (0) +#define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + +#define get_task_struct(tsk) do { atomic_inc(&(tsk)->usage); } while(0) + +extern void __put_task_struct(struct task_struct *t); + +static inline void put_task_struct(struct task_struct *t) +{ + if (atomic_dec_and_test(&t->usage)) + __put_task_struct(t); +} + +#define cond_resched() +#define need_resched() 0 + +void schedule(void); + +#define MAX_SCHEDULE_TIMEOUT LONG_MAX +long schedule_timeout(long timeout); + +static inline void io_schedule(void) +{ + 
schedule(); +} + +static inline long io_schedule_timeout(long timeout) +{ + return schedule_timeout(timeout); +} + +int wake_up_process(struct task_struct *); + +static inline u64 ktime_get_seconds(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return ts.tv_sec; +} + +#endif /* __TOOLS_LINUX_SCHED_H */ diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h new file mode 100644 index 0000000..ef3040e --- /dev/null +++ b/include/linux/sched/rt.h @@ -0,0 +1,9 @@ +#ifndef _SCHED_RT_H +#define _SCHED_RT_H + +static inline int rt_task(struct task_struct *p) +{ + return 0; +} + +#endif /* _SCHED_RT_H */ diff --git a/include/linux/semaphore.h b/include/linux/semaphore.h new file mode 100644 index 0000000..aeba6eb --- /dev/null +++ b/include/linux/semaphore.h @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2008 Intel Corporation + * Author: Matthew Wilcox <willy@linux.intel.com> + * + * Distributed under the terms of the GNU GPL, version 2 + * + * Please see kernel/semaphore.c for documentation of these functions + */ +#ifndef __LINUX_SEMAPHORE_H +#define __LINUX_SEMAPHORE_H + +#include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/spinlock.h> + +/* Please don't access any members of this structure directly */ +struct semaphore { + raw_spinlock_t lock; + unsigned int count; + struct list_head wait_list; +}; + +#define __SEMAPHORE_INITIALIZER(name, n) \ +{ \ + .lock = __RAW_SPIN_LOCK_UNLOCKED((name).lock), \ + .count = n, \ + .wait_list = LIST_HEAD_INIT((name).wait_list), \ +} + +#define DEFINE_SEMAPHORE(name) \ + struct semaphore name = __SEMAPHORE_INITIALIZER(name, 1) + +static inline void sema_init(struct semaphore *sem, int val) +{ + static struct lock_class_key __key; + *sem = (struct semaphore) __SEMAPHORE_INITIALIZER(*sem, val); + lockdep_init_map(&sem->lock.dep_map, "semaphore->lock", &__key, 0); +} + +extern void down(struct semaphore *sem); +extern int __must_check down_interruptible(struct semaphore *sem); +extern int __must_check down_killable(struct semaphore *sem); +extern int __must_check down_trylock(struct semaphore *sem); +extern int __must_check down_timeout(struct semaphore *sem, long); +extern void up(struct semaphore *sem); + +#endif /* __LINUX_SEMAPHORE_H */ diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h new file mode 100644 index 0000000..7047838 --- /dev/null +++ b/include/linux/seq_file.h @@ -0,0 +1,25 @@ +#ifndef _LINUX_SEQ_FILE_H +#define _LINUX_SEQ_FILE_H + +#include <linux/types.h> +#include <linux/fs.h> + +struct seq_operations; +struct path; + +struct seq_file { + char *buf; + size_t size; + size_t from; + size_t count; + size_t pad_until; + loff_t index; + loff_t read_pos; + u64 version; + const struct seq_operations *op; + int poll_event; + const struct file *file; + void *private; +}; + +#endif diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h new file mode 100644 index 0000000..7a08137 --- /dev/null +++ b/include/linux/seqlock.h @@ -0,0 +1,567 @@ +#ifndef __LINUX_SEQLOCK_H +#define __LINUX_SEQLOCK_H +/* + * Reader/writer consistent mechanism without starving writers. This type of + * lock for data where the reader wants a consistent set of information + * and is willing to retry if the information changes. There are two types + * of readers: + * 1. Sequence readers which never block a writer but they may have to retry + * if a writer is in progress by detecting change in sequence number. + * Writers do not wait for a sequence reader. + * 2. 
Locking readers which will wait if a writer or another locking reader + * is in progress. A locking reader in progress will also block a writer + * from going forward. Unlike the regular rwlock, the read lock here is + * exclusive so that only one locking reader can get it. + * + * This is not as cache friendly as brlock. Also, this may not work well + * for data that contains pointers, because any writer could + * invalidate a pointer that a reader was following. + * + * Expected non-blocking reader usage: + * do { + * seq = read_seqbegin(&foo); + * ... + * } while (read_seqretry(&foo, seq)); + * + * + * On non-SMP the spin locks disappear but the writer still needs + * to increment the sequence variables because an interrupt routine could + * change the state of the data. + * + * Based on x86_64 vsyscall gettimeofday + * by Keith Owens and Andrea Arcangeli + */ + +#include <linux/spinlock.h> +#include <linux/lockdep.h> +#include <linux/compiler.h> + +/* + * Version using sequence counter only. + * This can be used when code has its own mutex protecting the + * update, starting before the write_seqcount_begin() and ending + * after the write_seqcount_end(). + */ +typedef struct seqcount { + unsigned sequence; +} seqcount_t; + +static inline void __seqcount_init(seqcount_t *s, const char *name, + struct lock_class_key *key) +{ + s->sequence = 0; +} + +# define SEQCOUNT_DEP_MAP_INIT(lockname) +# define seqcount_init(s) __seqcount_init(s, NULL, NULL) +# define seqcount_lockdep_reader_access(x) + +#define SEQCNT_ZERO(lockname) { .sequence = 0, SEQCOUNT_DEP_MAP_INIT(lockname)} + + +/** + * __read_seqcount_begin - begin a seq-read critical section (without barrier) + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb() + * barrier. Callers should ensure that smp_rmb() or equivalent ordering is + * provided before actually loading any of the variables that are to be + * protected in this critical section. + * + * Use carefully, only in critical code, and comment how the barrier is + * provided. + */ +static inline unsigned __read_seqcount_begin(const seqcount_t *s) +{ + unsigned ret; + +repeat: + ret = READ_ONCE(s->sequence); + if (unlikely(ret & 1)) { + cpu_relax(); + goto repeat; + } + return ret; +} + +/** + * raw_read_seqcount - Read the raw seqcount + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_read_seqcount opens a read critical section of the given + * seqcount without any lockdep checking and without checking or + * masking the LSB. Calling code is responsible for handling that. + */ +static inline unsigned raw_read_seqcount(const seqcount_t *s) +{ + unsigned ret = READ_ONCE(s->sequence); + smp_rmb(); + return ret; +} + +/** + * raw_read_seqcount_begin - start seq-read critical section w/o lockdep + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_read_seqcount_begin opens a read critical section of the given + * seqcount, but without any lockdep checking. Validity of the critical + * section is tested by checking read_seqcount_retry function. 
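+ *
+ * A minimal read-side sketch (the names 'foo->seq' and 'foo->data' are
+ * illustrative, not part of this header), pairing it with
+ * read_seqcount_retry() below:
+ *
+ *	unsigned seq;
+ *	do {
+ *		seq = raw_read_seqcount_begin(&foo->seq);
+ *		copy = foo->data;
+ *	} while (read_seqcount_retry(&foo->seq, seq));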
+ */ +static inline unsigned raw_read_seqcount_begin(const seqcount_t *s) +{ + unsigned ret = __read_seqcount_begin(s); + smp_rmb(); + return ret; +} + +/** + * read_seqcount_begin - begin a seq-read critical section + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * read_seqcount_begin opens a read critical section of the given seqcount. + * Validity of the critical section is tested by checking read_seqcount_retry + * function. + */ +static inline unsigned read_seqcount_begin(const seqcount_t *s) +{ + seqcount_lockdep_reader_access(s); + return raw_read_seqcount_begin(s); +} + +/** + * raw_seqcount_begin - begin a seq-read critical section + * @s: pointer to seqcount_t + * Returns: count to be passed to read_seqcount_retry + * + * raw_seqcount_begin opens a read critical section of the given seqcount. + * Validity of the critical section is tested by checking read_seqcount_retry + * function. + * + * Unlike read_seqcount_begin(), this function will not wait for the count + * to stabilize. If a writer is active when we begin, we will fail the + * read_seqcount_retry() instead of stabilizing at the beginning of the + * critical section. + */ +static inline unsigned raw_seqcount_begin(const seqcount_t *s) +{ + unsigned ret = READ_ONCE(s->sequence); + smp_rmb(); + return ret & ~1; +} + +/** + * __read_seqcount_retry - end a seq-read critical section (without barrier) + * @s: pointer to seqcount_t + * @start: count, from read_seqcount_begin + * Returns: 1 if retry is required, else 0 + * + * __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb() + * barrier. Callers should ensure that smp_rmb() or equivalent ordering is + * provided before actually loading any of the variables that are to be + * protected in this critical section. + * + * Use carefully, only in critical code, and comment how the barrier is + * provided. + */ +static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start) +{ + return unlikely(s->sequence != start); +} + +/** + * read_seqcount_retry - end a seq-read critical section + * @s: pointer to seqcount_t + * @start: count, from read_seqcount_begin + * Returns: 1 if retry is required, else 0 + * + * read_seqcount_retry closes a read critical section of the given seqcount. + * If the critical section was invalid, it must be ignored (and typically + * retried). + */ +static inline int read_seqcount_retry(const seqcount_t *s, unsigned start) +{ + smp_rmb(); + return __read_seqcount_retry(s, start); +} + + + +static inline void raw_write_seqcount_begin(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); +} + +static inline void raw_write_seqcount_end(seqcount_t *s) +{ + smp_wmb(); + s->sequence++; +} + +/** + * raw_write_seqcount_barrier - do a seq write barrier + * @s: pointer to seqcount_t + * + * This can be used to provide an ordering guarantee instead of the + * usual consistency guarantee. It is one wmb cheaper, because we can + * collapse the two back-to-back wmb()s. 
+ * + * seqcount_t seq; + * bool X = true, Y = false; + * + * void read(void) + * { + * bool x, y; + * + * do { + * int s = read_seqcount_begin(&seq); + * + * x = X; y = Y; + * + * } while (read_seqcount_retry(&seq, s)); + * + * BUG_ON(!x && !y); + * } + * + * void write(void) + * { + * Y = true; + * + * raw_write_seqcount_barrier(seq); + * + * X = false; + * } + */ +static inline void raw_write_seqcount_barrier(seqcount_t *s) +{ + s->sequence++; + smp_wmb(); + s->sequence++; +} + +static inline int raw_read_seqcount_latch(seqcount_t *s) +{ + int seq = READ_ONCE(s->sequence); + /* Pairs with the first smp_wmb() in raw_write_seqcount_latch() */ + smp_read_barrier_depends(); + return seq; +} + +/** + * raw_write_seqcount_latch - redirect readers to even/odd copy + * @s: pointer to seqcount_t + * + * The latch technique is a multiversion concurrency control method that allows + * queries during non-atomic modifications. If you can guarantee queries never + * interrupt the modification -- e.g. the concurrency is strictly between CPUs + * -- you most likely do not need this. + * + * Where the traditional RCU/lockless data structures rely on atomic + * modifications to ensure queries observe either the old or the new state the + * latch allows the same for non-atomic updates. The trade-off is doubling the + * cost of storage; we have to maintain two copies of the entire data + * structure. + * + * Very simply put: we first modify one copy and then the other. This ensures + * there is always one copy in a stable state, ready to give us an answer. + * + * The basic form is a data structure like: + * + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; + * + * Where a modification, which is assumed to be externally serialized, does the + * following: + * + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); <- Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[0], ...); + * + * smp_wmb(); <- Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[1], ...); + * } + * + * The query will have a form like: + * + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; + * + * do { + * seq = raw_read_seqcount_latch(&latch->seq); + * + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); + * + * smp_rmb(); + * } while (seq != latch->seq); + * + * return entry; + * } + * + * So during the modification, queries are first redirected to data[1]. Then we + * modify data[0]. When that is complete, we redirect queries back to data[0] + * and we can modify data[1]. + * + * NOTE: The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. 
+ */ +static inline void raw_write_seqcount_latch(seqcount_t *s) +{ + smp_wmb(); /* prior stores before incrementing "sequence" */ + s->sequence++; + smp_wmb(); /* increment "sequence" before following stores */ +} + +/* + * Sequence counter only version assumes that callers are using their + * own mutexing. + */ +static inline void write_seqcount_begin_nested(seqcount_t *s, int subclass) +{ + raw_write_seqcount_begin(s); +} + +static inline void write_seqcount_begin(seqcount_t *s) +{ + write_seqcount_begin_nested(s, 0); +} + +static inline void write_seqcount_end(seqcount_t *s) +{ + raw_write_seqcount_end(s); +} + +/** + * write_seqcount_invalidate - invalidate in-progress read-side seq operations + * @s: pointer to seqcount_t + * + * After write_seqcount_invalidate, no read-side seq operations will complete + * successfully and see data older than this. + */ +static inline void write_seqcount_invalidate(seqcount_t *s) +{ + smp_wmb(); + s->sequence+=2; +} + +typedef struct { + struct seqcount seqcount; + spinlock_t lock; +} seqlock_t; + +/* + * These macros triggered gcc-3.x compile-time problems. We think these are + * OK now. Be cautious. + */ +#define __SEQLOCK_UNLOCKED(lockname) \ + { \ + .seqcount = SEQCNT_ZERO(lockname), \ + .lock = __SPIN_LOCK_UNLOCKED(lockname) \ + } + +#define seqlock_init(x) \ + do { \ + seqcount_init(&(x)->seqcount); \ + spin_lock_init(&(x)->lock); \ + } while (0) + +#define DEFINE_SEQLOCK(x) \ + seqlock_t x = __SEQLOCK_UNLOCKED(x) + +/* + * Read side functions for starting and finalizing a read side section. + */ +static inline unsigned read_seqbegin(const seqlock_t *sl) +{ + return read_seqcount_begin(&sl->seqcount); +} + +static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) +{ + return read_seqcount_retry(&sl->seqcount, start); +} + +/* + * Lock out other writers and update the count. + * Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. + */ +static inline void write_seqlock(seqlock_t *sl) +{ + spin_lock(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock(&sl->lock); +} + +static inline void write_seqlock_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_bh(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_bh(&sl->lock); +} + +static inline void write_seqlock_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); + write_seqcount_begin(&sl->seqcount); +} + +static inline void write_sequnlock_irq(seqlock_t *sl) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + write_seqcount_begin(&sl->seqcount); + return flags; +} + +#define write_seqlock_irqsave(lock, flags) \ + do { flags = __write_seqlock_irqsave(lock); } while (0) + +static inline void +write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags) +{ + write_seqcount_end(&sl->seqcount); + spin_unlock_irqrestore(&sl->lock, flags); +} + +/* + * A locking reader exclusively locks out other writers and locking readers, + * but doesn't update the sequence number. Acts like a normal spin_lock/unlock. + * Don't need preempt_disable() because that is in the spin_lock already. 
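+ *
+ * The read_seqbegin_or_lock()/need_seqretry()/done_seqretry() helpers below
+ * combine both reader types: a first, lockless pass falls back to a locked
+ * second pass (by making seq odd) if it raced with a writer. A rough usage
+ * sketch ('lock' and the read body are illustrative):
+ *
+ *	int seq = 0;
+ *	do {
+ *		read_seqbegin_or_lock(&lock, &seq);
+ *		... read the protected data ...
+ *		if (!need_seqretry(&lock, seq))
+ *			break;
+ *		seq = 1;
+ *	} while (1);
+ *	done_seqretry(&lock, seq);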
+ */ +static inline void read_seqlock_excl(seqlock_t *sl) +{ + spin_lock(&sl->lock); +} + +static inline void read_sequnlock_excl(seqlock_t *sl) +{ + spin_unlock(&sl->lock); +} + +/** + * read_seqbegin_or_lock - begin a sequence number check or locking block + * @lock: sequence lock + * @seq : sequence number to be checked + * + * First try it once optimistically without taking the lock. If that fails, + * take the lock. The sequence number is also used as a marker for deciding + * whether to be a reader (even) or writer (odd). + * N.B. seq must be initialized to an even number to begin with. + */ +static inline void read_seqbegin_or_lock(seqlock_t *lock, int *seq) +{ + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl(lock); +} + +static inline int need_seqretry(seqlock_t *lock, int seq) +{ + return !(seq & 1) && read_seqretry(lock, seq); +} + +static inline void done_seqretry(seqlock_t *lock, int seq) +{ + if (seq & 1) + read_sequnlock_excl(lock); +} + +static inline void read_seqlock_excl_bh(seqlock_t *sl) +{ + spin_lock_bh(&sl->lock); +} + +static inline void read_sequnlock_excl_bh(seqlock_t *sl) +{ + spin_unlock_bh(&sl->lock); +} + +static inline void read_seqlock_excl_irq(seqlock_t *sl) +{ + spin_lock_irq(&sl->lock); +} + +static inline void read_sequnlock_excl_irq(seqlock_t *sl) +{ + spin_unlock_irq(&sl->lock); +} + +static inline unsigned long __read_seqlock_excl_irqsave(seqlock_t *sl) +{ + unsigned long flags; + + spin_lock_irqsave(&sl->lock, flags); + return flags; +} + +#define read_seqlock_excl_irqsave(lock, flags) \ + do { flags = __read_seqlock_excl_irqsave(lock); } while (0) + +static inline void +read_sequnlock_excl_irqrestore(seqlock_t *sl, unsigned long flags) +{ + spin_unlock_irqrestore(&sl->lock, flags); +} + +static inline unsigned long +read_seqbegin_or_lock_irqsave(seqlock_t *lock, int *seq) +{ + unsigned long flags = 0; + + if (!(*seq & 1)) /* Even */ + *seq = read_seqbegin(lock); + else /* Odd */ + read_seqlock_excl_irqsave(lock, flags); + + return flags; +} + +static inline void +done_seqretry_irqrestore(seqlock_t *lock, int seq, unsigned long flags) +{ + if (seq & 1) + read_sequnlock_excl_irqrestore(lock, flags); +} +#endif /* __LINUX_SEQLOCK_H */ diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h new file mode 100644 index 0000000..baa36b1 --- /dev/null +++ b/include/linux/shrinker.h @@ -0,0 +1,25 @@ +#ifndef __TOOLS_LINUX_SHRINKER_H +#define __TOOLS_LINUX_SHRINKER_H + +struct shrink_control { + gfp_t gfp_mask; + unsigned long nr_to_scan; +}; + +#define SHRINK_STOP (~0UL) + +struct shrinker { + unsigned long (*count_objects)(struct shrinker *, + struct shrink_control *sc); + unsigned long (*scan_objects)(struct shrinker *, + struct shrink_control *sc); + + int seeks; /* seeks to recreate an obj */ + long batch; /* reclaim batch size, 0 = default */ + struct list_head list; +}; + +static inline int register_shrinker(struct shrinker *shrinker) { return 0; } +static inline void unregister_shrinker(struct shrinker *shrinker) {} + +#endif /* __TOOLS_LINUX_SHRINKER_H */ diff --git a/include/linux/slab.h b/include/linux/slab.h new file mode 100644 index 0000000..58fb73e --- /dev/null +++ b/include/linux/slab.h @@ -0,0 +1,106 @@ +#ifndef __TOOLS_LINUX_SLAB_H +#define __TOOLS_LINUX_SLAB_H + +#include <malloc.h> +#include <stdlib.h> +#include <string.h> + +#include <linux/kernel.h> +#include <linux/page.h> +#include <linux/types.h> + +#define ARCH_KMALLOC_MINALIGN 16 +#define KMALLOC_MAX_SIZE SIZE_MAX + +static 
inline void *kmalloc(size_t size, gfp_t flags) +{ + void *p = malloc(size); + + if (p && (flags & __GFP_ZERO)) + memset(p, 0, size); + + return p; +} + +static inline void *krealloc(void *old, size_t size, gfp_t flags) +{ + void *new = kmalloc(size, flags); + + if (new && (flags & __GFP_ZERO)) + memset(new, 0, size); + + if (new) { + memcpy(new, old, + min(malloc_usable_size(old), + malloc_usable_size(new))); + free(old); + } + + return new; +} + +#define kzalloc(size, flags) calloc(1, size) +#define kcalloc(n, size, flags) calloc(n, size) +#define kmalloc_array(n, size, flags) calloc(n, size) + +#define vmalloc(size) malloc(size) +#define vzalloc(size) calloc(1, size) + +#define kfree(p) free(p) +#define kvfree(p) free(p) +#define kzfree(p) free(p) + +static inline struct page *alloc_pages(gfp_t flags, unsigned int order) +{ + size_t size = PAGE_SIZE << order; + void *p = memalign(PAGE_SIZE, size); + + if (p && (flags & __GFP_ZERO)) + memset(p, 0, size); + + return p; +} + +#define alloc_page(gfp) alloc_pages(gfp, 0) + +#define __get_free_pages(gfp, order) ((unsigned long) alloc_pages(gfp, order)) +#define __get_free_page(gfp) __get_free_pages(gfp, 0) + +#define __free_pages(page, order) \ +do { \ + (void) order; \ + free(page); \ +} while (0) + +#define free_pages(addr, order) \ +do { \ + (void) order; \ + free((void *) (addr)); \ +} while (0) + +#define __free_page(page) __free_pages((page), 0) +#define free_page(addr) free_pages((addr), 0) + +#define VM_IOREMAP 0x00000001 /* ioremap() and friends */ +#define VM_ALLOC 0x00000002 /* vmalloc() */ +#define VM_MAP 0x00000004 /* vmap()ed pages */ +#define VM_USERMAP 0x00000008 /* suitable for remap_vmalloc_range */ +#define VM_UNINITIALIZED 0x00000020 /* vm_struct is not fully initialized */ +#define VM_NO_GUARD 0x00000040 /* don't add guard page */ +#define VM_KASAN 0x00000080 /* has allocated kasan shadow memory */ + +#define PAGE_KERNEL 0 + +static inline void vunmap(const void *addr) {} + +static inline void *vmap(struct page **pages, unsigned int count, + unsigned long flags, unsigned prot) +{ + return page_address(pages[0]); +} + +#define is_vmalloc_addr(page) 0 + +#define vmalloc_to_page(addr) ((struct page *) (addr)) + +#endif /* __TOOLS_LINUX_SLAB_H */ diff --git a/include/linux/sort.h b/include/linux/sort.h new file mode 100644 index 0000000..d534da2 --- /dev/null +++ b/include/linux/sort.h @@ -0,0 +1,10 @@ +#ifndef _LINUX_SORT_H +#define _LINUX_SORT_H + +#include <linux/types.h> + +void sort(void *base, size_t num, size_t size, + int (*cmp)(const void *, const void *), + void (*swap)(void *, void *, int)); + +#endif diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h new file mode 100644 index 0000000..0fa79a3 --- /dev/null +++ b/include/linux/spinlock.h @@ -0,0 +1,60 @@ +#ifndef __TOOLS_LINUX_SPINLOCK_H +#define __TOOLS_LINUX_SPINLOCK_H + +#include <linux/atomic.h> + +typedef struct { + int count; +} raw_spinlock_t; + +#define __RAW_SPIN_LOCK_UNLOCKED(name) (raw_spinlock_t) { .count = 0 } + +static inline void raw_spin_lock_init(raw_spinlock_t *lock) +{ + smp_store_release(&lock->count, 0); +} + +static inline void raw_spin_lock(raw_spinlock_t *lock) +{ + while (xchg_acquire(&lock->count, 1)) + ; +} + +static inline void raw_spin_unlock(raw_spinlock_t *lock) +{ + smp_store_release(&lock->count, 0); +} + +#define raw_spin_lock_irq(lock) raw_spin_lock(lock) +#define raw_spin_unlock_irq(lock) raw_spin_unlock(lock) + +#define raw_spin_lock_irqsave(lock, flags) \ +do { \ + (void) flags; \ + raw_spin_lock(lock); \ +} while 
(0) + +#define raw_spin_unlock_irqrestore(lock, flags) raw_spin_unlock(lock) + +typedef raw_spinlock_t spinlock_t; + +#define __SPIN_LOCK_UNLOCKED(name) __RAW_SPIN_LOCK_UNLOCKED(name) + +#define DEFINE_SPINLOCK(x) spinlock_t x = __SPIN_LOCK_UNLOCKED(x) + +#define spin_lock_init(lock) raw_spin_lock_init(lock) +#define spin_lock(lock) raw_spin_lock(lock) +#define spin_unlock(lock) raw_spin_unlock(lock) + +#define spin_lock_nested(lock, n) spin_lock(lock) + +#define spin_lock_bh(lock) raw_spin_lock(lock) +#define spin_unlock_bh(lock) raw_spin_unlock(lock) + +#define spin_lock_irq(lock) raw_spin_lock(lock) +#define spin_unlock_irq(lock) raw_spin_unlock(lock) + +#define spin_lock_irqsave(lock, flags) raw_spin_lock_irqsave(lock, flags) +#define spin_unlock_irqrestore(lock, flags) raw_spin_unlock_irqrestore(lock, flags) + +#endif /* __TOOLS_LINUX_SPINLOCK_H */ diff --git a/include/linux/stat.h b/include/linux/stat.h new file mode 100644 index 0000000..d5ded25 --- /dev/null +++ b/include/linux/stat.h @@ -0,0 +1,15 @@ +#ifndef _LINUX_STAT_H +#define _LINUX_STAT_H + +#include <sys/stat.h> + +#define S_IRWXUGO (S_IRWXU|S_IRWXG|S_IRWXO) +#define S_IALLUGO (S_ISUID|S_ISGID|S_ISVTX|S_IRWXUGO) +#define S_IRUGO (S_IRUSR|S_IRGRP|S_IROTH) +#define S_IWUGO (S_IWUSR|S_IWGRP|S_IWOTH) +#define S_IXUGO (S_IXUSR|S_IXGRP|S_IXOTH) + +#define UTIME_NOW ((1l << 30) - 1l) +#define UTIME_OMIT ((1l << 30) - 2l) + +#endif diff --git a/include/linux/string.h b/include/linux/string.h new file mode 100644 index 0000000..abc191e --- /dev/null +++ b/include/linux/string.h @@ -0,0 +1,15 @@ +#ifndef _TOOLS_LINUX_STRING_H_ +#define _TOOLS_LINUX_STRING_H_ + +#include <stdlib.h> +#include <string.h> +#include <linux/types.h> /* for size_t */ + +extern size_t strlcpy(char *dest, const char *src, size_t size); +extern char *skip_spaces(const char *); +extern char *strim(char *); +extern void memzero_explicit(void *, size_t); + +#define kstrndup(s, n, gfp) strndup(s, n) + +#endif /* _LINUX_STRING_H_ */ diff --git a/include/linux/stringify.h b/include/linux/stringify.h new file mode 100644 index 0000000..841cec8 --- /dev/null +++ b/include/linux/stringify.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_STRINGIFY_H +#define __LINUX_STRINGIFY_H + +/* Indirect stringification. Doing two levels allows the parameter to be a + * macro itself. For example, compile with -DFOO=bar, __stringify(FOO) + * converts to "bar". + */ + +#define __stringify_1(x...) #x +#define __stringify(x...) 
__stringify_1(x) + +#endif /* !__LINUX_STRINGIFY_H */ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h new file mode 100644 index 0000000..0be85b7 --- /dev/null +++ b/include/linux/sysfs.h @@ -0,0 +1,36 @@ +#ifndef _SYSFS_H_ +#define _SYSFS_H_ + +#include <linux/compiler.h> +#include <linux/stringify.h> + +struct kobject; + +struct attribute { + const char *name; + umode_t mode; +}; + +#define __ATTR(_name, _mode, _show, _store) { \ + .attr = {.name = __stringify(_name), .mode = _mode }, \ + .show = _show, \ + .store = _store, \ +} + +static inline int sysfs_create_files(struct kobject *kobj, + const struct attribute **attr) +{ + return 0; +} + +static inline int sysfs_create_link(struct kobject *kobj, + struct kobject *target, const char *name) +{ + return 0; +} + +static inline void sysfs_remove_link(struct kobject *kobj, const char *name) +{ +} + +#endif /* _SYSFS_H_ */ diff --git a/include/linux/time64.h b/include/linux/time64.h new file mode 100644 index 0000000..9d8a3ef --- /dev/null +++ b/include/linux/time64.h @@ -0,0 +1,194 @@ +#ifndef _LINUX_TIME64_H +#define _LINUX_TIME64_H + +#include <linux/types.h> + +typedef __s64 time64_t; + +/* + * This wants to go into uapi/linux/time.h once we agreed about the + * userspace interfaces. + */ +#if __BITS_PER_LONG == 64 +# define timespec64 timespec +#else +struct timespec64 { + time64_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; + +struct itimerspec64 { + struct timespec64 it_interval; + struct timespec64 it_value; +}; + +#endif + +/* Parameters used to convert the timespec values: */ +#define MSEC_PER_SEC 1000L +#define USEC_PER_MSEC 1000L +#define NSEC_PER_USEC 1000L +#define NSEC_PER_MSEC 1000000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000L +#define FSEC_PER_SEC 1000000000000000LL + +/* Located here for timespec[64]_valid_strict */ +#define TIME64_MAX ((s64)~((u64)1 << 63)) +#define KTIME_MAX ((s64)~((u64)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#if __BITS_PER_LONG == 64 + +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + return ts64; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + return ts; +} + +# define timespec64_equal timespec_equal +# define timespec64_compare timespec_compare +# define set_normalized_timespec64 set_normalized_timespec +# define timespec64_add timespec_add +# define timespec64_sub timespec_sub +# define timespec64_valid timespec_valid +# define timespec64_valid_strict timespec_valid_strict +# define timespec64_to_ns timespec_to_ns +# define ns_to_timespec64 ns_to_timespec +# define timespec64_add_ns timespec_add_ns + +#else + +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + struct timespec ret; + + ret.tv_sec = (time_t)ts64.tv_sec; + ret.tv_nsec = ts64.tv_nsec; + return ret; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + struct timespec64 ret; + + ret.tv_sec = ts.tv_sec; + ret.tv_nsec = ts.tv_nsec; + return ret; +} + +static inline int timespec64_equal(const struct timespec64 *a, + const struct timespec64 *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int timespec64_compare(const struct timespec64 *lhs, const struct timespec64 *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - 
rhs->tv_nsec; +} + +extern void set_normalized_timespec64(struct timespec64 *ts, time64_t sec, s64 nsec); + +static inline struct timespec64 timespec64_add(struct timespec64 lhs, + struct timespec64 rhs) +{ + struct timespec64 ts_delta; + set_normalized_timespec64(&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec64 timespec64_sub(struct timespec64 lhs, + struct timespec64 rhs) +{ + struct timespec64 ts_delta; + set_normalized_timespec64(&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * Returns true if the timespec64 is normalized, false if denormalized: + */ +static inline bool timespec64_valid(const struct timespec64 *ts) +{ + /* Dates before 1970 are bogus */ + if (ts->tv_sec < 0) + return false; + /* Can't have more nanoseconds than a second */ + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec64_valid_strict(const struct timespec64 *ts) +{ + if (!timespec64_valid(ts)) + return false; + /* Disallow values that could overflow ktime_t */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return false; + return true; +} + +/** + * timespec64_to_ns - Convert timespec64 to nanoseconds + * @ts: pointer to the timespec64 variable to be converted + * + * Returns the scalar nanosecond representation of the timespec64 + * parameter. + */ +static inline s64 timespec64_to_ns(const struct timespec64 *ts) +{ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * ns_to_timespec64 - Convert nanoseconds to timespec64 + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec64 representation of the nsec parameter. + */ +extern struct timespec64 ns_to_timespec64(const s64 nsec); + +/** + * timespec64_add_ns - Adds nanoseconds to a timespec64 + * @a: pointer to timespec64 to be incremented + * @ns: unsigned nanoseconds value to be added + * + * This must always be inlined because it's used from the x86-64 vdso, + * which cannot call other kernel functions. + */ +static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) +{ + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_nsec = ns; +} + +#endif + +/* + * timespec64_add_safe assumes both values are positive and checks for + * overflow. It will return TIME64_MAX in case of overflow. 
+ */ +extern struct timespec64 timespec64_add_safe(const struct timespec64 lhs, + const struct timespec64 rhs); + +#endif /* _LINUX_TIME64_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h new file mode 100644 index 0000000..363f26a --- /dev/null +++ b/include/linux/timer.h @@ -0,0 +1,50 @@ +#ifndef __TOOLS_LINUX_TIMER_H +#define __TOOLS_LINUX_TIMER_H + +#include <string.h> +#include <linux/types.h> + +struct timer_list { + unsigned long expires; + void (*function)(unsigned long); + unsigned long data; + bool pending; +}; + +static inline void init_timer(struct timer_list *timer) +{ + memset(timer, 0, sizeof(*timer)); +} + +#define __init_timer(_timer, _flags) init_timer(_timer) + +#define __setup_timer(_timer, _fn, _data, _flags) \ + do { \ + __init_timer((_timer), (_flags)); \ + (_timer)->function = (_fn); \ + (_timer)->data = (_data); \ + } while (0) + +#define setup_timer(timer, fn, data) \ + __setup_timer((timer), (fn), (data), 0) + +static inline int timer_pending(const struct timer_list *timer) +{ + return timer->pending; +} + +int del_timer(struct timer_list * timer); +int del_timer_sync(struct timer_list *timer); + +int mod_timer(struct timer_list *timer, unsigned long expires); +//extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); + +static inline void add_timer(struct timer_list *timer) +{ + BUG_ON(timer_pending(timer)); + mod_timer(timer, timer->expires); +} + +void flush_timers(void); + +#endif /* __TOOLS_LINUX_TIMER_H */ diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h new file mode 100644 index 0000000..1686cb9 --- /dev/null +++ b/include/linux/tracepoint.h @@ -0,0 +1,62 @@ +#ifndef __TOOLS_LINUX_TRACEPOINT_H +#define __TOOLS_LINUX_TRACEPOINT_H + +#define PARAMS(args...) args + +#define TP_PROTO(args...) args +#define TP_ARGS(args...) args +#define TP_CONDITION(args...) 
args + +#define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ + static inline void trace_##name(proto) \ + { } \ + static inline void trace_##name##_rcuidle(proto) \ + { } \ + static inline int \ + register_trace_##name(void (*probe)(data_proto), \ + void *data) \ + { \ + return -ENOSYS; \ + } \ + static inline int \ + unregister_trace_##name(void (*probe)(data_proto), \ + void *data) \ + { \ + return -ENOSYS; \ + } \ + static inline void check_trace_callback_type_##name(void (*cb)(data_proto)) \ + { \ + } \ + static inline bool \ + trace_##name##_enabled(void) \ + { \ + return false; \ + } + +#define DEFINE_TRACE_FN(name, reg, unreg) +#define DEFINE_TRACE(name) +#define EXPORT_TRACEPOINT_SYMBOL_GPL(name) +#define EXPORT_TRACEPOINT_SYMBOL(name) + +#define DECLARE_TRACE_NOARGS(name) \ + __DECLARE_TRACE(name, void, , \ + cpu_online(raw_smp_processor_id()), \ + void *__data, __data) + +#define DECLARE_TRACE(name, proto, args) \ + __DECLARE_TRACE(name, PARAMS(proto), PARAMS(args), \ + cpu_online(raw_smp_processor_id()), \ + PARAMS(void *__data, proto), \ + PARAMS(__data, args)) + +#define DECLARE_EVENT_CLASS(name, proto, args, tstruct, assign, print) +#define DEFINE_EVENT(template, name, proto, args) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_FN(template, name, proto, args, reg, unreg)\ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) +#define TRACE_EVENT(name, proto, args, struct, assign, print) \ + DECLARE_TRACE(name, PARAMS(proto), PARAMS(args)) + +#endif /* __TOOLS_LINUX_TRACEPOINT_H */ diff --git a/include/linux/typecheck.h b/include/linux/typecheck.h new file mode 100644 index 0000000..eb5b74a --- /dev/null +++ b/include/linux/typecheck.h @@ -0,0 +1,24 @@ +#ifndef TYPECHECK_H_INCLUDED +#define TYPECHECK_H_INCLUDED + +/* + * Check at compile time that something is of a particular type. + * Always evaluates to 1 so you may use it easily in comparisons. + */ +#define typecheck(type,x) \ +({ type __dummy; \ + typeof(x) __dummy2; \ + (void)(&__dummy == &__dummy2); \ + 1; \ +}) + +/* + * Check at compile time that 'function' is a certain type, or is a pointer + * to that type (needs to use typedef for the function type.) 
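+ *
+ * Usage sketch (the typedef and function names are illustrative, not part of
+ * this header). typecheck() always evaluates to 1 and triggers a compile-time
+ * warning on a type mismatch; typecheck_fn() likewise complains if the
+ * function does not match the typedef:
+ *
+ *	typedef void (*timer_fn_t)(unsigned long);
+ *
+ *	typecheck(unsigned long, jiffies);
+ *	typecheck_fn(timer_fn_t, my_timer_fn);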
+ */ +#define typecheck_fn(type,function) \ +({ typeof(type) __tmp = function; \ + (void)__tmp; \ +}) + +#endif /* TYPECHECK_H_INCLUDED */ diff --git a/include/linux/types.h b/include/linux/types.h new file mode 100644 index 0000000..ddc8eca --- /dev/null +++ b/include/linux/types.h @@ -0,0 +1,98 @@ +#ifndef _TOOLS_LINUX_TYPES_H_ +#define _TOOLS_LINUX_TYPES_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <sys/types.h> + +#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */ +#include <asm/types.h> + +#define BITS_PER_LONG __BITS_PER_LONG + +struct page; +struct kmem_cache; + +typedef unsigned long pgoff_t; + +typedef unsigned short umode_t; + +typedef unsigned gfp_t; + +#define GFP_KERNEL 0 +#define GFP_ATOMIC 0 +#define GFP_NOFS 0 +#define GFP_NOIO 0 +#define GFP_NOWAIT 0 +#define __GFP_IO 0 +#define __GFP_NOWARN 0 +#define __GFP_NORETRY 0 +#define __GFP_ZERO 1 + +#define PAGE_ALLOC_COSTLY_ORDER 6 + +typedef __u64 u64; +typedef __s64 s64; +typedef __u32 u32; +typedef __s32 s32; +typedef __u16 u16; +typedef __s16 s16; +typedef __u8 u8; +typedef __s8 s8; + +#ifdef __CHECKER__ +#define __bitwise__ __attribute__((bitwise)) +#else +#define __bitwise__ +#endif +#ifdef __CHECK_ENDIAN__ +#define __bitwise __bitwise__ +#else +#define __bitwise +#endif + +#define __force +#define __user +#define __must_check +#define __cold + +typedef __u16 __bitwise __le16; +typedef __u16 __bitwise __be16; +typedef __u32 __bitwise __le32; +typedef __u32 __bitwise __be32; +typedef __u64 __bitwise __le64; +typedef __u64 __bitwise __be64; + +#ifndef __aligned_u64 +# define __aligned_u64 __u64 __attribute__((aligned(8))) +#endif + +typedef u64 sector_t; + +struct list_head { + struct list_head *next, *prev; +}; + +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +struct callback_head { + struct callback_head *next; + void (*func)(struct callback_head *head); +} __attribute__((aligned(sizeof(void *)))); + +#if 0 +#define rcu_head callback_head + +typedef void (*rcu_callback_t)(struct rcu_head *head); +typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func); +#endif + +#endif /* _TOOLS_LINUX_TYPES_H_ */ diff --git a/include/linux/unaligned/access_ok.h b/include/linux/unaligned/access_ok.h new file mode 100644 index 0000000..33383ca --- /dev/null +++ b/include/linux/unaligned/access_ok.h @@ -0,0 +1,67 @@ +#ifndef _LINUX_UNALIGNED_ACCESS_OK_H +#define _LINUX_UNALIGNED_ACCESS_OK_H + +#include <linux/kernel.h> +#include <asm/byteorder.h> + +static __always_inline u16 get_unaligned_le16(const void *p) +{ + return le16_to_cpup((__le16 *)p); +} + +static __always_inline u32 get_unaligned_le32(const void *p) +{ + return le32_to_cpup((__le32 *)p); +} + +static __always_inline u64 get_unaligned_le64(const void *p) +{ + return le64_to_cpup((__le64 *)p); +} + +static __always_inline u16 get_unaligned_be16(const void *p) +{ + return be16_to_cpup((__be16 *)p); +} + +static __always_inline u32 get_unaligned_be32(const void *p) +{ + return be32_to_cpup((__be32 *)p); +} + +static __always_inline u64 get_unaligned_be64(const void *p) +{ + return be64_to_cpup((__be64 *)p); +} + +static __always_inline void put_unaligned_le16(u16 val, void *p) +{ + *((__le16 *)p) = cpu_to_le16(val); +} + +static __always_inline void put_unaligned_le32(u32 val, void *p) +{ + *((__le32 *)p) = cpu_to_le32(val); +} + +static __always_inline void put_unaligned_le64(u64 val, void *p) +{ + *((__le64 *)p) = cpu_to_le64(val); +} + 
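+/*
+ * Usage sketch (buffer and offset are illustrative): read and rewrite a
+ * 32-bit little-endian field at an arbitrary, possibly unaligned offset:
+ *
+ *	u32 crc = get_unaligned_le32(buf + 13);
+ *	put_unaligned_le32(crc ^ 0xffffffffU, buf + 13);
+ */
+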
+static __always_inline void put_unaligned_be16(u16 val, void *p) +{ + *((__be16 *)p) = cpu_to_be16(val); +} + +static __always_inline void put_unaligned_be32(u32 val, void *p) +{ + *((__be32 *)p) = cpu_to_be32(val); +} + +static __always_inline void put_unaligned_be64(u64 val, void *p) +{ + *((__be64 *)p) = cpu_to_be64(val); +} + +#endif /* _LINUX_UNALIGNED_ACCESS_OK_H */ diff --git a/include/linux/unaligned/be_byteshift.h b/include/linux/unaligned/be_byteshift.h new file mode 100644 index 0000000..9356b24 --- /dev/null +++ b/include/linux/unaligned/be_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_BE_BYTESHIFT_H +#define _LINUX_UNALIGNED_BE_BYTESHIFT_H + +#include <linux/types.h> + +static inline u16 __get_unaligned_be16(const u8 *p) +{ + return p[0] << 8 | p[1]; +} + +static inline u32 __get_unaligned_be32(const u8 *p) +{ + return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3]; +} + +static inline u64 __get_unaligned_be64(const u8 *p) +{ + return (u64)__get_unaligned_be32(p) << 32 | + __get_unaligned_be32(p + 4); +} + +static inline void __put_unaligned_be16(u16 val, u8 *p) +{ + *p++ = val >> 8; + *p++ = val; +} + +static inline void __put_unaligned_be32(u32 val, u8 *p) +{ + __put_unaligned_be16(val >> 16, p); + __put_unaligned_be16(val, p + 2); +} + +static inline void __put_unaligned_be64(u64 val, u8 *p) +{ + __put_unaligned_be32(val >> 32, p); + __put_unaligned_be32(val, p + 4); +} + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_be16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_be32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_be64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_be16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_be32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_be64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/be_memmove.h b/include/linux/unaligned/be_memmove.h new file mode 100644 index 0000000..c2a76c5 --- /dev/null +++ b/include/linux/unaligned/be_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_MEMMOVE_H +#define _LINUX_UNALIGNED_BE_MEMMOVE_H + +#include <linux/unaligned/memmove.h> + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return __get_unaligned_memmove32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_memmove64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/be_struct.h b/include/linux/unaligned/be_struct.h new file mode 100644 index 0000000..1324158 --- /dev/null +++ b/include/linux/unaligned/be_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_BE_STRUCT_H +#define _LINUX_UNALIGNED_BE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_be16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 
get_unaligned_be32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_be64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_be16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_be32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_be64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_BE_STRUCT_H */ diff --git a/include/linux/unaligned/generic.h b/include/linux/unaligned/generic.h new file mode 100644 index 0000000..02d97ff --- /dev/null +++ b/include/linux/unaligned/generic.h @@ -0,0 +1,68 @@ +#ifndef _LINUX_UNALIGNED_GENERIC_H +#define _LINUX_UNALIGNED_GENERIC_H + +/* + * Cause a link-time error if we try an unaligned access other than + * 1,2,4 or 8 bytes long + */ +extern void __bad_unaligned_access_size(void); + +#define __get_unaligned_le(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_le16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_le32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_le64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __get_unaligned_be(ptr) ((__force typeof(*(ptr)))({ \ + __builtin_choose_expr(sizeof(*(ptr)) == 1, *(ptr), \ + __builtin_choose_expr(sizeof(*(ptr)) == 2, get_unaligned_be16((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 4, get_unaligned_be32((ptr)), \ + __builtin_choose_expr(sizeof(*(ptr)) == 8, get_unaligned_be64((ptr)), \ + __bad_unaligned_access_size())))); \ + })) + +#define __put_unaligned_le(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_le16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_le32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_le64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#define __put_unaligned_be(val, ptr) ({ \ + void *__gu_p = (ptr); \ + switch (sizeof(*(ptr))) { \ + case 1: \ + *(u8 *)__gu_p = (__force u8)(val); \ + break; \ + case 2: \ + put_unaligned_be16((__force u16)(val), __gu_p); \ + break; \ + case 4: \ + put_unaligned_be32((__force u32)(val), __gu_p); \ + break; \ + case 8: \ + put_unaligned_be64((__force u64)(val), __gu_p); \ + break; \ + default: \ + __bad_unaligned_access_size(); \ + break; \ + } \ + (void)0; }) + +#endif /* _LINUX_UNALIGNED_GENERIC_H */ diff --git a/include/linux/unaligned/le_byteshift.h b/include/linux/unaligned/le_byteshift.h new file mode 100644 index 0000000..be376fb --- /dev/null +++ b/include/linux/unaligned/le_byteshift.h @@ -0,0 +1,70 @@ +#ifndef _LINUX_UNALIGNED_LE_BYTESHIFT_H +#define _LINUX_UNALIGNED_LE_BYTESHIFT_H + +#include <linux/types.h> + +static inline u16 __get_unaligned_le16(const u8 *p) +{ + return p[0] | p[1] << 8; +} + +static inline u32 __get_unaligned_le32(const u8 *p) +{ + return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24; +} + +static inline u64 __get_unaligned_le64(const u8 *p) +{ + return (u64)__get_unaligned_le32(p + 4) << 32 | + __get_unaligned_le32(p); +} + +static inline void __put_unaligned_le16(u16 val, u8 *p) +{ + *p++ = val; + *p++ = val >> 8; +} + +static inline void __put_unaligned_le32(u32 val, u8 *p) +{ + 
__put_unaligned_le16(val >> 16, p + 2); + __put_unaligned_le16(val, p); +} + +static inline void __put_unaligned_le64(u64 val, u8 *p) +{ + __put_unaligned_le32(val >> 32, p + 4); + __put_unaligned_le32(val, p); +} + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_le16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_le32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_le64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_le16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_le32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_le64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_BYTESHIFT_H */ diff --git a/include/linux/unaligned/le_memmove.h b/include/linux/unaligned/le_memmove.h new file mode 100644 index 0000000..269849b --- /dev/null +++ b/include/linux/unaligned/le_memmove.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_MEMMOVE_H +#define _LINUX_UNALIGNED_LE_MEMMOVE_H + +#include <linux/unaligned/memmove.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_memmove16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_memmove32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_memmove64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_memmove16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_memmove32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_memmove64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_MEMMOVE_H */ diff --git a/include/linux/unaligned/le_struct.h b/include/linux/unaligned/le_struct.h new file mode 100644 index 0000000..088c457 --- /dev/null +++ b/include/linux/unaligned/le_struct.h @@ -0,0 +1,36 @@ +#ifndef _LINUX_UNALIGNED_LE_STRUCT_H +#define _LINUX_UNALIGNED_LE_STRUCT_H + +#include <linux/unaligned/packed_struct.h> + +static inline u16 get_unaligned_le16(const void *p) +{ + return __get_unaligned_cpu16((const u8 *)p); +} + +static inline u32 get_unaligned_le32(const void *p) +{ + return __get_unaligned_cpu32((const u8 *)p); +} + +static inline u64 get_unaligned_le64(const void *p) +{ + return __get_unaligned_cpu64((const u8 *)p); +} + +static inline void put_unaligned_le16(u16 val, void *p) +{ + __put_unaligned_cpu16(val, p); +} + +static inline void put_unaligned_le32(u32 val, void *p) +{ + __put_unaligned_cpu32(val, p); +} + +static inline void put_unaligned_le64(u64 val, void *p) +{ + __put_unaligned_cpu64(val, p); +} + +#endif /* _LINUX_UNALIGNED_LE_STRUCT_H */ diff --git a/include/linux/unaligned/memmove.h b/include/linux/unaligned/memmove.h new file mode 100644 index 0000000..eeb5a77 --- /dev/null +++ b/include/linux/unaligned/memmove.h @@ -0,0 +1,45 @@ +#ifndef _LINUX_UNALIGNED_MEMMOVE_H +#define _LINUX_UNALIGNED_MEMMOVE_H + +#include <linux/kernel.h> +#include <linux/string.h> + +/* Use memmove here, so gcc does not insert a __builtin_memcpy. 
*/ + +static inline u16 __get_unaligned_memmove16(const void *p) +{ + u16 tmp; + memmove(&tmp, p, 2); + return tmp; +} + +static inline u32 __get_unaligned_memmove32(const void *p) +{ + u32 tmp; + memmove(&tmp, p, 4); + return tmp; +} + +static inline u64 __get_unaligned_memmove64(const void *p) +{ + u64 tmp; + memmove(&tmp, p, 8); + return tmp; +} + +static inline void __put_unaligned_memmove16(u16 val, void *p) +{ + memmove(p, &val, 2); +} + +static inline void __put_unaligned_memmove32(u32 val, void *p) +{ + memmove(p, &val, 4); +} + +static inline void __put_unaligned_memmove64(u64 val, void *p) +{ + memmove(p, &val, 8); +} + +#endif /* _LINUX_UNALIGNED_MEMMOVE_H */ diff --git a/include/linux/unaligned/packed_struct.h b/include/linux/unaligned/packed_struct.h new file mode 100644 index 0000000..c0d817d --- /dev/null +++ b/include/linux/unaligned/packed_struct.h @@ -0,0 +1,46 @@ +#ifndef _LINUX_UNALIGNED_PACKED_STRUCT_H +#define _LINUX_UNALIGNED_PACKED_STRUCT_H + +#include <linux/kernel.h> + +struct __una_u16 { u16 x; } __packed; +struct __una_u32 { u32 x; } __packed; +struct __una_u64 { u64 x; } __packed; + +static inline u16 __get_unaligned_cpu16(const void *p) +{ + const struct __una_u16 *ptr = (const struct __una_u16 *)p; + return ptr->x; +} + +static inline u32 __get_unaligned_cpu32(const void *p) +{ + const struct __una_u32 *ptr = (const struct __una_u32 *)p; + return ptr->x; +} + +static inline u64 __get_unaligned_cpu64(const void *p) +{ + const struct __una_u64 *ptr = (const struct __una_u64 *)p; + return ptr->x; +} + +static inline void __put_unaligned_cpu16(u16 val, void *p) +{ + struct __una_u16 *ptr = (struct __una_u16 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu32(u32 val, void *p) +{ + struct __una_u32 *ptr = (struct __una_u32 *)p; + ptr->x = val; +} + +static inline void __put_unaligned_cpu64(u64 val, void *p) +{ + struct __una_u64 *ptr = (struct __una_u64 *)p; + ptr->x = val; +} + +#endif /* _LINUX_UNALIGNED_PACKED_STRUCT_H */ diff --git a/include/linux/uuid.h b/include/linux/uuid.h new file mode 100644 index 0000000..b81992d --- /dev/null +++ b/include/linux/uuid.h @@ -0,0 +1,27 @@ +/* + * UUID/GUID definition + * + * Copyright (C) 2010, 2016 Intel Corp. + * Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation; + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ +#ifndef _LINUX_UUID_H_ +#define _LINUX_UUID_H_ + +#include <uapi/linux/uuid.h> +#include <string.h> + +static inline int uuid_le_cmp(const uuid_le u1, const uuid_le u2) +{ + return memcmp(&u1, &u2, sizeof(uuid_le)); +} + +#endif diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h new file mode 100644 index 0000000..eb6284d --- /dev/null +++ b/include/linux/vmalloc.h @@ -0,0 +1,8 @@ +#ifndef __TOOLS_LINUX_VMALLOC_H +#define __TOOLS_LINUX_VMALLOC_H + +#define vmalloc(size) malloc(size) +#define __vmalloc(size, flags, prot) malloc(size) +#define vfree(p) free(p) + +#endif /* __TOOLS_LINUX_VMALLOC_H */ diff --git a/include/linux/wait.h b/include/linux/wait.h new file mode 100644 index 0000000..77cba05 --- /dev/null +++ b/include/linux/wait.h @@ -0,0 +1,1235 @@ +#ifndef _LINUX_WAIT_H +#define _LINUX_WAIT_H + +#include <pthread.h> + +#include <linux/bitmap.h> +#include <linux/list.h> +#include <linux/lockdep.h> +#include <linux/spinlock.h> +//#include <uapi/linux/wait.h> + +typedef struct __wait_queue wait_queue_t; +typedef int (*wait_queue_func_t)(wait_queue_t *wait, unsigned mode, int flags, void *key); +int default_wake_function(wait_queue_t *wait, unsigned mode, int flags, void *key); + +/* __wait_queue::flags */ +#define WQ_FLAG_EXCLUSIVE 0x01 +#define WQ_FLAG_WOKEN 0x02 + +struct __wait_queue { + unsigned int flags; + void *private; + wait_queue_func_t func; + struct list_head task_list; +}; + +struct wait_bit_key { + void *flags; + int bit_nr; +#define WAIT_ATOMIC_T_BIT_NR -1 + unsigned long timeout; +}; + +struct wait_bit_queue { + struct wait_bit_key key; + wait_queue_t wait; +}; + +struct __wait_queue_head { + spinlock_t lock; + struct list_head task_list; +}; +typedef struct __wait_queue_head wait_queue_head_t; + +struct task_struct; + +/* + * Macros for declaration and initialisaton of the datatypes + */ + +#define __WAITQUEUE_INITIALIZER(name, tsk) { \ + .private = tsk, \ + .func = default_wake_function, \ + .task_list = { NULL, NULL } } + +#define DECLARE_WAITQUEUE(name, tsk) \ + wait_queue_t name = __WAITQUEUE_INITIALIZER(name, tsk) + +#define __WAIT_QUEUE_HEAD_INITIALIZER(name) { \ + .lock = __SPIN_LOCK_UNLOCKED(name.lock), \ + .task_list = { &(name).task_list, &(name).task_list } } + +#define DECLARE_WAIT_QUEUE_HEAD(name) \ + wait_queue_head_t name = __WAIT_QUEUE_HEAD_INITIALIZER(name) + +#define __WAIT_BIT_KEY_INITIALIZER(word, bit) \ + { .flags = word, .bit_nr = bit, } + +#define __WAIT_ATOMIC_T_KEY_INITIALIZER(p) \ + { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } + +extern void __init_waitqueue_head(wait_queue_head_t *q, const char *name, struct lock_class_key *); + +#define init_waitqueue_head(q) \ + do { \ + static struct lock_class_key __key; \ + \ + __init_waitqueue_head((q), #q, &__key); \ + } while (0) + +#ifdef CONFIG_LOCKDEP +# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \ + ({ init_waitqueue_head(&name); name; }) +# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \ + wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) +#else +# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name) +#endif + +static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p) +{ + q->flags = 0; + q->private = p; + q->func = default_wake_function; +} + +static inline void +init_waitqueue_func_entry(wait_queue_t *q, wait_queue_func_t func) +{ + q->flags = 0; + q->private = NULL; + q->func = func; +} + +/** + * waitqueue_active -- locklessly test for waiters on the queue + * @q: the waitqueue to test for waiters + * + * returns 
true if the wait list is not empty + * + * NOTE: this function is lockless and requires care, incorrect usage _will_ + * lead to sporadic and non-obvious failure. + * + * Use either while holding wait_queue_head_t::lock or when used for wakeups + * with an extra smp_mb() like: + * + * CPU0 - waker CPU1 - waiter + * + * for (;;) { + * @cond = true; prepare_to_wait(&wq, &wait, state); + * smp_mb(); // smp_mb() from set_current_state() + * if (waitqueue_active(wq)) if (@cond) + * wake_up(wq); break; + * schedule(); + * } + * finish_wait(&wq, &wait); + * + * Because without the explicit smp_mb() it's possible for the + * waitqueue_active() load to get hoisted over the @cond store such that we'll + * observe an empty wait list while the waiter might not observe @cond. + * + * Also note that this 'optimization' trades a spin_lock() for an smp_mb(), + * which (when the lock is uncontended) are of roughly equal cost. + */ +static inline int waitqueue_active(wait_queue_head_t *q) +{ + return !list_empty(&q->task_list); +} + +/** + * wq_has_sleeper - check if there are any waiting processes + * @wq: wait queue head + * + * Returns true if wq has waiting processes + * + * Please refer to the comment for waitqueue_active. + */ +static inline bool wq_has_sleeper(wait_queue_head_t *wq) +{ + /* + * We need to be sure we are in sync with the + * add_wait_queue modifications to the wait queue. + * + * This memory barrier should be paired with one on the + * waiting side. + */ + smp_mb(); + return waitqueue_active(wq); +} + +extern void add_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); +extern void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait); +extern void remove_wait_queue(wait_queue_head_t *q, wait_queue_t *wait); + +static inline void __add_wait_queue(wait_queue_head_t *head, wait_queue_t *new) +{ + list_add(&new->task_list, &head->task_list); +} + +/* + * Used for wake-one threads: + */ +static inline void +__add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue(q, wait); +} + +static inline void __add_wait_queue_tail(wait_queue_head_t *head, + wait_queue_t *new) +{ + list_add_tail(&new->task_list, &head->task_list); +} + +static inline void +__add_wait_queue_tail_exclusive(wait_queue_head_t *q, wait_queue_t *wait) +{ + wait->flags |= WQ_FLAG_EXCLUSIVE; + __add_wait_queue_tail(q, wait); +} + +static inline void +__remove_wait_queue(wait_queue_head_t *head, wait_queue_t *old) +{ + list_del(&old->task_list); +} + +typedef int wait_bit_action_f(struct wait_bit_key *, int mode); +void __wake_up(wait_queue_head_t *q, unsigned int mode, int nr, void *key); +void __wake_up_locked_key(wait_queue_head_t *q, unsigned int mode, void *key); +void __wake_up_sync_key(wait_queue_head_t *q, unsigned int mode, int nr, void *key); +void __wake_up_locked(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_sync(wait_queue_head_t *q, unsigned int mode, int nr); +void __wake_up_bit(wait_queue_head_t *, void *, int); +int __wait_on_bit(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +int __wait_on_bit_lock(wait_queue_head_t *, struct wait_bit_queue *, wait_bit_action_f *, unsigned); +void wake_up_bit(void *, int); +void wake_up_atomic_t(atomic_t *); +int out_of_line_wait_on_bit(void *, int, wait_bit_action_f *, unsigned); +int out_of_line_wait_on_bit_timeout(void *, int, wait_bit_action_f *, unsigned, unsigned long); +int out_of_line_wait_on_bit_lock(void *, int, wait_bit_action_f *, 
unsigned); +int out_of_line_wait_on_atomic_t(atomic_t *, int (*)(atomic_t *), unsigned); +wait_queue_head_t *bit_waitqueue(void *, int); + +#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL) +#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL) +#define wake_up_all(x) __wake_up(x, TASK_NORMAL, 0, NULL) +#define wake_up_locked(x) __wake_up_locked((x), TASK_NORMAL, 1) +#define wake_up_all_locked(x) __wake_up_locked((x), TASK_NORMAL, 0) + +#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL) +#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL) +#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL) +#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1) + +/* + * Wakeup macros to be used to report events to the targets. + */ +#define wake_up_poll(x, m) \ + __wake_up(x, TASK_NORMAL, 1, (void *) (m)) +#define wake_up_locked_poll(x, m) \ + __wake_up_locked_key((x), TASK_NORMAL, (void *) (m)) +#define wake_up_interruptible_poll(x, m) \ + __wake_up(x, TASK_INTERRUPTIBLE, 1, (void *) (m)) +#define wake_up_interruptible_sync_poll(x, m) \ + __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, (void *) (m)) + +#define ___wait_cond_timeout(condition) \ +({ \ + bool __cond = (condition); \ + if (__cond && !__ret) \ + __ret = 1; \ + __cond || !__ret; \ +}) + +#define ___wait_is_interruptible(state) \ + (!__builtin_constant_p(state) || \ + state == TASK_INTERRUPTIBLE || state == TASK_KILLABLE) \ + +/* + * The below macro ___wait_event() has an explicit shadow of the __ret + * variable when used from the wait_event_*() macros. + * + * This is so that both can use the ___wait_cond_timeout() construct + * to wrap the condition. + * + * The type inconsistency of the wait_event_*() __ret variable is also + * on purpose; we use long where we can return timeout values and int + * otherwise. + */ + +#define ___wait_event(wq, condition, state, exclusive, ret, cmd) \ +({ \ + __label__ __out; \ + wait_queue_t __wait; \ + long __ret = ret; /* explicit shadow */ \ + \ + INIT_LIST_HEAD(&__wait.task_list); \ + if (exclusive) \ + __wait.flags = WQ_FLAG_EXCLUSIVE; \ + else \ + __wait.flags = 0; \ + \ + for (;;) { \ + long __int = prepare_to_wait_event(&wq, &__wait, state);\ + \ + if (condition) \ + break; \ + \ + if (___wait_is_interruptible(state) && __int) { \ + __ret = __int; \ + if (exclusive) { \ + abort_exclusive_wait(&wq, &__wait, \ + state, NULL); \ + goto __out; \ + } \ + break; \ + } \ + \ + cmd; \ + } \ + finish_wait(&wq, &__wait); \ +__out: __ret; \ +}) + +#define __wait_event(wq, condition) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + schedule()) + +/** + * wait_event - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. 
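+ *
+ * A minimal usage sketch (@wq and @done are illustrative names; the wait
+ * queue head is assumed to have been set up elsewhere, e.g. with
+ * init_waitqueue_head()):
+ *
+ *	// waiter				// waker
+ *	wait_event(wq, done);			done = true;
+ *						wake_up(&wq);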
+ */ +#define wait_event(wq, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __wait_event(wq, condition); \ +} while (0) + +#define __io_wait_event(wq, condition) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + io_schedule()) + +/* + * io_wait_event() -- like wait_event() but with io_schedule() + */ +#define io_wait_event(wq, condition) \ +do { \ + might_sleep(); \ + if (condition) \ + break; \ + __io_wait_event(wq, condition); \ +} while (0) + +#define __wait_event_freezable(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + schedule(); try_to_freeze()) + +/** + * wait_event_freezable - sleep (or freeze) until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE -- so as not to contribute + * to system load) until the @condition evaluates to true. The + * @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ +#define wait_event_freezable(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_freezable(wq, condition); \ + __ret; \ +}) + +#define __wait_event_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_UNINTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_timeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. + */ +#define wait_event_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_timeout(wq, condition, timeout); \ + __ret; \ +}) + +#define __wait_event_freezable_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret); try_to_freeze()) + +/* + * like wait_event_timeout() -- except it uses TASK_INTERRUPTIBLE to avoid + * increasing load and is freezable. 
+ */ +#define wait_event_freezable_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_freezable_timeout(wq, condition, timeout); \ + __ret; \ +}) + +#define __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 1, 0, \ + cmd1; schedule(); cmd2) +/* + * Just like wait_event_cmd(), except it sets exclusive flag + */ +#define wait_event_exclusive_cmd(wq, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_exclusive_cmd(wq, condition, cmd1, cmd2); \ +} while (0) + +#define __wait_event_cmd(wq, condition, cmd1, cmd2) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + cmd1; schedule(); cmd2) + +/** + * wait_event_cmd - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @cmd1: the command will be executed before sleep + * @cmd2: the command will be executed after sleep + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + */ +#define wait_event_cmd(wq, condition, cmd1, cmd2) \ +do { \ + if (condition) \ + break; \ + __wait_event_cmd(wq, condition, cmd1, cmd2); \ +} while (0) + +#define __wait_event_interruptible(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + schedule()) + +/** + * wait_event_interruptible - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a + * signal and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_interruptible(wq, condition); \ + __ret; \ +}) + +#define __wait_event_interruptible_timeout(wq, condition, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + __ret = schedule_timeout(__ret)) + +/** + * wait_event_interruptible_timeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a signal. 
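+ *
+ * A minimal sketch of checking the result (@wq and @done are illustrative):
+ *
+ *	long ret = wait_event_interruptible_timeout(wq, done, HZ);
+ *
+ *	if (ret == 0)
+ *		return -ETIMEDOUT;	// still false after one second
+ *	if (ret < 0)
+ *		return ret;		// -ERESTARTSYS: woken by a signal
+ *	// ret > 0: @done became true, with 'ret' jiffies to spare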
+ */ +#define wait_event_interruptible_timeout(wq, condition, timeout) \ +({ \ + long __ret = timeout; \ + might_sleep(); \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_timeout(wq, \ + condition, timeout); \ + __ret; \ +}) + +#define __wait_event_hrtimeout(wq, condition, timeout, state) \ +({ \ + int __ret = 0; \ + struct hrtimer_sleeper __t; \ + \ + hrtimer_init_on_stack(&__t.timer, CLOCK_MONOTONIC, \ + HRTIMER_MODE_REL); \ + hrtimer_init_sleeper(&__t, current); \ + if ((timeout).tv64 != KTIME_MAX) \ + hrtimer_start_range_ns(&__t.timer, timeout, \ + current->timer_slack_ns, \ + HRTIMER_MODE_REL); \ + \ + __ret = ___wait_event(wq, condition, state, 0, 0, \ + if (!__t.task) { \ + __ret = -ETIME; \ + break; \ + } \ + schedule()); \ + \ + hrtimer_cancel(&__t.timer); \ + destroy_hrtimer_on_stack(&__t.timer); \ + __ret; \ +}) + +/** + * wait_event_hrtimeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, as a ktime_t + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if @condition became true, or -ETIME if the timeout + * elapsed. + */ +#define wait_event_hrtimeout(wq, condition, timeout) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq, condition, timeout, \ + TASK_UNINTERRUPTIBLE); \ + __ret; \ +}) + +/** + * wait_event_interruptible_hrtimeout - sleep until a condition gets true or a timeout elapses + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @timeout: timeout, as a ktime_t + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function returns 0 if @condition became true, -ERESTARTSYS if it was + * interrupted by a signal, or -ETIME if the timeout elapsed. 
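+ *
+ * An illustrative call with a 100ms high-resolution timeout (assumes
+ * ms_to_ktime() from <linux/ktime.h>; @wq and @done are placeholder names):
+ *
+ *	int err = wait_event_interruptible_hrtimeout(wq, done, ms_to_ktime(100));
+ *
+ *	if (err == -ETIME)
+ *		return err;		// 100ms elapsed with @done still false
+ *	if (err == -ERESTARTSYS)
+ *		return err;		// a signal arrived first
+ *	// err == 0: @done became true in time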
+ */ +#define wait_event_interruptible_hrtimeout(wq, condition, timeout) \ +({ \ + long __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_hrtimeout(wq, condition, timeout, \ + TASK_INTERRUPTIBLE); \ + __ret; \ +}) + +#define __wait_event_interruptible_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ + schedule()) + +#define wait_event_interruptible_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_interruptible_exclusive(wq, condition);\ + __ret; \ +}) + +#define __wait_event_killable_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_KILLABLE, 1, 0, \ + schedule()) + +#define wait_event_killable_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_killable_exclusive(wq, condition); \ + __ret; \ +}) + + +#define __wait_event_freezable_exclusive(wq, condition) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 1, 0, \ + schedule(); try_to_freeze()) + +#define wait_event_freezable_exclusive(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_freezable_exclusive(wq, condition);\ + __ret; \ +}) + + +#define __wait_event_interruptible_locked(wq, condition, exclusive, irq) \ +({ \ + int __ret = 0; \ + DEFINE_WAIT(__wait); \ + if (exclusive) \ + __wait.flags |= WQ_FLAG_EXCLUSIVE; \ + do { \ + if (likely(list_empty(&__wait.task_list))) \ + __add_wait_queue_tail(&(wq), &__wait); \ + set_current_state(TASK_INTERRUPTIBLE); \ + if (signal_pending(current)) { \ + __ret = -ERESTARTSYS; \ + break; \ + } \ + if (irq) \ + spin_unlock_irq(&(wq).lock); \ + else \ + spin_unlock(&(wq).lock); \ + schedule(); \ + if (irq) \ + spin_lock_irq(&(wq).lock); \ + else \ + spin_lock(&(wq).lock); \ + } while (!(condition)); \ + __remove_wait_queue(&(wq), &__wait); \ + __set_current_state(TASK_RUNNING); \ + __ret; \ +}) + + +/** + * wait_event_interruptible_locked - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * It must be called with wq.lock being held. This spinlock is + * unlocked while sleeping but @condition testing is done while lock + * is held and when this macro exits the lock is held. + * + * The lock is locked/unlocked using spin_lock()/spin_unlock() + * functions which must match the way they are locked/unlocked outside + * of this macro. + * + * wake_up_locked() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a + * signal and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_locked(wq, condition) \ + ((condition) \ + ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 0)) + +/** + * wait_event_interruptible_locked_irq - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * It must be called with wq.lock being held. 
This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_locked_irq(wq, condition)		\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 0, 1))
+
+/**
+ * wait_event_interruptible_exclusive_locked - sleep exclusively until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held. This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock()/spin_unlock()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes are also waiting on the list, then once this
+ * process has been woken up, further processes are not considered.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_exclusive_locked(wq, condition)	\
+	((condition)							\
+	 ? 0 : __wait_event_interruptible_locked(wq, condition, 1, 0))
+
+/**
+ * wait_event_interruptible_exclusive_locked_irq - sleep exclusively until a condition gets true
+ * @wq: the waitqueue to wait on
+ * @condition: a C expression for the event to wait for
+ *
+ * The process is put to sleep (TASK_INTERRUPTIBLE) until the
+ * @condition evaluates to true or a signal is received.
+ * The @condition is checked each time the waitqueue @wq is woken up.
+ *
+ * It must be called with wq.lock being held. This spinlock is
+ * unlocked while sleeping but @condition testing is done while lock
+ * is held and when this macro exits the lock is held.
+ *
+ * The lock is locked/unlocked using spin_lock_irq()/spin_unlock_irq()
+ * functions which must match the way they are locked/unlocked outside
+ * of this macro.
+ *
+ * The process is put on the wait queue with a WQ_FLAG_EXCLUSIVE flag
+ * set, so if other processes are also waiting on the list, then once this
+ * process has been woken up, further processes are not considered.
+ *
+ * wake_up_locked() has to be called after changing any variable that could
+ * change the result of the wait condition.
+ *
+ * The function will return -ERESTARTSYS if it was interrupted by a
+ * signal and 0 if @condition evaluated to true.
+ */
+#define wait_event_interruptible_exclusive_locked_irq(wq, condition)	\
+	((condition)							\
+	 ? 
0 : __wait_event_interruptible_locked(wq, condition, 1, 1)) + + +#define __wait_event_killable(wq, condition) \ + ___wait_event(wq, condition, TASK_KILLABLE, 0, 0, schedule()) + +/** + * wait_event_killable - sleep until a condition gets true + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * + * The process is put to sleep (TASK_KILLABLE) until the + * @condition evaluates to true or a signal is received. + * The @condition is checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * The function will return -ERESTARTSYS if it was interrupted by a + * signal and 0 if @condition evaluated to true. + */ +#define wait_event_killable(wq, condition) \ +({ \ + int __ret = 0; \ + might_sleep(); \ + if (!(condition)) \ + __ret = __wait_event_killable(wq, condition); \ + __ret; \ +}) + + +#define __wait_event_lock_irq(wq, condition, lock, cmd) \ + (void)___wait_event(wq, condition, TASK_UNINTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock)) + +/** + * wait_event_lock_irq_cmd - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd + * and schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + */ +#define wait_event_lock_irq_cmd(wq, condition, lock, cmd) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, cmd); \ +} while (0) + +/** + * wait_event_lock_irq - sleep until a condition gets true. The + * condition is checked under the lock. This + * is expected to be called with the lock + * taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_UNINTERRUPTIBLE) until the + * @condition evaluates to true. The @condition is checked each time + * the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + */ +#define wait_event_lock_irq(wq, condition, lock) \ +do { \ + if (condition) \ + break; \ + __wait_event_lock_irq(wq, condition, lock, ); \ +} while (0) + + +#define __wait_event_interruptible_lock_irq(wq, condition, lock, cmd) \ + ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ + spin_unlock_irq(&lock); \ + cmd; \ + schedule(); \ + spin_lock_irq(&lock)) + +/** + * wait_event_interruptible_lock_irq_cmd - sleep until a condition gets true. + * The condition is checked under the lock. 
This is expected to + * be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before cmd and + * schedule() and reacquired afterwards. + * @cmd: a command which is invoked outside the critical section before + * sleep + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or a signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before invoking the cmd and going to sleep and is reacquired + * afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq_cmd(wq, condition, lock, cmd) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock, cmd); \ + __ret; \ +}) + +/** + * wait_event_interruptible_lock_irq - sleep until a condition gets true. + * The condition is checked under the lock. This is expected + * to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. + * + * The macro will return -ERESTARTSYS if it was interrupted by a signal + * and 0 if @condition evaluated to true. + */ +#define wait_event_interruptible_lock_irq(wq, condition, lock) \ +({ \ + int __ret = 0; \ + if (!(condition)) \ + __ret = __wait_event_interruptible_lock_irq(wq, \ + condition, lock,); \ + __ret; \ +}) + +#define __wait_event_interruptible_lock_irq_timeout(wq, condition, \ + lock, timeout) \ + ___wait_event(wq, ___wait_cond_timeout(condition), \ + TASK_INTERRUPTIBLE, 0, timeout, \ + spin_unlock_irq(&lock); \ + __ret = schedule_timeout(__ret); \ + spin_lock_irq(&lock)); + +/** + * wait_event_interruptible_lock_irq_timeout - sleep until a condition gets + * true or a timeout elapses. The condition is checked under + * the lock. This is expected to be called with the lock taken. + * @wq: the waitqueue to wait on + * @condition: a C expression for the event to wait for + * @lock: a locked spinlock_t, which will be released before schedule() + * and reacquired afterwards. + * @timeout: timeout, in jiffies + * + * The process is put to sleep (TASK_INTERRUPTIBLE) until the + * @condition evaluates to true or signal is received. The @condition is + * checked each time the waitqueue @wq is woken up. + * + * wake_up() has to be called after changing any variable that could + * change the result of the wait condition. + * + * This is supposed to be called while holding the lock. The lock is + * dropped before going to sleep and is reacquired afterwards. 
+ * + * The function returns 0 if the @timeout elapsed, -ERESTARTSYS if it + * was interrupted by a signal, and the remaining jiffies otherwise + * if the condition evaluated to true before the timeout elapsed. + */ +#define wait_event_interruptible_lock_irq_timeout(wq, condition, lock, \ + timeout) \ +({ \ + long __ret = timeout; \ + if (!___wait_cond_timeout(condition)) \ + __ret = __wait_event_interruptible_lock_irq_timeout( \ + wq, condition, lock, timeout); \ + __ret; \ +}) + +/* + * Waitqueues which are removed from the waitqueue_head at wakeup time + */ +void prepare_to_wait(wait_queue_head_t *q, wait_queue_t *wait, int state); +void prepare_to_wait_exclusive(wait_queue_head_t *q, wait_queue_t *wait, int state); +long prepare_to_wait_event(wait_queue_head_t *q, wait_queue_t *wait, int state); +void finish_wait(wait_queue_head_t *q, wait_queue_t *wait); +void abort_exclusive_wait(wait_queue_head_t *q, wait_queue_t *wait, unsigned int mode, void *key); +long wait_woken(wait_queue_t *wait, unsigned mode, long timeout); +int woken_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +int autoremove_wake_function(wait_queue_t *wait, unsigned mode, int sync, void *key); +int wake_bit_function(wait_queue_t *wait, unsigned mode, int sync, void *key); + +#define DEFINE_WAIT_FUNC(name, function) \ + wait_queue_t name = { \ + .private = current, \ + .func = function, \ + .task_list = LIST_HEAD_INIT((name).task_list), \ + } + +#define DEFINE_WAIT(name) DEFINE_WAIT_FUNC(name, autoremove_wake_function) + +#define DEFINE_WAIT_BIT(name, word, bit) \ + struct wait_bit_queue name = { \ + .key = __WAIT_BIT_KEY_INITIALIZER(word, bit), \ + .wait = { \ + .private = current, \ + .func = wake_bit_function, \ + .task_list = \ + LIST_HEAD_INIT((name).wait.task_list), \ + }, \ + } + +#define init_wait(wait) \ + do { \ + (wait)->private = current; \ + (wait)->func = autoremove_wake_function; \ + INIT_LIST_HEAD(&(wait)->task_list); \ + (wait)->flags = 0; \ + } while (0) + + +extern int bit_wait(struct wait_bit_key *, int); +extern int bit_wait_io(struct wait_bit_key *, int); +extern int bit_wait_timeout(struct wait_bit_key *, int); +extern int bit_wait_io_timeout(struct wait_bit_key *, int); + +/** + * wait_on_bit - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit. + * For instance, if one were to have waiters on a bitflag, one would + * call wait_on_bit() in threads waiting for the bit to clear. + * One uses wait_on_bit() where one is waiting for the bit to clear, + * but has no intention of setting it. + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait, + mode); +} + +/** + * wait_on_bit_io - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. 
This is similar to wait_on_bit(), but calls + * io_schedule() instead of schedule() for the actual waiting. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, + bit_wait_io, + mode); +} + +/** + * wait_on_bit_timeout - wait for a bit to be cleared or a timeout elapses + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * @timeout: timeout, in jiffies + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared. This is similar to wait_on_bit(), except also takes a + * timeout parameter. + * + * Returned value will be zero if the bit was cleared before the + * @timeout elapsed, or non-zero if the @timeout elapsed or process + * received a signal and the mode permitted wakeup on that signal. + */ +static inline int +wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode, + unsigned long timeout) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_timeout(word, bit, + bit_wait_timeout, + mode, timeout); +} + +/** + * wait_on_bit_action - wait for a bit to be cleared + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared, and allow the waiting action to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returned value will be zero if the bit was cleared, or non-zero + * if the process received a signal and the mode permitted wakeup + * on that signal. + */ +static inline int +wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit(word, bit, action, mode); +} + +/** + * wait_on_bit_lock - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * There is a standard hashed waitqueue table for generic use. This + * is the part of the hashtable's accessor API that waits on a bit + * when one intends to set it, for instance, trying to lock bitflags. + * For instance, if one were to have waiters trying to set bitflag + * and waiting for it to clear before setting it, one would call + * wait_on_bit() in threads waiting to be able to set the bit. + * One uses wait_on_bit_lock() where one is waiting for the bit to + * clear with the intention of setting it, and when done, clearing it. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. 
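+ *
+ * A minimal "bit lock" sketch (@flags is an illustrative unsigned long flag
+ * word and MY_LOCK_BIT a made-up bit number):
+ *
+ *	if (wait_on_bit_lock(&flags, MY_LOCK_BIT, TASK_INTERRUPTIBLE))
+ *		return -EINTR;			// a signal got here first
+ *	// bit is now set: we own the "lock", do the protected work
+ *	clear_bit(MY_LOCK_BIT, &flags);
+ *	smp_mb__after_atomic();			// order the clear before the wakeup
+ *	wake_up_bit(&flags, MY_LOCK_BIT);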
+ */ +static inline int +wait_on_bit_lock(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait, mode); +} + +/** + * wait_on_bit_lock_io - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to atomically set it. This is similar + * to wait_on_bit(), but calls io_schedule() instead of schedule() + * for the actual waiting. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_io(unsigned long *word, int bit, unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, bit_wait_io, mode); +} + +/** + * wait_on_bit_lock_action - wait for a bit to be cleared, when wanting to set it + * @word: the word being waited on, a kernel virtual address + * @bit: the bit of the word being waited on + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Use the standard hashed waitqueue table to wait for a bit + * to be cleared and then to set it, and allow the waiting action + * to be specified. + * This is like wait_on_bit() but allows fine control of how the waiting + * is done. + * + * Returns zero if the bit was (eventually) found to be clear and was + * set. Returns non-zero if a signal was delivered to the process and + * the @mode allows that signal to wake the process. + */ +static inline int +wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, + unsigned mode) +{ + might_sleep(); + if (!test_and_set_bit(bit, word)) + return 0; + return out_of_line_wait_on_bit_lock(word, bit, action, mode); +} + +/** + * wait_on_atomic_t - Wait for an atomic_t to become 0 + * @val: The atomic value being waited on, a kernel virtual address + * @action: the function used to sleep, which may take special actions + * @mode: the task state to sleep in + * + * Wait for an atomic_t to become 0. We abuse the bit-wait waitqueue table for + * the purpose of getting a waitqueue, but we set the key to a bit number + * outside of the target 'word'. 
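+ *
+ * A minimal sketch of the matching waiter/release pattern (@obj->users is an
+ * illustrative atomic_t reference count; my_wait_fn is a caller-supplied
+ * @action that typically just calls schedule() and returns 0):
+ *
+ *	// waiter: sleep until the count drops to zero
+ *	wait_on_atomic_t(&obj->users, my_wait_fn, TASK_UNINTERRUPTIBLE);
+ *
+ *	// releasing side: drop a reference and wake waiters at zero
+ *	if (atomic_dec_and_test(&obj->users))
+ *		wake_up_atomic_t(&obj->users);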
+ */
+static inline
+int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode)
+{
+	might_sleep();
+	if (atomic_read(val) == 0)
+		return 0;
+	return out_of_line_wait_on_atomic_t(val, action, mode);
+}
+
+#endif /* _LINUX_WAIT_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
new file mode 100644
index 0000000..4bad3d3
--- /dev/null
+++ b/include/linux/workqueue.h
@@ -0,0 +1,189 @@
+#ifndef __TOOLS_LINUX_WORKQUEUE_H
+#define __TOOLS_LINUX_WORKQUEUE_H
+
+#include <linux/list.h>
+#include <linux/timer.h>
+
+struct task_struct;
+struct workqueue_struct;
+struct work_struct;
+typedef void (*work_func_t)(struct work_struct *work);
+void delayed_work_timer_fn(unsigned long __data);
+
+#define work_data_bits(work) ((unsigned long *)(&(work)->data))
+
+#if 0
+enum {
+	//WORK_STRUCT_PENDING_BIT	= 0,	/* work item is pending execution */
+	//WORK_STRUCT_DELAYED_BIT	= 1,	/* work item is delayed */
+	//
+	//WORK_STRUCT_PENDING	= 1 << WORK_STRUCT_PENDING_BIT,
+	//WORK_STRUCT_DELAYED	= 1 << WORK_STRUCT_DELAYED_BIT,
+};
+#endif
+
+struct work_struct {
+	atomic_long_t data;
+	struct list_head entry;
+	work_func_t func;
+};
+
+#define INIT_WORK(_work, _func)					\
+do {								\
+	(_work)->data.counter = 0;				\
+	INIT_LIST_HEAD(&(_work)->entry);			\
+	(_work)->func = (_func);				\
+} while (0)
+
+struct delayed_work {
+	struct work_struct work;
+	struct timer_list timer;
+	struct workqueue_struct *wq;
+};
+
+#define INIT_DELAYED_WORK(_work, _func)					\
+	do {								\
+		INIT_WORK(&(_work)->work, (_func));			\
+		__setup_timer(&(_work)->timer, delayed_work_timer_fn,	\
+			      (unsigned long)(_work),			\
+			      TIMER_IRQSAFE);				\
+	} while (0)
+
+static inline struct delayed_work *to_delayed_work(struct work_struct *work)
+{
+	return container_of(work, struct delayed_work, work);
+}
+
+enum {
+	WQ_UNBOUND		= 1 << 1, /* not bound to any cpu */
+	WQ_FREEZABLE		= 1 << 2, /* freeze during suspend */
+	WQ_MEM_RECLAIM		= 1 << 3, /* may be used for memory reclaim */
+	WQ_HIGHPRI		= 1 << 4, /* high priority */
+	WQ_CPU_INTENSIVE	= 1 << 5, /* cpu intensive workqueue */
+	WQ_SYSFS		= 1 << 6, /* visible in sysfs, see wq_sysfs_register() */
+
+	/*
+	 * Per-cpu workqueues are generally preferred because they tend to
+	 * show better performance thanks to cache locality. Per-cpu
+	 * workqueues exclude the scheduler from choosing the CPU to
+	 * execute the worker threads, which has an unfortunate side effect
+	 * of increasing power consumption.
+	 *
+	 * The scheduler considers a CPU idle if it doesn't have any task
+	 * to execute and tries to keep idle cores idle to conserve power;
+	 * however, for example, a per-cpu work item scheduled from an
+	 * interrupt handler on an idle CPU will force the scheduler to
+	 * execute the work item on that CPU, breaking the idleness, which in
+	 * turn may lead to more scheduling choices which are sub-optimal
+	 * in terms of power consumption.
+	 *
+	 * Workqueues marked with WQ_POWER_EFFICIENT are per-cpu by default
+	 * but become unbound if the workqueue.power_efficient kernel param is
+	 * specified. Per-cpu workqueues which are found to contribute
+	 * significantly to power consumption are identified and
+	 * marked with this flag, and enabling the power_efficient mode
+	 * leads to noticeable power saving at the cost of a small
+	 * performance disadvantage.
+ * + * http://thread.gmane.org/gmane.linux.kernel/1480396 + */ + WQ_POWER_EFFICIENT = 1 << 7, + + __WQ_DRAINING = 1 << 16, /* internal: workqueue is draining */ + __WQ_ORDERED = 1 << 17, /* internal: workqueue is ordered */ + __WQ_LEGACY = 1 << 18, /* internal: create*_workqueue() */ + + WQ_MAX_ACTIVE = 512, /* I like 512, better ideas? */ + WQ_MAX_UNBOUND_PER_CPU = 4, /* 4 * #cpus for unbound wq */ + WQ_DFL_ACTIVE = WQ_MAX_ACTIVE / 2, +}; + +/* unbound wq's aren't per-cpu, scale max_active according to #cpus */ +#define WQ_UNBOUND_MAX_ACTIVE WQ_MAX_ACTIVE + +extern struct workqueue_struct *system_wq; +extern struct workqueue_struct *system_highpri_wq; +extern struct workqueue_struct *system_long_wq; +extern struct workqueue_struct *system_unbound_wq; +extern struct workqueue_struct *system_freezable_wq; +extern struct workqueue_struct *system_power_efficient_wq; +extern struct workqueue_struct *system_freezable_power_efficient_wq; + +extern struct workqueue_struct * +alloc_workqueue(const char *fmt, unsigned int flags, + int max_active, ...) __printf(1, 4); + +#define alloc_ordered_workqueue(fmt, flags, args...) \ + alloc_workqueue(fmt, WQ_UNBOUND | __WQ_ORDERED | (flags), 1, ##args) + +#define create_workqueue(name) \ + alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1, (name)) +#define create_freezable_workqueue(name) \ + alloc_workqueue("%s", __WQ_LEGACY | WQ_FREEZABLE | WQ_UNBOUND | \ + WQ_MEM_RECLAIM, 1, (name)) +#define create_singlethread_workqueue(name) \ + alloc_ordered_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, name) + +extern void destroy_workqueue(struct workqueue_struct *wq); + +struct workqueue_attrs *alloc_workqueue_attrs(gfp_t gfp_mask); +void free_workqueue_attrs(struct workqueue_attrs *attrs); +int apply_workqueue_attrs(struct workqueue_struct *wq, + const struct workqueue_attrs *attrs); + +extern bool queue_work(struct workqueue_struct *wq, + struct work_struct *work); +extern bool queue_delayed_work(struct workqueue_struct *wq, + struct delayed_work *work, unsigned long delay); +extern bool mod_delayed_work(struct workqueue_struct *wq, + struct delayed_work *dwork, unsigned long delay); + +extern void flush_workqueue(struct workqueue_struct *wq); +extern void drain_workqueue(struct workqueue_struct *wq); + +extern int schedule_on_each_cpu(work_func_t func); + +extern bool flush_work(struct work_struct *work); +extern bool cancel_work_sync(struct work_struct *work); + +extern bool flush_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work(struct delayed_work *dwork); +extern bool cancel_delayed_work_sync(struct delayed_work *dwork); + +extern void workqueue_set_max_active(struct workqueue_struct *wq, + int max_active); +extern bool current_is_workqueue_rescuer(void); +extern bool workqueue_congested(int cpu, struct workqueue_struct *wq); +extern unsigned int work_busy(struct work_struct *work); +extern __printf(1, 2) void set_worker_desc(const char *fmt, ...); +extern void print_worker_info(const char *log_lvl, struct task_struct *task); +extern void show_workqueue_state(void); + +static inline bool schedule_work_on(int cpu, struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +static inline bool schedule_work(struct work_struct *work) +{ + return queue_work(system_wq, work); +} + +static inline void flush_scheduled_work(void) +{ + flush_workqueue(system_wq); +} + +static inline bool schedule_delayed_work_on(int cpu, struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, 
delay); +} + +static inline bool schedule_delayed_work(struct delayed_work *dwork, + unsigned long delay) +{ + return queue_delayed_work(system_wq, dwork, delay); +} + +#endif /* __TOOLS_LINUX_WORKQUEUE_H */ diff --git a/include/linux/xattr.h b/include/linux/xattr.h new file mode 100644 index 0000000..d7fade7 --- /dev/null +++ b/include/linux/xattr.h @@ -0,0 +1,68 @@ +/* + File: linux/xattr.h + + Extended attributes handling. + + Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org> + Copyright (c) 2001-2002 Silicon Graphics, Inc. All Rights Reserved. + Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com> +*/ +#ifndef _LINUX_XATTR_H +#define _LINUX_XATTR_H + + +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/spinlock.h> +#include <uapi/linux/xattr.h> + +struct inode; +struct dentry; + +/* + * struct xattr_handler: When @name is set, match attributes with exactly that + * name. When @prefix is set instead, match attributes with that prefix and + * with a non-empty suffix. + */ +struct xattr_handler { + const char *name; + const char *prefix; + int flags; /* fs private flags */ + bool (*list)(struct dentry *dentry); + int (*get)(const struct xattr_handler *, struct dentry *dentry, + struct inode *inode, const char *name, void *buffer, + size_t size); + int (*set)(const struct xattr_handler *, struct dentry *dentry, + struct inode *inode, const char *name, const void *buffer, + size_t size, int flags); +}; + +const char *xattr_full_name(const struct xattr_handler *, const char *); + +struct xattr { + const char *name; + void *value; + size_t value_len; +}; + +ssize_t xattr_getsecurity(struct inode *, const char *, void *, size_t); +ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t); +ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size); +int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int); +int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int); +int vfs_removexattr(struct dentry *, const char *); + +ssize_t generic_getxattr(struct dentry *dentry, struct inode *inode, const char *name, void *buffer, size_t size); +ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size); +int generic_setxattr(struct dentry *dentry, struct inode *inode, + const char *name, const void *value, size_t size, int flags); +int generic_removexattr(struct dentry *dentry, const char *name); +ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name, + char **xattr_value, size_t size, gfp_t flags); + +static inline const char *xattr_prefix(const struct xattr_handler *handler) +{ + return handler->prefix ?: handler->name; +} + +#endif /* _LINUX_XATTR_H */ diff --git a/include/linux/zconf.h b/include/linux/zconf.h new file mode 100644 index 0000000..0beb75e --- /dev/null +++ b/include/linux/zconf.h @@ -0,0 +1,57 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef _ZCONF_H +#define _ZCONF_H + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. 
For example, if you want to reduce
+  the default memory requirements from 256K to 128K, compile with
+     make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+  Of course this will generally degrade compression (there's no free lunch).
+
+  The memory requirements for inflate are (in bytes) 1 << windowBits
+  that is, 32K for windowBits=15 (default value) plus a few kilobytes
+  for small objects.
+*/
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# define MAX_MEM_LEVEL 8
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2.
+ * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files
+ * created by gzip. (Files created by minigzip can still be extracted by
+ * gzip.)
+ */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+#ifndef DEF_WBITS
+# define DEF_WBITS MAX_WBITS
+#endif
+
+/* default memLevel */
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+
+	/* Type declarations */
+
+typedef unsigned char Byte;   /* 8 bits */
+typedef unsigned int uInt;    /* 16 bits or more */
+typedef unsigned long uLong;  /* 32 bits or more */
+typedef void *voidp;
+
+#endif /* _ZCONF_H */
diff --git a/include/linux/zlib.h b/include/linux/zlib.h
new file mode 100644
index 0000000..92dbbd3
--- /dev/null
+++ b/include/linux/zlib.h
@@ -0,0 +1,593 @@
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+
+  Copyright (C) 1995-2005 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty. In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly		Mark Adler
+  jloup@gzip.org		madler@alumni.caltech.edu
+
+
+  The data format used by the zlib library is described by RFCs (Request for
+  Comments) 1950 to 1952 in the files http://www.ietf.org/rfc/rfc1950.txt
+  (zlib format), rfc1951.txt (deflate format) and rfc1952.txt (gzip format).
+*/
+
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#include <linux/zconf.h>
+
+/* zlib deflate based on ZLIB_VERSION "1.1.3" */
+/* zlib inflate based on ZLIB_VERSION "1.2.3" */
+
+/*
+  This is a modified version of zlib for use inside the Linux kernel.
+  The main changes are to perform all memory allocation in advance.
+
+  Inflation Changes:
+    * Z_PACKET_FLUSH is added and used by ppp_deflate. Before returning
+      this checks there is no more input data available and the next data
+      is a STORED block. It also resets the mode to be read for the next
+      data, all as per PPP requirements.
+    * Addition of zlib_inflateIncomp which copies incompressible data into
+      the history window and adjusts the accounting without calling
+      zlib_inflate itself to inflate the data.
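+
+  As an illustration of the "all allocation in advance" convention, a
+  typical deflate call sequence might look like the sketch below (buffer
+  names, lengths and error handling are placeholders, not part of this API):
+
+	z_stream strm;
+
+	strm.workspace = vmalloc(zlib_deflate_workspacesize(MAX_WBITS,
+							     MAX_MEM_LEVEL));
+	zlib_deflateInit2(&strm, Z_DEFAULT_COMPRESSION, Z_DEFLATED,
+			  MAX_WBITS, MAX_MEM_LEVEL, Z_DEFAULT_STRATEGY);
+	strm.next_in = src;	strm.avail_in = src_len;
+	strm.next_out = dst;	strm.avail_out = dst_len;
+	zlib_deflate(&strm, Z_FINISH);
+	zlib_deflateEnd(&strm);
+	vfree(strm.workspace);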
+*/ + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed + data. This version of the library supports only one compression method + (deflation) but other algorithms will be added later and will have the same + stream interface. + + Compression can be done in a single step if the buffers are large + enough (for example if an input file is mmap'ed), or can be done by + repeated calls of the compression function. In the latter case, the + application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never + crash even in case of corrupted input. +*/ + +struct internal_state; + +typedef struct z_stream_s { + const Byte *next_in; /* next input byte */ + uLong avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total nb of input bytes read so far */ + + Byte *next_out; /* next output byte should be put there */ + uLong avail_out; /* remaining free space at next_out */ + uLong total_out; /* total nb of bytes output so far */ + + char *msg; /* last error message, NULL if no error */ + struct internal_state *state; /* not visible by applications */ + + void *workspace; /* memory allocated for this stream */ + + int data_type; /* best guess about the data type: ascii or binary */ + uLong adler; /* adler32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream *z_streamp; + +/* + The application must update next_in and avail_in when avail_in has + dropped to zero. It must update next_out and avail_out when avail_out + has dropped to zero. The application must initialize zalloc, zfree and + opaque before calling the init function. All other fields are set by the + compression library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this + if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, + pointers returned by zalloc for objects of exactly 65536 bytes *must* + have their offset normalized to zero. The default allocation function + provided by this library ensures this (see zutil.c). 
To reduce memory + requirements and avoid any allocation of 64K objects, at the expense of + compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or + progress reports. After compression, total_in holds the total size of + the uncompressed data and may be saved for use in the decompressor + (particularly if the decompressor wants to decompress everything in + a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 /* will be removed, use Z_SYNC_FLUSH instead */ +#define Z_PACKET_FLUSH 2 +#define Z_SYNC_FLUSH 3 +#define Z_FULL_FLUSH 4 +#define Z_FINISH 5 +#define Z_BLOCK 6 /* Only for inflate at present */ +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative + * values are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_ASCII 1 +#define Z_UNKNOWN 2 +/* Possible values of the data_type field */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + + /* basic functions */ + +extern int zlib_deflate_workspacesize (int windowBits, int memLevel); +/* + Returns the number of bytes that needs to be allocated for a per- + stream workspace with the specified parameters. A pointer to this + number of bytes should be returned in stream->workspace before + you call zlib_deflateInit() or zlib_deflateInit2(). If you call + zlib_deflateInit(), specify windowBits = MAX_WBITS and memLevel = + MAX_MEM_LEVEL here. If you call zlib_deflateInit2(), the windowBits + and memLevel parameters passed to zlib_deflateInit2() must not + exceed those passed here. +*/ + +/* +extern int deflateInit (z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. + If zalloc and zfree are set to NULL, deflateInit updates them to + use default allocation functions. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at + all (the input data is simply copied a block at a time). + Z_DEFAULT_COMPRESSION requests a default compromise between speed and + compression (currently equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if level is not a valid compression level, + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). + msg is set to null if there is no error message. deflateInit does not + perform any compression: this will be done by deflate(). +*/ + + +extern int zlib_deflate (z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. 
It may introduce some + output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary (in interactive applications). + Some output may be provided even if flush is not set. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating avail_in or avail_out accordingly; avail_out + should never be zero before the call. The application can consume the + compressed output when it wants, for example when the output buffer is full + (avail_out == 0), or after each call of deflate(). If deflate returns Z_OK + and with zero avail_out, it must be called again after making room in the + output buffer because there might be more output pending. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In particular + avail_in is zero after the call if enough output space has been provided + before the call.) Flushing may degrade compression for some compression + algorithms and so it should be used only when necessary. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + the compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there + was enough output space; if deflate returns with Z_OK, this function must be + called again with Z_FINISH and more output space (updated avail_out) but no + more input data, until it returns with Z_STREAM_END or an error. After + deflate has returned Z_STREAM_END, the only possible operations on the + stream are deflateReset or deflateEnd. + + Z_FINISH can be used immediately after deflateInit if all the compression + is to be done in a single step. In this case, avail_out must be at least + 0.1% larger than avail_in plus 12 bytes. If deflate does not return + Z_STREAM_END, then it must be called again as described above. + + deflate() sets strm->adler to the adler32 checksum of all input read + so far (that is, total_in bytes). + + deflate() may update data_type if it can make a good guess about + the input data type (Z_ASCII or Z_BINARY). In doubt, the data is considered + binary. 
This field is only for information purposes and does not affect + the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was NULL), Z_BUF_ERROR if no progress is possible + (for example avail_in or avail_out was zero). +*/ + + +extern int zlib_deflateEnd (z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, + msg may be set but then points to a static string (which must not be + deallocated). +*/ + + +extern int zlib_inflate_workspacesize (void); +/* + Returns the number of bytes that needs to be allocated for a per- + stream workspace. A pointer to this number of bytes should be + returned in stream->workspace before calling zlib_inflateInit(). +*/ + +/* +extern int zlib_inflateInit (z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, and workspace must be initialized before by + the caller. If next_in is not NULL and avail_in is large enough (the exact + value depends on the compression method), inflateInit determines the + compression method from the zlib header and allocates all data structures + accordingly; otherwise the allocation will be deferred to the first call of + inflate. If zalloc and zfree are set to NULL, inflateInit updates them to + use default allocation functions. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller. msg is set to null if there is no error + message. inflateInit does not perform any decompression apart from reading + the zlib header if present: this will be done by inflate(). (So next_in and + avail_in may be modified, but next_out and avail_out are unchanged.) +*/ + + +extern int zlib_inflate (z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in is updated and processing + will resume at this point for the next call of inflate(). + + - Provide more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there + is no more input data or no more space in the output buffer (see below + about the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming + more output, and updating the next_* and avail_* values accordingly. 
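+
+  For illustration only (the kmalloc()-based workspace setup, the buffer
+  names src/out_buf, their lengths and the consume_output() helper are
+  assumptions rather than part of this header, and error handling is
+  omitted), a decompression loop obeying these rules might look like:
+
+     z_stream strm;
+     int ret;
+
+     memset(&strm, 0, sizeof(strm));
+     strm.workspace = kmalloc(zlib_inflate_workspacesize(), GFP_KERNEL);
+     zlib_inflateInit(&strm);
+
+     strm.next_in  = src;
+     strm.avail_in = src_len;
+     do {
+             strm.next_out  = out_buf;
+             strm.avail_out = out_buf_len;
+             ret = zlib_inflate(&strm, Z_NO_FLUSH);
+             consume_output(out_buf, out_buf_len - strm.avail_out);
+     } while (ret == Z_OK);
+
+     zlib_inflateEnd(&strm);
+     kfree(strm.workspace);
+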
+ The application can consume the uncompressed output when it wants, for + example when the output buffer is full (avail_out == 0), or after each + call of inflate(). If inflate returns Z_OK and with zero avail_out, it + must be called again after making room in the output buffer because there + might be more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, + Z_FINISH, or Z_BLOCK. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() stop + if and when it gets to the next deflate block boundary. When decoding the + zlib or gzip format, this will cause inflate() to return immediately after + the header and before the first block. When doing a raw inflate, inflate() + will go ahead and process the first block, and will return when it gets to + the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + Also to assist in this, on return inflate() will set strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 + if inflate() is currently decoding the last block in the deflate stream, + plus 128 if inflate() returned immediately after decoding an end-of-block + code or decoding the complete header up to just before the first byte of the + deflate stream. The end-of-block will not be indicated until all of the + uncompressed data from that block has been written to strm->next_out. The + number of unused bits may in general be greater than seven, except when + bit 7 of data_type is set, in which case the number of unused bits will be + less than eight. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step + (a single call of inflate), the parameter flush should be set to + Z_FINISH. In this case all pending input is processed and all pending + output is flushed; avail_out must be large enough to hold all the + uncompressed data. (The size of the uncompressed data may have been saved + by the compressor for this purpose.) The next operation on this stream must + be inflateEnd to deallocate the decompression state. The use of Z_FINISH + is never required, but can be used to inform inflate that a faster approach + may be used for the single inflate() call. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the only effect of the flush parameter in this implementation + is on the return value of inflate(), as noted below, or when it returns early + because Z_BLOCK is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the adler32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the adler32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed adler32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() will decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically. 
Any information + contained in the gzip header is not retained, so applications that need that + information should instead use raw inflate, see inflateInit2() below, or + inflateBack() and perform their own processing of the gzip header and + trailer. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value), Z_STREAM_ERROR if the stream structure was inconsistent (for example + if next_in or next_out was NULL), Z_MEM_ERROR if there was not enough memory, + Z_BUF_ERROR if no progress is possible or if there was not enough room in the + output buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may then + call inflateSync() to look for a good compression block if a partial recovery + of the data is desired. +*/ + + +extern int zlib_inflateEnd (z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any + pending output. + + inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state + was inconsistent. In the error case, msg may be set but then points to a + static string (which must not be deallocated). +*/ + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +extern int deflateInit2 (z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. The + fields next_in, zalloc, zfree and opaque must be initialized before by + the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but + is slow and reduces compression ratio; memLevel=9 uses maximum memory + for optimal speed. The default value is 8. See zconf.h for total memory + usage as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), or Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match). Filtered data consists mostly of small values with a + somewhat random distribution. In this case, the compression algorithm is + tuned to compress them better. The effect of Z_FILTERED is to force more + Huffman coding and less string matching; it is somewhat intermediate + between Z_DEFAULT and Z_HUFFMAN_ONLY. The strategy parameter only affects + the compression ratio but not the correctness of the compressed output even + if it is not set appropriately. 
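+
+   Purely as an illustration (the vmalloc()-based workspace setup, the
+   handle_error() helper and the particular parameter values are
+   assumptions, not requirements of this header), a caller wanting a
+   smaller 4K window could use the zlib_deflateInit2() wrapper declared
+   below like this:
+
+      z_stream strm;
+
+      memset(&strm, 0, sizeof(strm));
+      strm.workspace = vmalloc(zlib_deflate_workspacesize(12, 8));
+      if (strm.workspace == NULL ||
+          zlib_deflateInit2(&strm, Z_BEST_SPEED, Z_DEFLATED,
+                            12, 8, Z_DEFAULT_STRATEGY) != Z_OK)
+              handle_error();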
+ + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid + method). msg is set to null if there is no error message. deflateInit2 does + not perform any compression: this will be done by deflate(). +*/ + +extern int zlib_deflateReset (z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, + but does not free and reallocate all the internal compression state. + The stream will keep the same compression level and any other attributes + that may have been set by deflateInit2. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +static inline unsigned long deflateBound(unsigned long s) +{ + return s + ((s + 7) >> 3) + ((s + 63) >> 6) + 11; +} + +/* +extern int inflateInit2 (z_streamp strm, int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an adler32 or a crc32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is + a crc32 instead of an adler32. + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if a parameter is invalid (such as a null strm). msg + is set to null if there is no error message. inflateInit2 does not perform + any decompression apart from reading the zlib header if present: this will + be done by inflate(). (So next_in and avail_in may be modified, but next_out + and avail_out are unchanged.) +*/ + +extern int zlib_inflateReset (z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate all the internal decompression state. 
+ The stream will keep attributes that may have been set by inflateInit2. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being NULL). +*/ + +extern int zlib_inflateIncomp (z_stream *strm); +/* + This function adds the data at next_in (avail_in bytes) to the output + history without performing any output. There must be no pending output, + and the decompressor must be expecting to see the start of a block. + Calling this function is equivalent to decompressing a stored block + containing the data at next_in (except that the data is not output). +*/ + +#define zlib_deflateInit(strm, level) \ + zlib_deflateInit2((strm), (level), Z_DEFLATED, MAX_WBITS, \ + DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY) +#define zlib_inflateInit(strm) \ + zlib_inflateInit2((strm), DEF_WBITS) + +extern int zlib_deflateInit2(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy); +extern int zlib_inflateInit2(z_streamp strm, int windowBits); + +#if !defined(_Z_UTIL_H) && !defined(NO_DUMMY_DECL) + struct internal_state {int dummy;}; /* hack for buggy compilers */ +#endif + +/* Utility function: initialize zlib, unpack binary blob, clean up zlib, + * return len or negative error code. */ +extern int zlib_inflate_blob(void *dst, unsigned dst_sz, const void *src, unsigned src_sz); + +#endif /* _ZLIB_H */ diff --git a/include/linux/zutil.h b/include/linux/zutil.h new file mode 100644 index 0000000..8caa7d3 --- /dev/null +++ b/include/linux/zutil.h @@ -0,0 +1,108 @@ +/* zutil.h -- internal interface and configuration of the compression library + * Copyright (C) 1995-1998 Jean-loup Gailly. + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* WARNING: this file should *not* be used by applications. It is + part of the implementation of the compression library and is + subject to change. Applications should only use zlib.h. + */ + +/* @(#) $Id: zutil.h,v 1.1 2000/01/01 03:32:23 davem Exp $ */ + +#ifndef _Z_UTIL_H +#define _Z_UTIL_H + +#include <stdlib.h> +#include <string.h> +#include <linux/zlib.h> +#include <linux/string.h> +#include <linux/kernel.h> + +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + + /* common constants */ + +#define STORED_BLOCK 0 +#define STATIC_TREES 1 +#define DYN_TREES 2 +/* The three kinds of block type */ + +#define MIN_MATCH 3 +#define MAX_MATCH 258 +/* The minimum and maximum match lengths */ + +#define PRESET_DICT 0x20 /* preset dictionary flag in zlib header */ + + /* target dependencies */ + + /* Common defaults */ + +#ifndef OS_CODE +# define OS_CODE 0x03 /* assume Unix */ +#endif + + /* functions */ + +typedef uLong (*check_func) (uLong check, const Byte *buf, + uInt len); + + + /* checksum functions */ + +#define BASE 65521L /* largest prime smaller than 65536 */ +#define NMAX 5552 +/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */ + +#define DO1(buf,i) {s1 += buf[i]; s2 += s1;} +#define DO2(buf,i) DO1(buf,i); DO1(buf,i+1); +#define DO4(buf,i) DO2(buf,i); DO2(buf,i+2); +#define DO8(buf,i) DO4(buf,i); DO4(buf,i+4); +#define DO16(buf) DO8(buf,0); DO8(buf,8); + +/* ========================================================================= */ +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. If buf is NULL, this function returns + the required initial value for the checksum. 
+  An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+  much faster. Usage example:
+
+    uLong adler = zlib_adler32(0L, NULL, 0);
+
+    while (read_buffer(buffer, length) != EOF) {
+      adler = zlib_adler32(adler, buffer, length);
+    }
+    if (adler != original_adler) error();
+*/
+static inline uLong zlib_adler32(uLong adler,
+				 const Byte *buf,
+				 uInt len)
+{
+	unsigned long s1 = adler & 0xffff;
+	unsigned long s2 = (adler >> 16) & 0xffff;
+	int k;
+
+	if (buf == NULL) return 1L;
+
+	while (len > 0) {
+		k = len < NMAX ? len : NMAX;
+		len -= k;
+		while (k >= 16) {
+			DO16(buf);
+			buf += 16;
+			k -= 16;
+		}
+		if (k != 0) do {
+			s1 += *buf++;
+			s2 += s1;
+		} while (--k);
+		s1 %= BASE;
+		s2 %= BASE;
+	}
+	return (s2 << 16) | s1;
+}
+
+#endif /* _Z_UTIL_H */