summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/net/inet_frag.h84
-rw-r--r--include/net/ipv6.h2
-rw-r--r--net/ipv4/inet_fragment.c39
-rw-r--r--net/ipv4/ip_fragment.c28
-rw-r--r--net/ipv6/netfilter/nf_conntrack_reasm.c11
-rw-r--r--net/ipv6/reassembly.c10
6 files changed, 118 insertions, 56 deletions
diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h
index 32786a044718..3f237db0a426 100644
--- a/include/net/inet_frag.h
+++ b/include/net/inet_frag.h
@@ -1,10 +1,17 @@
#ifndef __NET_FRAG_H__
#define __NET_FRAG_H__
+#include <linux/percpu_counter.h>
+
struct netns_frags {
int nqueues;
- atomic_t mem;
struct list_head lru_list;
+ spinlock_t lru_lock;
+
+ /* The percpu_counter "mem" need to be cacheline aligned.
+ * mem.count must not share cacheline with other writers
+ */
+ struct percpu_counter mem ____cacheline_aligned_in_smp;
/* sysctls */
int timeout;
@@ -13,12 +20,11 @@ struct netns_frags {
};
struct inet_frag_queue {
- struct hlist_node list;
- struct netns_frags *net;
- struct list_head lru_list; /* lru list member */
spinlock_t lock;
- atomic_t refcnt;
struct timer_list timer; /* when will this queue expire? */
+ struct list_head lru_list; /* lru list member */
+ struct hlist_node list;
+ atomic_t refcnt;
struct sk_buff *fragments; /* list of received fragments */
struct sk_buff *fragments_tail;
ktime_t stamp;
@@ -31,24 +37,29 @@ struct inet_frag_queue {
#define INET_FRAG_LAST_IN 1
u16 max_size;
+
+ struct netns_frags *net;
};
#define INETFRAGS_HASHSZ 64
struct inet_frags {
struct hlist_head hash[INETFRAGS_HASHSZ];
- rwlock_t lock;
- u32 rnd;
- int qsize;
+ /* This rwlock is a global lock (seperate per IPv4, IPv6 and
+ * netfilter). Important to keep this on a seperate cacheline.
+ */
+ rwlock_t lock ____cacheline_aligned_in_smp;
int secret_interval;
struct timer_list secret_timer;
+ u32 rnd;
+ int qsize;
unsigned int (*hashfn)(struct inet_frag_queue *);
+ bool (*match)(struct inet_frag_queue *q, void *arg);
void (*constructor)(struct inet_frag_queue *q,
void *arg);
void (*destructor)(struct inet_frag_queue *);
void (*skb_free)(struct sk_buff *);
- bool (*match)(struct inet_frag_queue *q, void *arg);
void (*frag_expire)(unsigned long data);
};
@@ -72,4 +83,59 @@ static inline void inet_frag_put(struct inet_frag_queue *q, struct inet_frags *f
inet_frag_destroy(q, f, NULL);
}
+/* Memory Tracking Functions. */
+
+/* The default percpu_counter batch size is not big enough to scale to
+ * fragmentation mem acct sizes.
+ * The mem size of a 64K fragment is approx:
+ * (44 fragments * 2944 truesize) + frag_queue struct(200) = 129736 bytes
+ */
+static unsigned int frag_percpu_counter_batch = 130000;
+
+static inline int frag_mem_limit(struct netns_frags *nf)
+{
+ return percpu_counter_read(&nf->mem);
+}
+
+static inline void sub_frag_mem_limit(struct inet_frag_queue *q, int i)
+{
+ __percpu_counter_add(&q->net->mem, -i, frag_percpu_counter_batch);
+}
+
+static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i)
+{
+ __percpu_counter_add(&q->net->mem, i, frag_percpu_counter_batch);
+}
+
+static inline void init_frag_mem_limit(struct netns_frags *nf)
+{
+ percpu_counter_init(&nf->mem, 0);
+}
+
+static inline int sum_frag_mem_limit(struct netns_frags *nf)
+{
+ return percpu_counter_sum_positive(&nf->mem);
+}
+
+static inline void inet_frag_lru_move(struct inet_frag_queue *q)
+{
+ spin_lock(&q->net->lru_lock);
+ list_move_tail(&q->lru_list, &q->net->lru_list);
+ spin_unlock(&q->net->lru_lock);
+}
+
+static inline void inet_frag_lru_del(struct inet_frag_queue *q)
+{
+ spin_lock(&q->net->lru_lock);
+ list_del(&q->lru_list);
+ spin_unlock(&q->net->lru_lock);
+}
+
+static inline void inet_frag_lru_add(struct netns_frags *nf,
+ struct inet_frag_queue *q)
+{
+ spin_lock(&nf->lru_lock);
+ list_add_tail(&q->lru_list, &nf->lru_list);
+ spin_unlock(&nf->lru_lock);
+}
#endif
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c1878f7049c8..dc30b60975ef 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -288,7 +288,7 @@ static inline int ip6_frag_nqueues(struct net *net)
static inline int ip6_frag_mem(struct net *net)
{
- return atomic_read(&net->ipv6.frags.mem);
+ return sum_frag_mem_limit(&net->ipv6.frags);
}
#endif
diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c
index 4750d2b74d79..2e453bde6992 100644
--- a/net/ipv4/inet_fragment.c
+++ b/net/ipv4/inet_fragment.c
@@ -73,8 +73,9 @@ EXPORT_SYMBOL(inet_frags_init);
void inet_frags_init_net(struct netns_frags *nf)
{
nf->nqueues = 0;
- atomic_set(&nf->mem, 0);
+ init_frag_mem_limit(nf);
INIT_LIST_HEAD(&nf->lru_list);
+ spin_lock_init(&nf->lru_lock);
}
EXPORT_SYMBOL(inet_frags_init_net);
@@ -91,6 +92,8 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f)
local_bh_disable();
inet_frag_evictor(nf, f, true);
local_bh_enable();
+
+ percpu_counter_destroy(&nf->mem);
}
EXPORT_SYMBOL(inet_frags_exit_net);
@@ -98,9 +101,9 @@ static inline void fq_unlink(struct inet_frag_queue *fq, struct inet_frags *f)
{
write_lock(&f->lock);
hlist_del(&fq->list);
- list_del(&fq->lru_list);
fq->net->nqueues--;
write_unlock(&f->lock);
+ inet_frag_lru_del(fq);
}
void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
@@ -117,12 +120,8 @@ void inet_frag_kill(struct inet_frag_queue *fq, struct inet_frags *f)
EXPORT_SYMBOL(inet_frag_kill);
static inline void frag_kfree_skb(struct netns_frags *nf, struct inet_frags *f,
- struct sk_buff *skb, int *work)
+ struct sk_buff *skb)
{
- if (work)
- *work -= skb->truesize;
-
- atomic_sub(skb->truesize, &nf->mem);
if (f->skb_free)
f->skb_free(skb);
kfree_skb(skb);
@@ -133,6 +132,7 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
{
struct sk_buff *fp;
struct netns_frags *nf;
+ unsigned int sum, sum_truesize = 0;
WARN_ON(!(q->last_in & INET_FRAG_COMPLETE));
WARN_ON(del_timer(&q->timer) != 0);
@@ -143,13 +143,14 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f,
while (fp) {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(nf, f, fp, work);
+ sum_truesize += fp->truesize;
+ frag_kfree_skb(nf, f, fp);
fp = xp;
}
-
+ sum = sum_truesize + f->qsize;
if (work)
- *work -= f->qsize;
- atomic_sub(f->qsize, &nf->mem);
+ *work -= sum;
+ sub_frag_mem_limit(q, sum);
if (f->destructor)
f->destructor(q);
@@ -164,22 +165,23 @@ int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force)
int work, evicted = 0;
if (!force) {
- if (atomic_read(&nf->mem) <= nf->high_thresh)
+ if (frag_mem_limit(nf) <= nf->high_thresh)
return 0;
}
- work = atomic_read(&nf->mem) - nf->low_thresh;
+ work = frag_mem_limit(nf) - nf->low_thresh;
while (work > 0) {
- read_lock(&f->lock);
+ spin_lock(&nf->lru_lock);
+
if (list_empty(&nf->lru_list)) {
- read_unlock(&f->lock);
+ spin_unlock(&nf->lru_lock);
break;
}
q = list_first_entry(&nf->lru_list,
struct inet_frag_queue, lru_list);
atomic_inc(&q->refcnt);
- read_unlock(&f->lock);
+ spin_unlock(&nf->lru_lock);
spin_lock(&q->lock);
if (!(q->last_in & INET_FRAG_COMPLETE))
@@ -233,9 +235,9 @@ static struct inet_frag_queue *inet_frag_intern(struct netns_frags *nf,
atomic_inc(&qp->refcnt);
hlist_add_head(&qp->list, &f->hash[hash]);
- list_add_tail(&qp->lru_list, &nf->lru_list);
nf->nqueues++;
write_unlock(&f->lock);
+ inet_frag_lru_add(nf, qp);
return qp;
}
@@ -250,7 +252,8 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
q->net = nf;
f->constructor(q, arg);
- atomic_add(f->qsize, &nf->mem);
+ add_frag_mem_limit(q, f->qsize);
+
setup_timer(&q->timer, f->frag_expire, (unsigned long)q);
spin_lock_init(&q->lock);
atomic_set(&q->refcnt, 1);
diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c
index f55a4e61bfb8..1211613c6c34 100644
--- a/net/ipv4/ip_fragment.c
+++ b/net/ipv4/ip_fragment.c
@@ -122,7 +122,7 @@ int ip_frag_nqueues(struct net *net)
int ip_frag_mem(struct net *net)
{
- return atomic_read(&net->ipv4.frags.mem);
+ return sum_frag_mem_limit(&net->ipv4.frags);
}
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
@@ -161,13 +161,6 @@ static bool ip4_frag_match(struct inet_frag_queue *q, void *a)
qp->user == arg->user;
}
-/* Memory Tracking Functions. */
-static void frag_kfree_skb(struct netns_frags *nf, struct sk_buff *skb)
-{
- atomic_sub(skb->truesize, &nf->mem);
- kfree_skb(skb);
-}
-
static void ip4_frag_init(struct inet_frag_queue *q, void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
@@ -340,6 +333,7 @@ static inline int ip_frag_too_far(struct ipq *qp)
static int ip_frag_reinit(struct ipq *qp)
{
struct sk_buff *fp;
+ unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
atomic_inc(&qp->q.refcnt);
@@ -349,9 +343,12 @@ static int ip_frag_reinit(struct ipq *qp)
fp = qp->q.fragments;
do {
struct sk_buff *xp = fp->next;
- frag_kfree_skb(qp->q.net, fp);
+
+ sum_truesize += fp->truesize;
+ kfree_skb(fp);
fp = xp;
} while (fp);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
qp->q.last_in = 0;
qp->q.len = 0;
@@ -496,7 +493,8 @@ found:
qp->q.fragments = next;
qp->q.meat -= free_it->len;
- frag_kfree_skb(qp->q.net, free_it);
+ sub_frag_mem_limit(&qp->q, free_it->truesize);
+ kfree_skb(free_it);
}
}
@@ -519,7 +517,7 @@ found:
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
- atomic_add(skb->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, skb->truesize);
if (offset == 0)
qp->q.last_in |= INET_FRAG_FIRST_IN;
@@ -531,9 +529,7 @@ found:
qp->q.meat == qp->q.len)
return ip_frag_reasm(qp, prev, dev);
- write_lock(&ip4_frags.lock);
- list_move_tail(&qp->q.lru_list, &qp->q.net->lru_list);
- write_unlock(&ip4_frags.lock);
+ inet_frag_lru_move(&qp->q);
return -EINPROGRESS;
err:
@@ -617,7 +613,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &qp->q.net->mem);
+ add_frag_mem_limit(&qp->q, clone->truesize);
}
skb_push(head, head->data - skb_network_header(head));
@@ -645,7 +641,7 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *prev,
}
fp = next;
}
- atomic_sub(sum_truesize, &qp->q.net->mem);
+ sub_frag_mem_limit(&qp->q, sum_truesize);
head->next = NULL;
head->dev = dev;
diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c
index 3dacecc99065..c674f158efa8 100644
--- a/net/ipv6/netfilter/nf_conntrack_reasm.c
+++ b/net/ipv6/netfilter/nf_conntrack_reasm.c
@@ -319,7 +319,7 @@ found:
fq->q.meat += skb->len;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
- atomic_add(skb->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -328,9 +328,8 @@ found:
fq->nhoffset = nhoff;
fq->q.last_in |= INET_FRAG_FIRST_IN;
}
- write_lock(&nf_frags.lock);
- list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
- write_unlock(&nf_frags.lock);
+
+ inet_frag_lru_move(&fq->q);
return 0;
discard_fq:
@@ -398,7 +397,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
clone->ip_summed = head->ip_summed;
NFCT_FRAG6_CB(clone)->orig = NULL;
- atomic_add(clone->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -422,7 +421,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)
head->csum = csum_add(head->csum, fp->csum);
head->truesize += fp->truesize;
}
- atomic_sub(head->truesize, &fq->q.net->mem);
+ sub_frag_mem_limit(&fq->q, head->truesize);
head->local_df = 1;
head->next = NULL;
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
index e5253ec9e0fc..bab2c270f292 100644
--- a/net/ipv6/reassembly.c
+++ b/net/ipv6/reassembly.c
@@ -327,7 +327,7 @@ found:
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
- atomic_add(skb->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
@@ -341,9 +341,7 @@ found:
fq->q.meat == fq->q.len)
return ip6_frag_reasm(fq, prev, dev);
- write_lock(&ip6_frags.lock);
- list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list);
- write_unlock(&ip6_frags.lock);
+ inet_frag_lru_move(&fq->q);
return -1;
discard_fq:
@@ -429,7 +427,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
head->len -= clone->len;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
- atomic_add(clone->truesize, &fq->q.net->mem);
+ add_frag_mem_limit(&fq->q, clone->truesize);
}
/* We have to remove fragment header from datagram and to relocate
@@ -467,7 +465,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,
}
fp = next;
}
- atomic_sub(sum_truesize, &fq->q.net->mem);
+ sub_frag_mem_limit(&fq->q, sum_truesize);
head->next = NULL;
head->dev = dev;