summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- main/linux-pae/0002-gre-fix-hard-header-destination-address-checking.patch | 44
-rw-r--r-- main/linux-pae/0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch | 39
-rw-r--r-- main/linux-pae/0004-arp-flush-arp-cache-on-device-change.patch | 29
-rw-r--r-- main/linux-pae/0005-r8169-fix-broken-register-writes.patch (renamed from main/linux-pae/net-git-78f1cd-r8169-fix-broken-register-writes.patch) | 17
-rw-r--r-- main/linux-pae/0006-r8169-offical-fix-for-CVE-2009-4537-overlength-frame.patch (renamed from main/linux-pae/net-git-c0cd88-r8169-offical-fix-for-CVE-2009-4537-overlength-frame-DMAs.patch) | 17
-rw-r--r-- main/linux-pae/0007-r8169-Fix-rtl8169_rx_interrupt.patch | 89
-rw-r--r-- main/linux-pae/0008-r8169-clean-up-my-printk-uglyness.patch | 36
-rw-r--r-- main/linux-pae/0009-ipsec-Fix-bogus-bundle-flowi.patch | 110
-rw-r--r-- main/linux-pae/0010-xfrm-Remove-xfrm_state_genid.patch | 54
-rw-r--r-- main/linux-pae/0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch | 35
-rw-r--r-- main/linux-pae/0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch | 105
-rw-r--r-- main/linux-pae/0013-flow-structurize-flow-cache.patch | 395
-rw-r--r-- main/linux-pae/0014-flow-virtualize-flow-cache-entry-methods.patch | 513
-rw-r--r-- main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch | 1068
-rw-r--r-- main/linux-pae/0016-xfrm-remove-policy-garbage-collection.patch | 91
-rw-r--r-- main/linux-pae/0017-flow-delayed-deletion-of-flow-cache-entries.patch | 231
-rw-r--r-- main/linux-pae/0018-xfrm-Fix-crashes-in-xfrm_lookup.patch | 46
-rw-r--r-- main/linux-pae/APKBUILD | 54
-rw-r--r-- main/linux-pae/arp.patch | 14
-rw-r--r-- main/linux-pae/ip_gre.patch | 15
-rw-r--r-- main/linux-pae/ip_gre2.patch | 17
-rw-r--r-- main/linux-pae/xfrm-cache-size-revert.patch | 12
22 files changed, 2941 insertions, 90 deletions
diff --git a/main/linux-pae/0002-gre-fix-hard-header-destination-address-checking.patch b/main/linux-pae/0002-gre-fix-hard-header-destination-address-checking.patch
new file mode 100644
index 00000000..36a0ae44
--- /dev/null
+++ b/main/linux-pae/0002-gre-fix-hard-header-destination-address-checking.patch
@@ -0,0 +1,44 @@
+From 9082391046940c410eac3bad065c8701998b5cab Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 3 Mar 2010 04:01:13 +0000
+Subject: [PATCH 02/18] gre: fix hard header destination address checking
+
+ipgre_header() can be called with zero daddr when the gre device is
+configured as multipoint tunnel and still has the NOARP flag set (which is
+typically cleared by the userspace arp daemon). If the NOARP packets are
+not dropped, ipgre_tunnel_xmit() will take rt->rt_gateway (= NBMA IP) and
+use that for route look up (and may lead to bogus xfrm acquires).
+
+The multicast address check is removed as sending to multicast group should
+be ok. In fact, if gre device has a multicast address as destination
+ipgre_header is always called with multicast address.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 6d55cb91a0020ac0d78edcad61efd6c8cf5785a3)
+---
+ net/ipv4/ip_gre.c | 7 ++-----
+ 1 files changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+index 1433338..ac88ce5 100644
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -1137,12 +1137,9 @@ static int ipgre_header(struct sk_buff *skb, struct net_device *dev,
+
+ if (saddr)
+ memcpy(&iph->saddr, saddr, 4);
+-
+- if (daddr) {
++ if (daddr)
+ memcpy(&iph->daddr, daddr, 4);
+- return t->hlen;
+- }
+- if (iph->daddr && !ipv4_is_multicast(iph->daddr))
++ if (iph->daddr)
+ return t->hlen;
+
+ return -t->hlen;
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch b/main/linux-pae/0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch
new file mode 100644
index 00000000..61d7c9a6
--- /dev/null
+++ b/main/linux-pae/0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch
@@ -0,0 +1,39 @@
+From cd0e9d08480e1e0648e17d099ecf50f6fd8714e5 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Sat, 20 Mar 2010 02:27:58 +0000
+Subject: [PATCH 03/18] ip_gre: include route header_len in max_headroom calculation
+
+Taking route's header_len into account, and updating gre device
+needed_headroom will give better hints on upper bound of required
+headroom. This is useful if the gre traffic is xfrm'ed.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 243aad830e8a4cdda261626fbaeddde16b08d04a)
+---
+ net/ipv4/ip_gre.c | 4 +++-
+ 1 files changed, 3 insertions(+), 1 deletions(-)
+
+diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+index ac88ce5..7f1ff73 100644
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -803,11 +803,13 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev
+ tunnel->err_count = 0;
+ }
+
+- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
++ max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
+
+ if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
+ (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
+ struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
++ if (max_headroom > dev->needed_headroom)
++ dev->needed_headroom = max_headroom;
+ if (!new_skb) {
+ ip_rt_put(rt);
+ stats->tx_dropped++;
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0004-arp-flush-arp-cache-on-device-change.patch b/main/linux-pae/0004-arp-flush-arp-cache-on-device-change.patch
new file mode 100644
index 00000000..85161ea3
--- /dev/null
+++ b/main/linux-pae/0004-arp-flush-arp-cache-on-device-change.patch
@@ -0,0 +1,29 @@
+From 8a0e3ea4924059a7268446177d6869e3399adbb2 Mon Sep 17 00:00:00 2001
+From: Timo Teras <timo.teras@iki.fi>
+Date: Mon, 12 Apr 2010 13:46:45 +0000
+Subject: [PATCH 04/18] arp: flush arp cache on device change
+
+If IFF_NOARP is changed, we must flush the arp cache.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+---
+ net/ipv4/arp.c | 3 +++
+ 1 files changed, 3 insertions(+), 0 deletions(-)
+
+diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
+index 4e80f33..580bfc3 100644
+--- a/net/ipv4/arp.c
++++ b/net/ipv4/arp.c
+@@ -1200,6 +1200,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
+ neigh_changeaddr(&arp_tbl, dev);
+ rt_cache_flush(dev_net(dev), 0);
+ break;
++ case NETDEV_CHANGE:
++ neigh_changeaddr(&arp_tbl, dev);
++ break;
+ default:
+ break;
+ }
+--
+1.7.0.2
+
diff --git a/main/linux-pae/net-git-78f1cd-r8169-fix-broken-register-writes.patch b/main/linux-pae/0005-r8169-fix-broken-register-writes.patch
index f5f72acc..bfa8df29 100644
--- a/main/linux-pae/net-git-78f1cd-r8169-fix-broken-register-writes.patch
+++ b/main/linux-pae/0005-r8169-fix-broken-register-writes.patch
@@ -1,9 +1,9 @@
-From 78f1cd02457252e1ffbc6caa44a17424a45286b8 Mon Sep 17 00:00:00 2001
+From 89f350c4ec426b4c1db6ef269546940365d918e1 Mon Sep 17 00:00:00 2001
From: Francois Romieu <romieu@fr.zoreil.com>
Date: Sat, 27 Mar 2010 19:35:46 -0700
-Subject: [PATCH] r8169: fix broken register writes
+Subject: [PATCH 05/18] r8169: fix broken register writes
MIME-Version: 1.0
-Content-Type: text/plain; charset=utf8
+Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
This is quite similar to b39fe41f481d20c201012e4483e76c203802dda7
@@ -14,19 +14,20 @@ level before being merged into a 64 bit logical entity.
Credits go to Ben Hutchings <ben@decadent.org.uk> for the MAR
registers (aka "multicast is broken for ages on ARM) and to
-Timo Teräs <timo.teras@iki.fi> for the MAC registers.
+Timo Teräs <timo.teras@iki.fi> for the MAC registers.
Signed-off-by: Francois Romieu <romieu@fr.zoreil.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 78f1cd02457252e1ffbc6caa44a17424a45286b8)
---
drivers/net/r8169.c | 4 ++--
1 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
-index b93fd23..7193afc 100644
+index 0fe2fc9..24599b5 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
-@@ -2820,8 +2820,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
+@@ -2827,8 +2827,8 @@ static void rtl_rar_set(struct rtl8169_private *tp, u8 *addr)
spin_lock_irq(&tp->lock);
RTL_W8(Cfg9346, Cfg9346_Unlock);
@@ -36,7 +37,7 @@ index b93fd23..7193afc 100644
RTL_W8(Cfg9346, Cfg9346_Lock);
spin_unlock_irq(&tp->lock);
-@@ -4747,8 +4747,8 @@ static void rtl_set_rx_mode(struct net_device *dev)
+@@ -4795,8 +4795,8 @@ static void rtl_set_rx_mode(struct net_device *dev)
mc_filter[1] = swab32(data);
}
@@ -47,5 +48,5 @@ index b93fd23..7193afc 100644
RTL_W32(RxConfig, tmp);
--
-1.7.0.3
+1.7.0.2
diff --git a/main/linux-pae/net-git-c0cd88-r8169-offical-fix-for-CVE-2009-4537-overlength-frame-DMAs.patch b/main/linux-pae/0006-r8169-offical-fix-for-CVE-2009-4537-overlength-frame.patch
index 250c85d6..03ea13fa 100644
--- a/main/linux-pae/net-git-c0cd88-r8169-offical-fix-for-CVE-2009-4537-overlength-frame-DMAs.patch
+++ b/main/linux-pae/0006-r8169-offical-fix-for-CVE-2009-4537-overlength-frame.patch
@@ -1,7 +1,7 @@
-From c0cd884af045338476b8e69a61fceb3f34ff22f1 Mon Sep 17 00:00:00 2001
+From a60cfaf3df9cd0cddbc24695434ed5bfa917d505 Mon Sep 17 00:00:00 2001
From: Neil Horman <nhorman@redhat.com>
Date: Mon, 29 Mar 2010 13:16:02 -0700
-Subject: [PATCH] r8169: offical fix for CVE-2009-4537 (overlength frame DMAs)
+Subject: [PATCH 06/18] r8169: offical fix for CVE-2009-4537 (overlength frame DMAs)
Official patch to fix the r8169 frame length check error.
@@ -48,15 +48,16 @@ such that performance is restored easily.
Signed-off-by: Neil Horman <nhorman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit c0cd884af045338476b8e69a61fceb3f34ff22f1)
---
drivers/net/r8169.c | 29 ++++++++++++++++++++++++-----
1 files changed, 24 insertions(+), 5 deletions(-)
diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
-index 7193afc..9674005 100644
+index 24599b5..1484528 100644
--- a/drivers/net/r8169.c
+++ b/drivers/net/r8169.c
-@@ -186,7 +186,12 @@ static DEFINE_PCI_DEVICE_TABLE(rtl8169_pci_tbl) = {
+@@ -186,7 +186,12 @@ static struct pci_device_id rtl8169_pci_tbl[] = {
MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
@@ -70,7 +71,7 @@ index 7193afc..9674005 100644
static int use_dac;
static struct {
u32 msg_enable;
-@@ -3217,9 +3222,13 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
+@@ -3245,9 +3250,13 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev)
}
static void rtl8169_set_rxbufsize(struct rtl8169_private *tp,
@@ -86,7 +87,7 @@ index 7193afc..9674005 100644
tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE;
}
-@@ -3231,7 +3240,17 @@ static int rtl8169_open(struct net_device *dev)
+@@ -3259,7 +3268,17 @@ static int rtl8169_open(struct net_device *dev)
int retval = -ENOMEM;
@@ -105,7 +106,7 @@ index 7193afc..9674005 100644
/*
* Rx and Tx desscriptors needs 256 bytes alignment.
-@@ -3884,7 +3903,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
+@@ -3912,7 +3931,7 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
rtl8169_down(dev);
@@ -115,5 +116,5 @@ index 7193afc..9674005 100644
ret = rtl8169_init_ring(dev);
if (ret < 0)
--
-1.7.0.3
+1.7.0.2
diff --git a/main/linux-pae/0007-r8169-Fix-rtl8169_rx_interrupt.patch b/main/linux-pae/0007-r8169-Fix-rtl8169_rx_interrupt.patch
new file mode 100644
index 00000000..fad27232
--- /dev/null
+++ b/main/linux-pae/0007-r8169-Fix-rtl8169_rx_interrupt.patch
@@ -0,0 +1,89 @@
+From 26654a966adb674afc30d285f7e79535d03c2492 Mon Sep 17 00:00:00 2001
+From: Eric Dumazet <eric.dumazet@gmail.com>
+Date: Wed, 31 Mar 2010 02:08:31 +0000
+Subject: [PATCH 07/18] r8169: Fix rtl8169_rx_interrupt()
+
+In case a reset is performed, rtl8169_rx_interrupt() is called from
+process context instead of softirq context. Special care must be taken
+to call appropriate network core services (netif_rx() instead of
+netif_receive_skb()). VLAN handling also corrected.
+
+Reported-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Tested-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com>
+Diagnosed-by: Oleg Nesterov <oleg@redhat.com>
+Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 630b943c182d1aed69f244405131902fbcba7ec6)
+---
+ drivers/net/r8169.c | 22 +++++++++++++++++-----
+ 1 files changed, 17 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
+index 1484528..bed1d47 100644
+--- a/drivers/net/r8169.c
++++ b/drivers/net/r8169.c
+@@ -1047,14 +1047,14 @@ static void rtl8169_vlan_rx_register(struct net_device *dev,
+ }
+
+ static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
+- struct sk_buff *skb)
++ struct sk_buff *skb, int polling)
+ {
+ u32 opts2 = le32_to_cpu(desc->opts2);
+ struct vlan_group *vlgrp = tp->vlgrp;
+ int ret;
+
+ if (vlgrp && (opts2 & RxVlanTag)) {
+- vlan_hwaccel_receive_skb(skb, vlgrp, swab16(opts2 & 0xffff));
++ __vlan_hwaccel_rx(skb, vlgrp, swab16(opts2 & 0xffff), polling);
+ ret = 0;
+ } else
+ ret = -1;
+@@ -1071,7 +1071,7 @@ static inline u32 rtl8169_tx_vlan_tag(struct rtl8169_private *tp,
+ }
+
+ static int rtl8169_rx_vlan_skb(struct rtl8169_private *tp, struct RxDesc *desc,
+- struct sk_buff *skb)
++ struct sk_buff *skb, int polling)
+ {
+ return -1;
+ }
+@@ -4480,12 +4480,20 @@ out:
+ return done;
+ }
+
++/*
++ * Warning : rtl8169_rx_interrupt() might be called :
++ * 1) from NAPI (softirq) context
++ * (polling = 1 : we should call netif_receive_skb())
++ * 2) from process context (rtl8169_reset_task())
++ * (polling = 0 : we must call netif_rx() instead)
++ */
+ static int rtl8169_rx_interrupt(struct net_device *dev,
+ struct rtl8169_private *tp,
+ void __iomem *ioaddr, u32 budget)
+ {
+ unsigned int cur_rx, rx_left;
+ unsigned int delta, count;
++ int polling = (budget != ~(u32)0) ? 1 : 0;
+
+ cur_rx = tp->cur_rx;
+ rx_left = NUM_RX_DESC + tp->dirty_rx - cur_rx;
+@@ -4550,8 +4558,12 @@ static int rtl8169_rx_interrupt(struct net_device *dev,
+ skb_put(skb, pkt_size);
+ skb->protocol = eth_type_trans(skb, dev);
+
+- if (rtl8169_rx_vlan_skb(tp, desc, skb) < 0)
+- netif_receive_skb(skb);
++ if (rtl8169_rx_vlan_skb(tp, desc, skb, polling) < 0) {
++ if (likely(polling))
++ netif_receive_skb(skb);
++ else
++ netif_rx(skb);
++ }
+
+ dev->stats.rx_bytes += pkt_size;
+ dev->stats.rx_packets++;
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0008-r8169-clean-up-my-printk-uglyness.patch b/main/linux-pae/0008-r8169-clean-up-my-printk-uglyness.patch
new file mode 100644
index 00000000..dff3fd21
--- /dev/null
+++ b/main/linux-pae/0008-r8169-clean-up-my-printk-uglyness.patch
@@ -0,0 +1,36 @@
+From d1c9ac562923fa0b1738fceb4c7bafac3ab936ba Mon Sep 17 00:00:00 2001
+From: Neil Horman <nhorman@tuxdriver.com>
+Date: Thu, 1 Apr 2010 07:30:07 +0000
+Subject: [PATCH 08/18] r8169: clean up my printk uglyness
+
+Fix formatting on r8169 printk
+
+Brandon Philips noted that I had a spacing issue in my printk for the
+last r8169 patch that made it quite ugly. Fix that up and add the PFX
+macro to it as well so it looks like the other r8169 printks
+
+Signed-off-by: Neil Horman <nhorman@tuxdriver.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 93f4d91d879acfcb0ba9c2725e3133fcff2dfd1e)
+---
+ drivers/net/r8169.c | 4 ++--
+ 1 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c
+index bed1d47..790555e 100644
+--- a/drivers/net/r8169.c
++++ b/drivers/net/r8169.c
+@@ -3255,8 +3255,8 @@ static void rtl8169_set_rxbufsize(struct rtl8169_private *tp,
+ unsigned int max_frame = mtu + VLAN_ETH_HLEN + ETH_FCS_LEN;
+
+ if (max_frame != 16383)
+- printk(KERN_WARNING "WARNING! Changing of MTU on this NIC"
+- "May lead to frame reception errors!\n");
++ printk(KERN_WARNING PFX "WARNING! Changing of MTU on this "
++ "NIC may lead to frame reception errors!\n");
+
+ tp->rx_buf_sz = (max_frame > RX_BUF_SIZE) ? max_frame : RX_BUF_SIZE;
+ }
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0009-ipsec-Fix-bogus-bundle-flowi.patch b/main/linux-pae/0009-ipsec-Fix-bogus-bundle-flowi.patch
new file mode 100644
index 00000000..d4de0e1d
--- /dev/null
+++ b/main/linux-pae/0009-ipsec-Fix-bogus-bundle-flowi.patch
@@ -0,0 +1,110 @@
+From 21ee14f92ef1b6d4ca965c9b59135f3462919631 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Tue, 2 Mar 2010 02:51:56 +0000
+Subject: [PATCH 09/18] ipsec: Fix bogus bundle flowi
+
+When I merged the bundle creation code, I introduced a bogus
+flowi value in the bundle. Instead of getting from the caller,
+it was instead set to the flow in the route object, which is
+totally different.
+
+The end result is that the bundles we created never match, and
+we instead end up with an ever growing bundle list.
+
+Thanks to Jamal for finding this problem.
+
+Reported-by: Jamal Hadi Salim <hadi@cyberus.ca>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Acked-by: Steffen Klassert <steffen.klassert@secunet.com>
+Acked-by: Jamal Hadi Salim <hadi@cyberus.ca>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 87c1e12b5eeb7b30b4b41291bef8e0b41fc3dde9)
+---
+ include/net/xfrm.h | 3 ++-
+ net/ipv4/xfrm4_policy.c | 5 +++--
+ net/ipv6/xfrm6_policy.c | 3 ++-
+ net/xfrm/xfrm_policy.c | 7 ++++---
+ 4 files changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 223e90a..6960be2 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -273,7 +273,8 @@ struct xfrm_policy_afinfo {
+ struct dst_entry *dst,
+ int nfheader_len);
+ int (*fill_dst)(struct xfrm_dst *xdst,
+- struct net_device *dev);
++ struct net_device *dev,
++ struct flowi *fl);
+ };
+
+ extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 74fb2eb..7009886 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -92,11 +92,12 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ return 0;
+ }
+
+-static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
++static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
++ struct flowi *fl)
+ {
+ struct rtable *rt = (struct rtable *)xdst->route;
+
+- xdst->u.rt.fl = rt->fl;
++ xdst->u.rt.fl = *fl;
+
+ xdst->u.dst.dev = dev;
+ dev_hold(dev);
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 8ec3d45..3f89ab7 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -117,7 +117,8 @@ static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ return 0;
+ }
+
+-static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
++static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
++ struct flowi *fl)
+ {
+ struct rt6_info *rt = (struct rt6_info*)xdst->route;
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index cb81ca3..d75047c 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -1341,7 +1341,8 @@ static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
+ return err;
+ }
+
+-static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
++static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
++ struct flowi *fl)
+ {
+ struct xfrm_policy_afinfo *afinfo =
+ xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
+@@ -1350,7 +1351,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
+ if (!afinfo)
+ return -EINVAL;
+
+- err = afinfo->fill_dst(xdst, dev);
++ err = afinfo->fill_dst(xdst, dev, fl);
+
+ xfrm_policy_put_afinfo(afinfo);
+
+@@ -1454,7 +1455,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
+ for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
+ struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
+
+- err = xfrm_fill_dst(xdst, dev);
++ err = xfrm_fill_dst(xdst, dev, fl);
+ if (err)
+ goto free_dst;
+
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0010-xfrm-Remove-xfrm_state_genid.patch b/main/linux-pae/0010-xfrm-Remove-xfrm_state_genid.patch
new file mode 100644
index 00000000..8cfffd73
--- /dev/null
+++ b/main/linux-pae/0010-xfrm-Remove-xfrm_state_genid.patch
@@ -0,0 +1,54 @@
+From f2c59932757a06851bb740dc757ce2ba1961fc08 Mon Sep 17 00:00:00 2001
+From: Herbert Xu <herbert@gondor.apana.org.au>
+Date: Wed, 31 Mar 2010 01:19:49 +0000
+Subject: [PATCH 10/18] xfrm: Remove xfrm_state_genid
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The xfrm state genid only needs to be matched against the copy
+saved in xfrm_dst. So we don't need a global genid at all. In
+fact, we don't even need to initialise it.
+
+Based on observation by Timo Teräs.
+
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 34996cb91dd72f0b0456d8fd3fef4aaee62232f2)
+---
+ net/xfrm/xfrm_state.c | 5 +----
+ 1 files changed, 1 insertions(+), 4 deletions(-)
+
+diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+index f2f7c63..8ee733f 100644
+--- a/net/xfrm/xfrm_state.c
++++ b/net/xfrm/xfrm_state.c
+@@ -34,7 +34,6 @@
+ static DEFINE_SPINLOCK(xfrm_state_lock);
+
+ static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
+-static unsigned int xfrm_state_genid;
+
+ static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
+ static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
+@@ -903,8 +902,6 @@ static void __xfrm_state_insert(struct xfrm_state *x)
+ struct net *net = xs_net(x);
+ unsigned int h;
+
+- x->genid = ++xfrm_state_genid;
+-
+ list_add(&x->km.all, &net->xfrm.state_all);
+
+ h = xfrm_dst_hash(net, &x->id.daddr, &x->props.saddr,
+@@ -948,7 +945,7 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
+ x->props.reqid == reqid &&
+ !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
+ !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
+- x->genid = xfrm_state_genid;
++ x->genid++;
+ }
+ }
+
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch b/main/linux-pae/0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch
new file mode 100644
index 00000000..ae2a0f91
--- /dev/null
+++ b/main/linux-pae/0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch
@@ -0,0 +1,35 @@
+From 5b3e87bccb0e48f2f8b78695e949c015a3695f8e Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 31 Mar 2010 00:17:04 +0000
+Subject: [PATCH 11/18] xfrm_user: verify policy direction at XFRM_MSG_POLEXPIRE handler
+
+Add missing check for policy direction verification. This is
+especially important since without this xfrm_user may end up
+deleting per-socket policy which is not allowed.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit c8bf4d04f970fafb3430d332533e1cf103f2a018)
+---
+ net/xfrm/xfrm_user.c | 4 ++++
+ 1 files changed, 4 insertions(+), 0 deletions(-)
+
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+index b95a2d6..d1e9ee3 100644
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1589,6 +1589,10 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
+ if (err)
+ return err;
+
++ err = verify_policy_dir(p->dir);
++ if (err)
++ return err;
++
+ if (p->index)
+ xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err);
+ else {
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch b/main/linux-pae/0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch
new file mode 100644
index 00000000..222caadd
--- /dev/null
+++ b/main/linux-pae/0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch
@@ -0,0 +1,105 @@
+From 7a400eb025dd53883c3560d0fdb069542f7ad3db Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 31 Mar 2010 00:17:05 +0000
+Subject: [PATCH 12/18] xfrm: remove policy lock when accessing policy->walk.dead
+
+All of the code considers ->dead as a hint that the cached policy
+needs to get refreshed. The read side can just drop the read lock
+without any side effects.
+
+The write side needs to make sure that it's written only exactly
+once. Only possible race is at xfrm_policy_kill(). This is fixed
+by checking result of __xfrm_policy_unlink() when needed. It will
+always succeed if the policy object is looked up from the hash
+list (so some checks are removed), but it needs to be checked if
+we are trying to unlink policy via a reference (appropriate
+checks added).
+
+Since policy->walk.dead is written exactly once, it no longer
+needs to be protected with a write lock.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(backported from commit ea2dea9dacc256fe927857feb423872051642ae7)
+---
+ net/xfrm/xfrm_policy.c | 20 +++++---------------
+ net/xfrm/xfrm_user.c | 6 +-----
+ 2 files changed, 6 insertions(+), 20 deletions(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index d75047c..110184f 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -156,7 +156,7 @@ static void xfrm_policy_timer(unsigned long data)
+
+ read_lock(&xp->lock);
+
+- if (xp->walk.dead)
++ if (unlikely(xp->walk.dead))
+ goto out;
+
+ dir = xfrm_policy_id2dir(xp->index);
+@@ -297,17 +297,7 @@ static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+
+ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ {
+- int dead;
+-
+- write_lock_bh(&policy->lock);
+- dead = policy->walk.dead;
+ policy->walk.dead = 1;
+- write_unlock_bh(&policy->lock);
+-
+- if (unlikely(dead)) {
+- WARN_ON(1);
+- return;
+- }
+
+ spin_lock_bh(&xfrm_policy_gc_lock);
+ hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
+@@ -1115,6 +1105,9 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
+ __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
+ }
+ if (old_pol)
++ /* Unlinking succeeds always. This is the only function
++ * allowed to delete or replace socket policy.
++ */
+ __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
+ write_unlock_bh(&xfrm_policy_lock);
+
+@@ -1705,11 +1698,8 @@ restart:
+ goto error;
+ }
+
+- for (pi = 0; pi < npols; pi++) {
+- read_lock_bh(&pols[pi]->lock);
++ for (pi = 0; pi < npols; pi++)
+ pol_dead |= pols[pi]->walk.dead;
+- read_unlock_bh(&pols[pi]->lock);
+- }
+
+ write_lock_bh(&policy->lock);
+ if (unlikely(pol_dead || stale_bundle(dst))) {
+diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+index d1e9ee3..f9c56e9 100644
+--- a/net/xfrm/xfrm_user.c
++++ b/net/xfrm/xfrm_user.c
+@@ -1617,13 +1617,9 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh,
+ if (xp == NULL)
+ return -ENOENT;
+
+- read_lock(&xp->lock);
+- if (xp->walk.dead) {
+- read_unlock(&xp->lock);
++ if (unlikely(xp->walk.dead))
+ goto out;
+- }
+
+- read_unlock(&xp->lock);
+ err = 0;
+ if (up->hard) {
+ uid_t loginuid = NETLINK_CB(skb).loginuid;
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0013-flow-structurize-flow-cache.patch b/main/linux-pae/0013-flow-structurize-flow-cache.patch
new file mode 100644
index 00000000..68fa753a
--- /dev/null
+++ b/main/linux-pae/0013-flow-structurize-flow-cache.patch
@@ -0,0 +1,395 @@
+From 884f6e44f0b405c06bd234b14cc228482291bb38 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 31 Mar 2010 00:17:06 +0000
+Subject: [PATCH 13/18] flow: structurize flow cache
+
+Group all per-cpu data to one structure instead of having many
+globals. Also prepare the internals so that we can have multiple
+instances of the flow cache if needed.
+
+Only the kmem_cache is left as a global as all flow caches share
+the same element size, and benefit from using a common cache.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit d7997fe1f4584da12e9c29fb682c18e9bdc13b73)
+---
+ net/core/flow.c | 223 +++++++++++++++++++++++++++++--------------------------
+ 1 files changed, 119 insertions(+), 104 deletions(-)
+
+diff --git a/net/core/flow.c b/net/core/flow.c
+index 9601587..1d27ca6 100644
+--- a/net/core/flow.c
++++ b/net/core/flow.c
+@@ -35,104 +35,105 @@ struct flow_cache_entry {
+ atomic_t *object_ref;
+ };
+
+-atomic_t flow_cache_genid = ATOMIC_INIT(0);
+-
+-static u32 flow_hash_shift;
+-#define flow_hash_size (1 << flow_hash_shift)
+-static DEFINE_PER_CPU(struct flow_cache_entry **, flow_tables) = { NULL };
+-
+-#define flow_table(cpu) (per_cpu(flow_tables, cpu))
+-
+-static struct kmem_cache *flow_cachep __read_mostly;
+-
+-static int flow_lwm, flow_hwm;
+-
+-struct flow_percpu_info {
+- int hash_rnd_recalc;
+- u32 hash_rnd;
+- int count;
++struct flow_cache_percpu {
++ struct flow_cache_entry ** hash_table;
++ int hash_count;
++ u32 hash_rnd;
++ int hash_rnd_recalc;
++ struct tasklet_struct flush_tasklet;
+ };
+-static DEFINE_PER_CPU(struct flow_percpu_info, flow_hash_info) = { 0 };
+-
+-#define flow_hash_rnd_recalc(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd_recalc)
+-#define flow_hash_rnd(cpu) \
+- (per_cpu(flow_hash_info, cpu).hash_rnd)
+-#define flow_count(cpu) \
+- (per_cpu(flow_hash_info, cpu).count)
+-
+-static struct timer_list flow_hash_rnd_timer;
+-
+-#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ struct flow_flush_info {
+- atomic_t cpuleft;
+- struct completion completion;
++ struct flow_cache * cache;
++ atomic_t cpuleft;
++ struct completion completion;
+ };
+-static DEFINE_PER_CPU(struct tasklet_struct, flow_flush_tasklets) = { NULL };
+
+-#define flow_flush_tasklet(cpu) (&per_cpu(flow_flush_tasklets, cpu))
++struct flow_cache {
++ u32 hash_shift;
++ unsigned long order;
++ struct flow_cache_percpu * percpu;
++ struct notifier_block hotcpu_notifier;
++ int low_watermark;
++ int high_watermark;
++ struct timer_list rnd_timer;
++};
++
++atomic_t flow_cache_genid = ATOMIC_INIT(0);
++static struct flow_cache flow_cache_global;
++static struct kmem_cache *flow_cachep;
++
++#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
++#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+ static void flow_cache_new_hashrnd(unsigned long arg)
+ {
++ struct flow_cache *fc = (void *) arg;
+ int i;
+
+ for_each_possible_cpu(i)
+- flow_hash_rnd_recalc(i) = 1;
++ per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1;
+
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+ }
+
+-static void flow_entry_kill(int cpu, struct flow_cache_entry *fle)
++static void flow_entry_kill(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ struct flow_cache_entry *fle)
+ {
+ if (fle->object)
+ atomic_dec(fle->object_ref);
+ kmem_cache_free(flow_cachep, fle);
+- flow_count(cpu)--;
++ fcp->hash_count--;
+ }
+
+-static void __flow_cache_shrink(int cpu, int shrink_to)
++static void __flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ int shrink_to)
+ {
+ struct flow_cache_entry *fle, **flp;
+ int i;
+
+- for (i = 0; i < flow_hash_size; i++) {
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int k = 0;
+
+- flp = &flow_table(cpu)[i];
++ flp = &fcp->hash_table[i];
+ while ((fle = *flp) != NULL && k < shrink_to) {
+ k++;
+ flp = &fle->next;
+ }
+ while ((fle = *flp) != NULL) {
+ *flp = fle->next;
+- flow_entry_kill(cpu, fle);
++ flow_entry_kill(fc, fcp, fle);
+ }
+ }
+ }
+
+-static void flow_cache_shrink(int cpu)
++static void flow_cache_shrink(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- int shrink_to = flow_lwm / flow_hash_size;
++ int shrink_to = fc->low_watermark / flow_cache_hash_size(fc);
+
+- __flow_cache_shrink(cpu, shrink_to);
++ __flow_cache_shrink(fc, fcp, shrink_to);
+ }
+
+-static void flow_new_hash_rnd(int cpu)
++static void flow_new_hash_rnd(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
+- flow_hash_rnd_recalc(cpu) = 0;
+-
+- __flow_cache_shrink(cpu, 0);
++ get_random_bytes(&fcp->hash_rnd, sizeof(u32));
++ fcp->hash_rnd_recalc = 0;
++ __flow_cache_shrink(fc, fcp, 0);
+ }
+
+-static u32 flow_hash_code(struct flowi *key, int cpu)
++static u32 flow_hash_code(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp,
++ struct flowi *key)
+ {
+ u32 *k = (u32 *) key;
+
+- return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
+- (flow_hash_size - 1));
++ return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd)
++ & (flow_cache_hash_size(fc) - 1));
+ }
+
+ #if (BITS_PER_LONG == 64)
+@@ -168,24 +169,25 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ flow_resolve_t resolver)
+ {
++ struct flow_cache *fc = &flow_cache_global;
++ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle, **head;
+ unsigned int hash;
+- int cpu;
+
+ local_bh_disable();
+- cpu = smp_processor_id();
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+
+ fle = NULL;
+ /* Packet really early in init? Making flow_cache_init a
+ * pre-smp initcall would solve this. --RR */
+- if (!flow_table(cpu))
++ if (!fcp->hash_table)
+ goto nocache;
+
+- if (flow_hash_rnd_recalc(cpu))
+- flow_new_hash_rnd(cpu);
+- hash = flow_hash_code(key, cpu);
++ if (fcp->hash_rnd_recalc)
++ flow_new_hash_rnd(fc, fcp);
++ hash = flow_hash_code(fc, fcp, key);
+
+- head = &flow_table(cpu)[hash];
++ head = &fcp->hash_table[hash];
+ for (fle = *head; fle; fle = fle->next) {
+ if (fle->family == family &&
+ fle->dir == dir &&
+@@ -204,8 +206,8 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ }
+
+ if (!fle) {
+- if (flow_count(cpu) > flow_hwm)
+- flow_cache_shrink(cpu);
++ if (fcp->hash_count > fc->high_watermark)
++ flow_cache_shrink(fc, fcp);
+
+ fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+ if (fle) {
+@@ -215,7 +217,7 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ fle->dir = dir;
+ memcpy(&fle->key, key, sizeof(*key));
+ fle->object = NULL;
+- flow_count(cpu)++;
++ fcp->hash_count++;
+ }
+ }
+
+@@ -249,14 +251,15 @@ nocache:
+ static void flow_cache_flush_tasklet(unsigned long data)
+ {
+ struct flow_flush_info *info = (void *)data;
++ struct flow_cache *fc = info->cache;
++ struct flow_cache_percpu *fcp;
+ int i;
+- int cpu;
+
+- cpu = smp_processor_id();
+- for (i = 0; i < flow_hash_size; i++) {
++ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
++ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ struct flow_cache_entry *fle;
+
+- fle = flow_table(cpu)[i];
++ fle = fcp->hash_table[i];
+ for (; fle; fle = fle->next) {
+ unsigned genid = atomic_read(&flow_cache_genid);
+
+@@ -272,7 +275,6 @@ static void flow_cache_flush_tasklet(unsigned long data)
+ complete(&info->completion);
+ }
+
+-static void flow_cache_flush_per_cpu(void *) __attribute__((__unused__));
+ static void flow_cache_flush_per_cpu(void *data)
+ {
+ struct flow_flush_info *info = data;
+@@ -280,8 +282,7 @@ static void flow_cache_flush_per_cpu(void *data)
+ struct tasklet_struct *tasklet;
+
+ cpu = smp_processor_id();
+-
+- tasklet = flow_flush_tasklet(cpu);
++ tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet;
+ tasklet->data = (unsigned long)info;
+ tasklet_schedule(tasklet);
+ }
+@@ -294,6 +295,7 @@ void flow_cache_flush(void)
+ /* Don't want cpus going down or up during this. */
+ get_online_cpus();
+ mutex_lock(&flow_flush_sem);
++ info.cache = &flow_cache_global;
+ atomic_set(&info.cpuleft, num_online_cpus());
+ init_completion(&info.completion);
+
+@@ -307,62 +309,75 @@ void flow_cache_flush(void)
+ put_online_cpus();
+ }
+
+-static void __init flow_cache_cpu_prepare(int cpu)
++static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
++ struct flow_cache_percpu *fcp)
+ {
+- struct tasklet_struct *tasklet;
+- unsigned long order;
+-
+- for (order = 0;
+- (PAGE_SIZE << order) <
+- (sizeof(struct flow_cache_entry *)*flow_hash_size);
+- order++)
+- /* NOTHING */;
+-
+- flow_table(cpu) = (struct flow_cache_entry **)
+- __get_free_pages(GFP_KERNEL|__GFP_ZERO, order);
+- if (!flow_table(cpu))
+- panic("NET: failed to allocate flow cache order %lu\n", order);
+-
+- flow_hash_rnd_recalc(cpu) = 1;
+- flow_count(cpu) = 0;
+-
+- tasklet = flow_flush_tasklet(cpu);
+- tasklet_init(tasklet, flow_cache_flush_tasklet, 0);
++ fcp->hash_table = (struct flow_cache_entry **)
++ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
++ if (!fcp->hash_table)
++ panic("NET: failed to allocate flow cache order %lu\n", fc->order);
++
++ fcp->hash_rnd_recalc = 1;
++ fcp->hash_count = 0;
++ tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0);
+ }
+
+ static int flow_cache_cpu(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+ {
++ struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
++ int cpu = (unsigned long) hcpu;
++ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
++
+ if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
+- __flow_cache_shrink((unsigned long)hcpu, 0);
++ __flow_cache_shrink(fc, fcp, 0);
+ return NOTIFY_OK;
+ }
+
+-static int __init flow_cache_init(void)
++static int flow_cache_init(struct flow_cache *fc)
+ {
++ unsigned long order;
+ int i;
+
+- flow_cachep = kmem_cache_create("flow_cache",
+- sizeof(struct flow_cache_entry),
+- 0, SLAB_PANIC,
+- NULL);
+- flow_hash_shift = 10;
+- flow_lwm = 2 * flow_hash_size;
+- flow_hwm = 4 * flow_hash_size;
++ fc->hash_shift = 10;
++ fc->low_watermark = 2 * flow_cache_hash_size(fc);
++ fc->high_watermark = 4 * flow_cache_hash_size(fc);
++
++ for (order = 0;
++ (PAGE_SIZE << order) <
++ (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc));
++ order++)
++ /* NOTHING */;
++ fc->order = order;
++ fc->percpu = alloc_percpu(struct flow_cache_percpu);
+
+- setup_timer(&flow_hash_rnd_timer, flow_cache_new_hashrnd, 0);
+- flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
+- add_timer(&flow_hash_rnd_timer);
++ setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd,
++ (unsigned long) fc);
++ fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++ add_timer(&fc->rnd_timer);
+
+ for_each_possible_cpu(i)
+- flow_cache_cpu_prepare(i);
++ flow_cache_cpu_prepare(fc, per_cpu_ptr(fc->percpu, i));
++
++ fc->hotcpu_notifier = (struct notifier_block){
++ .notifier_call = flow_cache_cpu,
++ };
++ register_hotcpu_notifier(&fc->hotcpu_notifier);
+
+- hotcpu_notifier(flow_cache_cpu, 0);
+ return 0;
+ }
+
+-module_init(flow_cache_init);
++static int __init flow_cache_init_global(void)
++{
++ flow_cachep = kmem_cache_create("flow_cache",
++ sizeof(struct flow_cache_entry),
++ 0, SLAB_PANIC, NULL);
++
++ return flow_cache_init(&flow_cache_global);
++}
++
++module_init(flow_cache_init_global);
+
+ EXPORT_SYMBOL(flow_cache_genid);
+ EXPORT_SYMBOL(flow_cache_lookup);
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0014-flow-virtualize-flow-cache-entry-methods.patch b/main/linux-pae/0014-flow-virtualize-flow-cache-entry-methods.patch
new file mode 100644
index 00000000..5c4a9ea5
--- /dev/null
+++ b/main/linux-pae/0014-flow-virtualize-flow-cache-entry-methods.patch
@@ -0,0 +1,513 @@
+From d56cd1c538e5448fe43acc69991aa842f382a622 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 7 Apr 2010 00:30:04 +0000
+Subject: [PATCH 14/18] flow: virtualize flow cache entry methods
+
+This allows to validate the cached object before returning it.
+It also allows to destruct object properly, if the last reference
+was held in flow cache. This is also a preparation for caching
+bundles in the flow cache.
+
+In return for virtualizing the methods, we save on:
+- not having to regenerate the whole flow cache on policy removal:
+ each flow matching a killed policy gets refreshed as the getter
+ function notices it smartly.
+- we do not have to call flow_cache_flush from policy gc, since the
+ flow cache now properly deletes the object if it had any references
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(backported from commit fe1a5f031e76bd8761a7803d75b95ee96e84a574)
+---
+ include/net/flow.h | 23 +++++++--
+ include/net/xfrm.h | 3 +
+ net/core/flow.c | 128 +++++++++++++++++++++++++----------------------
+ net/xfrm/xfrm_policy.c | 111 ++++++++++++++++++++++++++++--------------
+ 4 files changed, 164 insertions(+), 101 deletions(-)
+
+diff --git a/include/net/flow.h b/include/net/flow.h
+index 809970b..bb08692 100644
+--- a/include/net/flow.h
++++ b/include/net/flow.h
+@@ -86,11 +86,26 @@ struct flowi {
+
+ struct net;
+ struct sock;
+-typedef int (*flow_resolve_t)(struct net *net, struct flowi *key, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp);
++struct flow_cache_ops;
++
++struct flow_cache_object {
++ const struct flow_cache_ops *ops;
++};
++
++struct flow_cache_ops {
++ struct flow_cache_object *(*get)(struct flow_cache_object *);
++ int (*check)(struct flow_cache_object *);
++ void (*delete)(struct flow_cache_object *);
++};
++
++typedef struct flow_cache_object *(*flow_resolve_t)(
++ struct net *net, struct flowi *key, u16 family,
++ u8 dir, struct flow_cache_object *oldobj, void *ctx);
++
++extern struct flow_cache_object *flow_cache_lookup(
++ struct net *net, struct flowi *key, u16 family,
++ u8 dir, flow_resolve_t resolver, void *ctx);
+
+-extern void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family,
+- u8 dir, flow_resolve_t resolver);
+ extern void flow_cache_flush(void);
+ extern atomic_t flow_cache_genid;
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 6960be2..6023a48 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -19,6 +19,8 @@
+ #include <net/route.h>
+ #include <net/ipv6.h>
+ #include <net/ip6_fib.h>
++#include <net/flow.h>
++
+ #ifdef CONFIG_XFRM_STATISTICS
+ #include <net/snmp.h>
+ #endif
+@@ -482,6 +484,7 @@ struct xfrm_policy
+ atomic_t refcnt;
+ struct timer_list timer;
+
++ struct flow_cache_object flo;
+ u32 priority;
+ u32 index;
+ struct xfrm_selector selector;
+diff --git a/net/core/flow.c b/net/core/flow.c
+index 1d27ca6..521df52 100644
+--- a/net/core/flow.c
++++ b/net/core/flow.c
+@@ -26,17 +26,16 @@
+ #include <linux/security.h>
+
+ struct flow_cache_entry {
+- struct flow_cache_entry *next;
+- u16 family;
+- u8 dir;
+- u32 genid;
+- struct flowi key;
+- void *object;
+- atomic_t *object_ref;
++ struct flow_cache_entry *next;
++ u16 family;
++ u8 dir;
++ u32 genid;
++ struct flowi key;
++ struct flow_cache_object *object;
+ };
+
+ struct flow_cache_percpu {
+- struct flow_cache_entry ** hash_table;
++ struct flow_cache_entry **hash_table;
+ int hash_count;
+ u32 hash_rnd;
+ int hash_rnd_recalc;
+@@ -44,7 +43,7 @@ struct flow_cache_percpu {
+ };
+
+ struct flow_flush_info {
+- struct flow_cache * cache;
++ struct flow_cache *cache;
+ atomic_t cpuleft;
+ struct completion completion;
+ };
+@@ -52,7 +51,7 @@ struct flow_flush_info {
+ struct flow_cache {
+ u32 hash_shift;
+ unsigned long order;
+- struct flow_cache_percpu * percpu;
++ struct flow_cache_percpu *percpu;
+ struct notifier_block hotcpu_notifier;
+ int low_watermark;
+ int high_watermark;
+@@ -78,12 +77,21 @@ static void flow_cache_new_hashrnd(unsigned long arg)
+ add_timer(&fc->rnd_timer);
+ }
+
++static int flow_entry_valid(struct flow_cache_entry *fle)
++{
++ if (atomic_read(&flow_cache_genid) != fle->genid)
++ return 0;
++ if (fle->object && !fle->object->ops->check(fle->object))
++ return 0;
++ return 1;
++}
++
+ static void flow_entry_kill(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ struct flow_cache_entry *fle)
+ {
+ if (fle->object)
+- atomic_dec(fle->object_ref);
++ fle->object->ops->delete(fle->object);
+ kmem_cache_free(flow_cachep, fle);
+ fcp->hash_count--;
+ }
+@@ -96,16 +104,18 @@ static void __flow_cache_shrink(struct flow_cache *fc,
+ int i;
+
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+- int k = 0;
++ int saved = 0;
+
+ flp = &fcp->hash_table[i];
+- while ((fle = *flp) != NULL && k < shrink_to) {
+- k++;
+- flp = &fle->next;
+- }
+ while ((fle = *flp) != NULL) {
+- *flp = fle->next;
+- flow_entry_kill(fc, fcp, fle);
++ if (saved < shrink_to &&
++ flow_entry_valid(fle)) {
++ saved++;
++ flp = &fle->next;
++ } else {
++ *flp = fle->next;
++ flow_entry_kill(fc, fcp, fle);
++ }
+ }
+ }
+ }
+@@ -166,18 +176,21 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2)
+ return 0;
+ }
+
+-void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+- flow_resolve_t resolver)
++struct flow_cache_object *
++flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
++ flow_resolve_t resolver, void *ctx)
+ {
+ struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache_percpu *fcp;
+ struct flow_cache_entry *fle, **head;
++ struct flow_cache_object *flo;
+ unsigned int hash;
+
+ local_bh_disable();
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+
+ fle = NULL;
++ flo = NULL;
+ /* Packet really early in init? Making flow_cache_init a
+ * pre-smp initcall would solve this. --RR */
+ if (!fcp->hash_table)
+@@ -185,27 +198,17 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+
+ if (fcp->hash_rnd_recalc)
+ flow_new_hash_rnd(fc, fcp);
+- hash = flow_hash_code(fc, fcp, key);
+
++ hash = flow_hash_code(fc, fcp, key);
+ head = &fcp->hash_table[hash];
+ for (fle = *head; fle; fle = fle->next) {
+ if (fle->family == family &&
+ fle->dir == dir &&
+- flow_key_compare(key, &fle->key) == 0) {
+- if (fle->genid == atomic_read(&flow_cache_genid)) {
+- void *ret = fle->object;
+-
+- if (ret)
+- atomic_inc(fle->object_ref);
+- local_bh_enable();
+-
+- return ret;
+- }
++ flow_key_compare(key, &fle->key) == 0)
+ break;
+- }
+ }
+
+- if (!fle) {
++ if (unlikely(!fle)) {
+ if (fcp->hash_count > fc->high_watermark)
+ flow_cache_shrink(fc, fcp);
+
+@@ -219,33 +222,39 @@ void *flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ fle->object = NULL;
+ fcp->hash_count++;
+ }
++ } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
++ flo = fle->object;
++ if (!flo)
++ goto ret_object;
++ flo = flo->ops->get(flo);
++ if (flo)
++ goto ret_object;
++ } else if (fle->object) {
++ flo = fle->object;
++ flo->ops->delete(flo);
++ fle->object = NULL;
+ }
+
+ nocache:
+- {
+- int err;
+- void *obj;
+- atomic_t *obj_ref;
+-
+- err = resolver(net, key, family, dir, &obj, &obj_ref);
+-
+- if (fle && !err) {
+- fle->genid = atomic_read(&flow_cache_genid);
+-
+- if (fle->object)
+- atomic_dec(fle->object_ref);
+-
+- fle->object = obj;
+- fle->object_ref = obj_ref;
+- if (obj)
+- atomic_inc(fle->object_ref);
+- }
+- local_bh_enable();
+-
+- if (err)
+- obj = ERR_PTR(err);
+- return obj;
++ flo = NULL;
++ if (fle) {
++ flo = fle->object;
++ fle->object = NULL;
++ }
++ flo = resolver(net, key, family, dir, flo, ctx);
++ if (fle) {
++ fle->genid = atomic_read(&flow_cache_genid);
++ if (!IS_ERR(flo))
++ fle->object = flo;
++ else
++ fle->genid--;
++ } else {
++ if (flo && !IS_ERR(flo))
++ flo->ops->delete(flo);
+ }
++ret_object:
++ local_bh_enable();
++ return flo;
+ }
+
+ static void flow_cache_flush_tasklet(unsigned long data)
+@@ -261,13 +270,12 @@ static void flow_cache_flush_tasklet(unsigned long data)
+
+ fle = fcp->hash_table[i];
+ for (; fle; fle = fle->next) {
+- unsigned genid = atomic_read(&flow_cache_genid);
+-
+- if (!fle->object || fle->genid == genid)
++ if (flow_entry_valid(fle))
+ continue;
+
++ if (fle->object)
++ fle->object->ops->delete(fle->object);
+ fle->object = NULL;
+- atomic_dec(fle->object_ref);
+ }
+ }
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 110184f..d1eb2b5 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -216,6 +216,35 @@ expired:
+ xfrm_pol_put(xp);
+ }
+
++static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
++{
++ struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
++
++ if (unlikely(pol->walk.dead))
++ flo = NULL;
++ else
++ xfrm_pol_hold(pol);
++
++ return flo;
++}
++
++static int xfrm_policy_flo_check(struct flow_cache_object *flo)
++{
++ struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
++
++ return !pol->walk.dead;
++}
++
++static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
++{
++ xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
++}
++
++static const struct flow_cache_ops xfrm_policy_fc_ops = {
++ .get = xfrm_policy_flo_get,
++ .check = xfrm_policy_flo_check,
++ .delete = xfrm_policy_flo_delete,
++};
+
+ /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
+ * SPD calls.
+@@ -236,6 +265,7 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
+ atomic_set(&policy->refcnt, 1);
+ setup_timer(&policy->timer, xfrm_policy_timer,
+ (unsigned long)policy);
++ policy->flo.ops = &xfrm_policy_fc_ops;
+ }
+ return policy;
+ }
+@@ -269,9 +299,6 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+ if (del_timer(&policy->timer))
+ atomic_dec(&policy->refcnt);
+
+- if (atomic_read(&policy->refcnt) > 1)
+- flow_cache_flush();
+-
+ xfrm_pol_put(policy);
+ }
+
+@@ -658,10 +685,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir,
+ }
+ write_unlock_bh(&xfrm_policy_lock);
+
+- if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ if (ret && delete)
+ xfrm_policy_kill(ret);
+- }
+ return ret;
+ }
+ EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
+@@ -699,10 +724,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id,
+ }
+ write_unlock_bh(&xfrm_policy_lock);
+
+- if (ret && delete) {
+- atomic_inc(&flow_cache_genid);
++ if (ret && delete)
+ xfrm_policy_kill(ret);
+- }
+ return ret;
+ }
+ EXPORT_SYMBOL(xfrm_policy_byid);
+@@ -967,32 +990,35 @@ fail:
+ return ret;
+ }
+
+-static int xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+- u8 dir, void **objp, atomic_t **obj_refp)
++static struct flow_cache_object *
++xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
++ u8 dir, struct flow_cache_object *old_obj, void *ctx)
+ {
+ struct xfrm_policy *pol;
+- int err = 0;
++
++ if (old_obj)
++ xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
+
+ #ifdef CONFIG_XFRM_SUB_POLICY
+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+- if (pol || err)
+- goto end;
++ if (IS_ERR(pol))
++ return ERR_CAST(pol);
++ if (pol)
++ goto found;
+ #endif
+ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+- if (IS_ERR(pol)) {
+- err = PTR_ERR(pol);
+- pol = NULL;
+- }
+-#ifdef CONFIG_XFRM_SUB_POLICY
+-end:
+-#endif
+- if ((*objp = (void *) pol) != NULL)
+- *obj_refp = &pol->refcnt;
+- return err;
++ if (IS_ERR(pol))
++ return ERR_CAST(pol);
++ if (pol)
++ goto found;
++ return NULL;
++
++found:
++ /* Resolver returns two references:
++ * one for cache and one for caller of flow_cache_lookup() */
++ xfrm_pol_hold(pol);
++
++ return &pol->flo;
+ }
+
+ static inline int policy_to_flow_dir(int dir)
+@@ -1077,8 +1103,6 @@ int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
+ pol = __xfrm_policy_unlink(pol, dir);
+ write_unlock_bh(&xfrm_policy_lock);
+ if (pol) {
+- if (dir < XFRM_POLICY_MAX)
+- atomic_inc(&flow_cache_genid);
+ xfrm_policy_kill(pol);
+ return 0;
+ }
+@@ -1549,18 +1573,24 @@ restart:
+ }
+
+ if (!policy) {
++ struct flow_cache_object *flo;
++
+ /* To accelerate a bit... */
+ if ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ goto nopol;
+
+- policy = flow_cache_lookup(net, fl, dst_orig->ops->family,
+- dir, xfrm_policy_lookup);
+- err = PTR_ERR(policy);
+- if (IS_ERR(policy)) {
++ flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
++ dir, xfrm_policy_lookup, NULL);
++ err = PTR_ERR(flo);
++ if (IS_ERR(flo)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+ goto dropdst;
+ }
++ if (flo)
++ policy = container_of(flo, struct xfrm_policy, flo);
++ else
++ policy = NULL;
+ }
+
+ if (!policy)
+@@ -1910,9 +1940,16 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
+ }
+ }
+
+- if (!pol)
+- pol = flow_cache_lookup(net, &fl, family, fl_dir,
+- xfrm_policy_lookup);
++ if (!pol) {
++ struct flow_cache_object *flo;
++
++ flo = flow_cache_lookup(net, &fl, family, fl_dir,
++ xfrm_policy_lookup, NULL);
++ if (flo == NULL || IS_ERR(flo))
++ pol = ERR_CAST(flo);
++ else
++ pol = container_of(flo, struct xfrm_policy, flo);
++ }
+
+ if (IS_ERR(pol)) {
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch b/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch
new file mode 100644
index 00000000..0d066c84
--- /dev/null
+++ b/main/linux-pae/0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch
@@ -0,0 +1,1068 @@
+From f89d21648e6dc06db2aeabc8926c270894c41446 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 7 Apr 2010 00:30:05 +0000
+Subject: [PATCH 15/18] xfrm: cache bundles instead of policies for outgoing flows
+
+__xfrm_lookup() is called for each packet transmitted out of
+the system. xfrm_find_bundle() does a linear search, which can
+kill system performance depending on how many bundles are
+required per policy.
+
+This modifies __xfrm_lookup() to store bundles directly in
+the flow cache. If we did not get a hit, we just create a new
+bundle instead of doing a slow search. This means that we can now
+get multiple xfrm_dst's for the same flow (on a per-cpu basis).
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(backported from commit 80c802f3073e84c956846e921e8a0b02dfa3755f)
+---
+ include/net/xfrm.h | 10 +-
+ net/ipv4/xfrm4_policy.c | 22 --
+ net/ipv6/xfrm6_policy.c | 31 --
+ net/xfrm/xfrm_policy.c | 710 +++++++++++++++++++++++++----------------------
+ 4 files changed, 383 insertions(+), 390 deletions(-)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index 6023a48..d51ef61 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -266,7 +266,6 @@ struct xfrm_policy_afinfo {
+ xfrm_address_t *saddr,
+ xfrm_address_t *daddr);
+ int (*get_saddr)(struct net *net, xfrm_address_t *saddr, xfrm_address_t *daddr);
+- struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
+ void (*decode_session)(struct sk_buff *skb,
+ struct flowi *fl,
+ int reverse);
+@@ -485,12 +484,12 @@ struct xfrm_policy
+ struct timer_list timer;
+
+ struct flow_cache_object flo;
++ atomic_t genid;
+ u32 priority;
+ u32 index;
+ struct xfrm_selector selector;
+ struct xfrm_lifetime_cfg lft;
+ struct xfrm_lifetime_cur curlft;
+- struct dst_entry *bundles;
+ struct xfrm_policy_walk_entry walk;
+ u8 type;
+ u8 action;
+@@ -883,11 +882,15 @@ struct xfrm_dst
+ struct rt6_info rt6;
+ } u;
+ struct dst_entry *route;
++ struct flow_cache_object flo;
++ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
++ int num_pols, num_xfrms;
+ #ifdef CONFIG_XFRM_SUB_POLICY
+ struct flowi *origin;
+ struct xfrm_selector *partner;
+ #endif
+- u32 genid;
++ u32 xfrm_genid;
++ u32 policy_genid;
+ u32 route_mtu_cached;
+ u32 child_mtu_cached;
+ u32 route_cookie;
+@@ -897,6 +900,7 @@ struct xfrm_dst
+ #ifdef CONFIG_XFRM
+ static inline void xfrm_dst_destroy(struct xfrm_dst *xdst)
+ {
++ xfrm_pols_put(xdst->pols, xdst->num_pols);
+ dst_release(xdst->route);
+ if (likely(xdst->u.dst.xfrm))
+ xfrm_state_put(xdst->u.dst.xfrm);
+diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+index 7009886..651a3e7 100644
+--- a/net/ipv4/xfrm4_policy.c
++++ b/net/ipv4/xfrm4_policy.c
+@@ -60,27 +60,6 @@ static int xfrm4_get_saddr(struct net *net,
+ return 0;
+ }
+
+-static struct dst_entry *
+-__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+-{
+- struct dst_entry *dst;
+-
+- read_lock_bh(&policy->lock);
+- for (dst = policy->bundles; dst; dst = dst->next) {
+- struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
+- if (xdst->u.rt.fl.oif == fl->oif && /*XXX*/
+- xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
+- xdst->u.rt.fl.fl4_src == fl->fl4_src &&
+- xdst->u.rt.fl.fl4_tos == fl->fl4_tos &&
+- xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) {
+- dst_clone(dst);
+- break;
+- }
+- }
+- read_unlock_bh(&policy->lock);
+- return dst;
+-}
+-
+ static int xfrm4_get_tos(struct flowi *fl)
+ {
+ return fl->fl4_tos;
+@@ -258,7 +237,6 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
+ .dst_ops = &xfrm4_dst_ops,
+ .dst_lookup = xfrm4_dst_lookup,
+ .get_saddr = xfrm4_get_saddr,
+- .find_bundle = __xfrm4_find_bundle,
+ .decode_session = _decode_session4,
+ .get_tos = xfrm4_get_tos,
+ .init_path = xfrm4_init_path,
+diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+index 3f89ab7..fb2a5b7 100644
+--- a/net/ipv6/xfrm6_policy.c
++++ b/net/ipv6/xfrm6_policy.c
+@@ -68,36 +68,6 @@ static int xfrm6_get_saddr(struct net *net,
+ return 0;
+ }
+
+-static struct dst_entry *
+-__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
+-{
+- struct dst_entry *dst;
+-
+- /* Still not clear if we should set fl->fl6_{src,dst}... */
+- read_lock_bh(&policy->lock);
+- for (dst = policy->bundles; dst; dst = dst->next) {
+- struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
+- struct in6_addr fl_dst_prefix, fl_src_prefix;
+-
+- ipv6_addr_prefix(&fl_dst_prefix,
+- &fl->fl6_dst,
+- xdst->u.rt6.rt6i_dst.plen);
+- ipv6_addr_prefix(&fl_src_prefix,
+- &fl->fl6_src,
+- xdst->u.rt6.rt6i_src.plen);
+- if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
+- ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
+- xfrm_bundle_ok(policy, xdst, fl, AF_INET6,
+- (xdst->u.rt6.rt6i_dst.plen != 128 ||
+- xdst->u.rt6.rt6i_src.plen != 128))) {
+- dst_clone(dst);
+- break;
+- }
+- }
+- read_unlock_bh(&policy->lock);
+- return dst;
+-}
+-
+ static int xfrm6_get_tos(struct flowi *fl)
+ {
+ return 0;
+@@ -290,7 +260,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
+ .dst_ops = &xfrm6_dst_ops,
+ .dst_lookup = xfrm6_dst_lookup,
+ .get_saddr = xfrm6_get_saddr,
+- .find_bundle = __xfrm6_find_bundle,
+ .decode_session = _decode_session6,
+ .get_tos = xfrm6_get_tos,
+ .init_path = xfrm6_init_path,
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index d1eb2b5..0379d82 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -37,6 +37,8 @@
+ DEFINE_MUTEX(xfrm_cfg_mutex);
+ EXPORT_SYMBOL(xfrm_cfg_mutex);
+
++static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock);
++static struct dst_entry *xfrm_policy_sk_bundles;
+ static DEFINE_RWLOCK(xfrm_policy_lock);
+
+ static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
+@@ -50,6 +52,7 @@ static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+ static void xfrm_init_pmtu(struct dst_entry *dst);
++static int stale_bundle(struct dst_entry *dst);
+
+ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
+ int dir);
+@@ -277,8 +280,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
+ {
+ BUG_ON(!policy->walk.dead);
+
+- BUG_ON(policy->bundles);
+-
+ if (del_timer(&policy->timer))
+ BUG();
+
+@@ -289,12 +290,7 @@ EXPORT_SYMBOL(xfrm_policy_destroy);
+
+ static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+ {
+- struct dst_entry *dst;
+-
+- while ((dst = policy->bundles) != NULL) {
+- policy->bundles = dst->next;
+- dst_free(dst);
+- }
++ atomic_inc(&policy->genid);
+
+ if (del_timer(&policy->timer))
+ atomic_dec(&policy->refcnt);
+@@ -572,7 +568,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ struct xfrm_policy *delpol;
+ struct hlist_head *chain;
+ struct hlist_node *entry, *newpos;
+- struct dst_entry *gc_list;
+
+ write_lock_bh(&xfrm_policy_lock);
+ chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
+@@ -620,34 +615,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
+ else if (xfrm_bydst_should_resize(net, dir, NULL))
+ schedule_work(&net->xfrm.policy_hash_work);
+
+- read_lock_bh(&xfrm_policy_lock);
+- gc_list = NULL;
+- entry = &policy->bydst;
+- hlist_for_each_entry_continue(policy, entry, bydst) {
+- struct dst_entry *dst;
+-
+- write_lock(&policy->lock);
+- dst = policy->bundles;
+- if (dst) {
+- struct dst_entry *tail = dst;
+- while (tail->next)
+- tail = tail->next;
+- tail->next = gc_list;
+- gc_list = dst;
+-
+- policy->bundles = NULL;
+- }
+- write_unlock(&policy->lock);
+- }
+- read_unlock_bh(&xfrm_policy_lock);
+-
+- while (gc_list) {
+- struct dst_entry *dst = gc_list;
+-
+- gc_list = dst->next;
+- dst_free(dst);
+- }
+-
+ return 0;
+ }
+ EXPORT_SYMBOL(xfrm_policy_insert);
+@@ -990,6 +957,19 @@ fail:
+ return ret;
+ }
+
++static struct xfrm_policy *
++__xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir)
++{
++#ifdef CONFIG_XFRM_SUB_POLICY
++ struct xfrm_policy *pol;
++
++ pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
++ if (pol != NULL)
++ return pol;
++#endif
++ return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
++}
++
+ static struct flow_cache_object *
+ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+ u8 dir, struct flow_cache_object *old_obj, void *ctx)
+@@ -999,21 +979,10 @@ xfrm_policy_lookup(struct net *net, struct flowi *fl, u16 family,
+ if (old_obj)
+ xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
+
+-#ifdef CONFIG_XFRM_SUB_POLICY
+- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
+- if (IS_ERR(pol))
++ pol = __xfrm_policy_lookup(net, fl, family, dir);
++ if (pol == NULL || IS_ERR(pol))
+ return ERR_CAST(pol);
+- if (pol)
+- goto found;
+-#endif
+- pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
+- if (IS_ERR(pol))
+- return ERR_CAST(pol);
+- if (pol)
+- goto found;
+- return NULL;
+
+-found:
+ /* Resolver returns two references:
+ * one for cache and one for caller of flow_cache_lookup() */
+ xfrm_pol_hold(pol);
+@@ -1299,18 +1268,6 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
+ * still valid.
+ */
+
+-static struct dst_entry *
+-xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
+-{
+- struct dst_entry *x;
+- struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+- if (unlikely(afinfo == NULL))
+- return ERR_PTR(-EINVAL);
+- x = afinfo->find_bundle(fl, policy);
+- xfrm_policy_put_afinfo(afinfo);
+- return x;
+-}
+-
+ static inline int xfrm_get_tos(struct flowi *fl, int family)
+ {
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+@@ -1326,6 +1283,54 @@ static inline int xfrm_get_tos(struct flowi *fl, int family)
+ return tos;
+ }
+
++static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
++{
++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
++ struct dst_entry *dst = &xdst->u.dst;
++
++ if (xdst->route == NULL) {
++ /* Dummy bundle - if it has xfrms we were not
++ * able to build bundle as template resolution failed.
++ * It means we need to try again resolving. */
++ if (xdst->num_xfrms > 0)
++ return NULL;
++ } else {
++ /* Real bundle */
++ if (stale_bundle(dst))
++ return NULL;
++ }
++
++ dst_hold(dst);
++ return flo;
++}
++
++static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
++{
++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
++ struct dst_entry *dst = &xdst->u.dst;
++
++ if (!xdst->route)
++ return 0;
++ if (stale_bundle(dst))
++ return 0;
++
++ return 1;
++}
++
++static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
++{
++ struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
++ struct dst_entry *dst = &xdst->u.dst;
++
++ dst_free(dst);
++}
++
++static const struct flow_cache_ops xfrm_bundle_fc_ops = {
++ .get = xfrm_bundle_flo_get,
++ .check = xfrm_bundle_flo_check,
++ .delete = xfrm_bundle_flo_delete,
++};
++
+ static inline struct xfrm_dst *xfrm_alloc_dst(int family)
+ {
+ struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
+@@ -1338,6 +1343,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(int family)
+
+ xfrm_policy_put_afinfo(afinfo);
+
++ xdst->flo.ops = &xfrm_bundle_fc_ops;
++
+ return xdst;
+ }
+
+@@ -1375,6 +1382,7 @@ static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
+ return err;
+ }
+
++
+ /* Allocate chain of dst_entry's, attach known xfrm's, calculate
+ * all the metrics... Shortly, bundle a bundle.
+ */
+@@ -1437,7 +1445,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
+ dst_hold(dst);
+
+ dst1->xfrm = xfrm[i];
+- xdst->genid = xfrm[i]->genid;
++ xdst->xfrm_genid = xfrm[i]->genid;
+
+ dst1->obsolete = -1;
+ dst1->flags |= DST_HOST;
+@@ -1530,7 +1538,186 @@ xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
+ #endif
+ }
+
+-static int stale_bundle(struct dst_entry *dst);
++static int xfrm_expand_policies(struct flowi *fl, u16 family,
++ struct xfrm_policy **pols,
++ int *num_pols, int *num_xfrms)
++{
++ int i;
++
++ if (*num_pols == 0 || !pols[0]) {
++ *num_pols = 0;
++ *num_xfrms = 0;
++ return 0;
++ }
++ if (IS_ERR(pols[0]))
++ return PTR_ERR(pols[0]);
++
++ *num_xfrms = pols[0]->xfrm_nr;
++
++#ifdef CONFIG_XFRM_SUB_POLICY
++ if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
++ pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
++ pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
++ XFRM_POLICY_TYPE_MAIN,
++ fl, family,
++ XFRM_POLICY_OUT);
++ if (pols[1]) {
++ if (IS_ERR(pols[1])) {
++ xfrm_pols_put(pols, *num_pols);
++ return PTR_ERR(pols[1]);
++ }
++ (*num_pols) ++;
++ (*num_xfrms) += pols[1]->xfrm_nr;
++ }
++ }
++#endif
++ for (i = 0; i < *num_pols; i++) {
++ if (pols[i]->action != XFRM_POLICY_ALLOW) {
++ *num_xfrms = -1;
++ break;
++ }
++ }
++
++ return 0;
++
++}
++
++static struct xfrm_dst *
++xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
++ struct flowi *fl, u16 family,
++ struct dst_entry *dst_orig)
++{
++ struct net *net = xp_net(pols[0]);
++ struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
++ struct dst_entry *dst;
++ struct xfrm_dst *xdst;
++ int err;
++
++ /* Try to instantiate a bundle */
++ err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
++ if (err < 0) {
++ if (err != -EAGAIN)
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++ return ERR_PTR(err);
++ }
++
++ dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
++ if (IS_ERR(dst)) {
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
++ return ERR_CAST(dst);
++ }
++
++ xdst = (struct xfrm_dst *)dst;
++ xdst->num_xfrms = err;
++ if (num_pols > 1)
++ err = xfrm_dst_update_parent(dst, &pols[1]->selector);
++ else
++ err = xfrm_dst_update_origin(dst, fl);
++ if (unlikely(err)) {
++ dst_free(dst);
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
++ return ERR_PTR(err);
++ }
++
++ xdst->num_pols = num_pols;
++ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
++ xdst->policy_genid = atomic_read(&pols[0]->genid);
++
++ return xdst;
++}
++
++static struct flow_cache_object *
++xfrm_bundle_lookup(struct net *net, struct flowi *fl, u16 family, u8 dir,
++ struct flow_cache_object *oldflo, void *ctx)
++{
++ struct dst_entry *dst_orig = (struct dst_entry *)ctx;
++ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
++ struct xfrm_dst *xdst, *new_xdst;
++ int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
++
++ /* Check if the policies from old bundle are usable */
++ xdst = NULL;
++ if (oldflo) {
++ xdst = container_of(oldflo, struct xfrm_dst, flo);
++ num_pols = xdst->num_pols;
++ num_xfrms = xdst->num_xfrms;
++ pol_dead = 0;
++ for (i = 0; i < num_pols; i++) {
++ pols[i] = xdst->pols[i];
++ pol_dead |= pols[i]->walk.dead;
++ }
++ if (pol_dead) {
++ dst_free(&xdst->u.dst);
++ xdst = NULL;
++ num_pols = 0;
++ num_xfrms = 0;
++ oldflo = NULL;
++ }
++ }
++
++ /* Resolve policies to use if we couldn't get them from
++ * previous cache entry */
++ if (xdst == NULL) {
++ num_pols = 1;
++ pols[0] = __xfrm_policy_lookup(net, fl, family, dir);
++ err = xfrm_expand_policies(fl, family, pols,
++ &num_pols, &num_xfrms);
++ if (err < 0)
++ goto inc_error;
++ if (num_pols == 0)
++ return NULL;
++ if (num_xfrms <= 0)
++ goto make_dummy_bundle;
++ }
++
++ new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig);
++ if (IS_ERR(new_xdst)) {
++ err = PTR_ERR(new_xdst);
++ if (err != -EAGAIN)
++ goto error;
++ if (oldflo == NULL)
++ goto make_dummy_bundle;
++ dst_hold(&xdst->u.dst);
++ return oldflo;
++ }
++
++ /* Kill the previous bundle */
++ if (xdst) {
++ /* The policies were stolen for newly generated bundle */
++ xdst->num_pols = 0;
++ dst_free(&xdst->u.dst);
++ }
++
++ /* Flow cache does not have reference, it dst_free()'s,
++ * but we do need to return one reference for original caller */
++ dst_hold(&new_xdst->u.dst);
++ return &new_xdst->flo;
++
++make_dummy_bundle:
++ /* We found policies, but there's no bundles to instantiate:
++ * either because the policy blocks, has no transformations or
++ * we could not build template (no xfrm_states).*/
++ xdst = xfrm_alloc_dst(family);
++ if (IS_ERR(xdst)) {
++ xfrm_pols_put(pols, num_pols);
++ return ERR_CAST(xdst);
++ }
++ xdst->num_pols = num_pols;
++ xdst->num_xfrms = num_xfrms;
++ memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols);
++
++ dst_hold(&xdst->u.dst);
++ return &xdst->flo;
++
++inc_error:
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++error:
++ if (xdst != NULL)
++ dst_free(&xdst->u.dst);
++ else
++ xfrm_pols_put(pols, num_pols);
++ return ERR_PTR(err);
++}
+
+ /* Main function: finds/creates a bundle for given flow.
+ *
+@@ -1540,248 +1727,152 @@ static int stale_bundle(struct dst_entry *dst);
+ int __xfrm_lookup(struct net *net, struct dst_entry **dst_p, struct flowi *fl,
+ struct sock *sk, int flags)
+ {
+- struct xfrm_policy *policy;
+ struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
+- int npols;
+- int pol_dead;
+- int xfrm_nr;
+- int pi;
+- struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
+- struct dst_entry *dst, *dst_orig = *dst_p;
+- int nx = 0;
+- int err;
+- u32 genid;
+- u16 family;
++ struct flow_cache_object *flo;
++ struct xfrm_dst *xdst;
++ struct dst_entry *dst, *dst_orig = *dst_p, *route;
++ u16 family = dst_orig->ops->family;
+ u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
++ int i, err, num_pols, num_xfrms, drop_pols = 0;
+
+ restart:
+- genid = atomic_read(&flow_cache_genid);
+- policy = NULL;
+- for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
+- pols[pi] = NULL;
+- npols = 0;
+- pol_dead = 0;
+- xfrm_nr = 0;
++ dst = NULL;
++ xdst = NULL;
++ route = NULL;
+
+ if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
+- policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
+- err = PTR_ERR(policy);
+- if (IS_ERR(policy)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++ num_pols = 1;
++ pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
++ err = xfrm_expand_policies(fl, family, pols,
++ &num_pols, &num_xfrms);
++ if (err < 0)
+ goto dropdst;
++
++ if (num_pols) {
++ if (num_xfrms <= 0) {
++ drop_pols = num_pols;
++ goto no_transform;
++ }
++
++ xdst = xfrm_resolve_and_create_bundle(
++ pols, num_pols, fl,
++ family, dst_orig);
++ if (IS_ERR(xdst)) {
++ xfrm_pols_put(pols, num_pols);
++ err = PTR_ERR(xdst);
++ goto dropdst;
++ }
++
++ spin_lock_bh(&xfrm_policy_sk_bundle_lock);
++ xdst->u.dst.next = xfrm_policy_sk_bundles;
++ xfrm_policy_sk_bundles = &xdst->u.dst;
++ spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
++
++ route = xdst->route;
+ }
+ }
+
+- if (!policy) {
+- struct flow_cache_object *flo;
+-
++ if (xdst == NULL) {
+ /* To accelerate a bit... */
+ if ((dst_orig->flags & DST_NOXFRM) ||
+ !net->xfrm.policy_count[XFRM_POLICY_OUT])
+ goto nopol;
+
+- flo = flow_cache_lookup(net, fl, dst_orig->ops->family,
+- dir, xfrm_policy_lookup, NULL);
+- err = PTR_ERR(flo);
++ flo = flow_cache_lookup(net, fl, family, dir,
++ xfrm_bundle_lookup, dst_orig);
++ if (flo == NULL)
++ goto nopol;
+ if (IS_ERR(flo)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
++ err = PTR_ERR(flo);
+ goto dropdst;
+ }
+- if (flo)
+- policy = container_of(flo, struct xfrm_policy, flo);
+- else
+- policy = NULL;
++ xdst = container_of(flo, struct xfrm_dst, flo);
++
++ num_pols = xdst->num_pols;
++ num_xfrms = xdst->num_xfrms;
++ memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols);
++ route = xdst->route;
++ }
++
++ dst = &xdst->u.dst;
++ if (route == NULL && num_xfrms > 0) {
++ /* The only case when xfrm_bundle_lookup() returns a
++ * bundle with null route, is when the template could
++ * not be resolved. It means policies are there, but
++ * bundle could not be created, since we don't yet
++ * have the xfrm_state's. We need to wait for KM to
++ * negotiate new SA's or bail out with error.*/
++ if (net->xfrm.sysctl_larval_drop) {
++ /* EREMOTE tells the caller to generate
++ * a one-shot blackhole route. */
++ dst_release(dst);
++ xfrm_pols_put(pols, num_pols);
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
++ return -EREMOTE;
++ }
++ if (flags & XFRM_LOOKUP_WAIT) {
++ DECLARE_WAITQUEUE(wait, current);
++
++ add_wait_queue(&net->xfrm.km_waitq, &wait);
++ set_current_state(TASK_INTERRUPTIBLE);
++ schedule();
++ set_current_state(TASK_RUNNING);
++ remove_wait_queue(&net->xfrm.km_waitq, &wait);
++
++ if (!signal_pending(current)) {
++ dst_release(dst);
++ goto restart;
++ }
++
++ err = -ERESTART;
++ } else
++ err = -EAGAIN;
++
++ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
++ goto error;
+ }
+
+- if (!policy)
++no_transform:
++ if (num_pols == 0)
+ goto nopol;
+
+- family = dst_orig->ops->family;
+- pols[0] = policy;
+- npols ++;
+- xfrm_nr += pols[0]->xfrm_nr;
+-
+- err = -ENOENT;
+- if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
++ if ((flags & XFRM_LOOKUP_ICMP) &&
++ !(pols[0]->flags & XFRM_POLICY_ICMP)) {
++ err = -ENOENT;
+ goto error;
++ }
+
+- policy->curlft.use_time = get_seconds();
++ for (i = 0; i < num_pols; i++)
++ pols[i]->curlft.use_time = get_seconds();
+
+- switch (policy->action) {
+- default:
+- case XFRM_POLICY_BLOCK:
++ if (num_xfrms < 0) {
+ /* Prohibit the flow */
+ XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
+ err = -EPERM;
+ goto error;
+-
+- case XFRM_POLICY_ALLOW:
+-#ifndef CONFIG_XFRM_SUB_POLICY
+- if (policy->xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pol_put(policy);
+- return 0;
+- }
+-#endif
+-
+- /* Try to find matching bundle.
+- *
+- * LATER: help from flow cache. It is optional, this
+- * is required only for output policy.
+- */
+- dst = xfrm_find_bundle(fl, policy, family);
+- if (IS_ERR(dst)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+- err = PTR_ERR(dst);
+- goto error;
+- }
+-
+- if (dst)
+- break;
+-
+-#ifdef CONFIG_XFRM_SUB_POLICY
+- if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
+- pols[1] = xfrm_policy_lookup_bytype(net,
+- XFRM_POLICY_TYPE_MAIN,
+- fl, family,
+- XFRM_POLICY_OUT);
+- if (pols[1]) {
+- if (IS_ERR(pols[1])) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
+- err = PTR_ERR(pols[1]);
+- goto error;
+- }
+- if (pols[1]->action == XFRM_POLICY_BLOCK) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
+- err = -EPERM;
+- goto error;
+- }
+- npols ++;
+- xfrm_nr += pols[1]->xfrm_nr;
+- }
+- }
+-
+- /*
+- * Because neither flowi nor bundle information knows about
+- * transformation template size. On more than one policy usage
+- * we can realize whether all of them is bypass or not after
+- * they are searched. See above not-transformed bypass
+- * is surrounded by non-sub policy configuration, too.
+- */
+- if (xfrm_nr == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
+-
+-#endif
+- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
+-
+- if (unlikely(nx<0)) {
+- err = nx;
+- if (err == -EAGAIN && net->xfrm.sysctl_larval_drop) {
+- /* EREMOTE tells the caller to generate
+- * a one-shot blackhole route.
+- */
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+- xfrm_pol_put(policy);
+- return -EREMOTE;
+- }
+- if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
+- DECLARE_WAITQUEUE(wait, current);
+-
+- add_wait_queue(&net->xfrm.km_waitq, &wait);
+- set_current_state(TASK_INTERRUPTIBLE);
+- schedule();
+- set_current_state(TASK_RUNNING);
+- remove_wait_queue(&net->xfrm.km_waitq, &wait);
+-
+- nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
+-
+- if (nx == -EAGAIN && signal_pending(current)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+- err = -ERESTART;
+- goto error;
+- }
+- if (nx == -EAGAIN ||
+- genid != atomic_read(&flow_cache_genid)) {
+- xfrm_pols_put(pols, npols);
+- goto restart;
+- }
+- err = nx;
+- }
+- if (err < 0) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
+- goto error;
+- }
+- }
+- if (nx == 0) {
+- /* Flow passes not transformed. */
+- xfrm_pols_put(pols, npols);
+- return 0;
+- }
+-
+- dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
+- err = PTR_ERR(dst);
+- if (IS_ERR(dst)) {
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
+- goto error;
+- }
+-
+- for (pi = 0; pi < npols; pi++)
+- pol_dead |= pols[pi]->walk.dead;
+-
+- write_lock_bh(&policy->lock);
+- if (unlikely(pol_dead || stale_bundle(dst))) {
+- /* Wow! While we worked on resolving, this
+- * policy has gone. Retry. It is not paranoia,
+- * we just cannot enlist new bundle to dead object.
+- * We can't enlist stable bundles either.
+- */
+- write_unlock_bh(&policy->lock);
+- dst_free(dst);
+-
+- if (pol_dead)
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLDEAD);
+- else
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+- err = -EHOSTUNREACH;
+- goto error;
+- }
+-
+- if (npols > 1)
+- err = xfrm_dst_update_parent(dst, &pols[1]->selector);
+- else
+- err = xfrm_dst_update_origin(dst, fl);
+- if (unlikely(err)) {
+- write_unlock_bh(&policy->lock);
+- dst_free(dst);
+- XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
+- goto error;
+- }
+-
+- dst->next = policy->bundles;
+- policy->bundles = dst;
+- dst_hold(dst);
+- write_unlock_bh(&policy->lock);
++ } else if (num_xfrms > 0) {
++ /* Flow transformed */
++ *dst_p = dst;
++ dst_release(dst_orig);
++ } else {
++ /* Flow passes untransformed */
++ dst_release(dst);
+ }
+- *dst_p = dst;
+- dst_release(dst_orig);
+- xfrm_pols_put(pols, npols);
++ok:
++ xfrm_pols_put(pols, drop_pols);
+ return 0;
+
++nopol:
++ if (!(flags & XFRM_LOOKUP_ICMP))
++ goto ok;
++ err = -ENOENT;
+ error:
+- xfrm_pols_put(pols, npols);
++ dst_release(dst);
+ dropdst:
+ dst_release(dst_orig);
+ *dst_p = NULL;
++ xfrm_pols_put(pols, drop_pols);
+ return err;
+-
+-nopol:
+- err = -ENOENT;
+- if (flags & XFRM_LOOKUP_ICMP)
+- goto dropdst;
+- return 0;
+ }
+ EXPORT_SYMBOL(__xfrm_lookup);
+
+@@ -2134,71 +2225,24 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
+ return dst;
+ }
+
+-static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
+-{
+- struct dst_entry *dst, **dstp;
+-
+- write_lock(&pol->lock);
+- dstp = &pol->bundles;
+- while ((dst=*dstp) != NULL) {
+- if (func(dst)) {
+- *dstp = dst->next;
+- dst->next = *gc_list_p;
+- *gc_list_p = dst;
+- } else {
+- dstp = &dst->next;
+- }
+- }
+- write_unlock(&pol->lock);
+-}
+-
+-static void xfrm_prune_bundles(struct net *net, int (*func)(struct dst_entry *))
++static void __xfrm_garbage_collect(struct net *net)
+ {
+- struct dst_entry *gc_list = NULL;
+- int dir;
++ struct dst_entry *head, *next;
+
+- read_lock_bh(&xfrm_policy_lock);
+- for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
+- struct xfrm_policy *pol;
+- struct hlist_node *entry;
+- struct hlist_head *table;
+- int i;
++ flow_cache_flush();
+
+- hlist_for_each_entry(pol, entry,
+- &net->xfrm.policy_inexact[dir], bydst)
+- prune_one_bundle(pol, func, &gc_list);
++ spin_lock_bh(&xfrm_policy_sk_bundle_lock);
++ head = xfrm_policy_sk_bundles;
++ xfrm_policy_sk_bundles = NULL;
++ spin_unlock_bh(&xfrm_policy_sk_bundle_lock);
+
+- table = net->xfrm.policy_bydst[dir].table;
+- for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
+- hlist_for_each_entry(pol, entry, table + i, bydst)
+- prune_one_bundle(pol, func, &gc_list);
+- }
+- }
+- read_unlock_bh(&xfrm_policy_lock);
+-
+- while (gc_list) {
+- struct dst_entry *dst = gc_list;
+- gc_list = dst->next;
+- dst_free(dst);
++ while (head) {
++ next = head->next;
++ dst_free(head);
++ head = next;
+ }
+ }
+
+-static int unused_bundle(struct dst_entry *dst)
+-{
+- return !atomic_read(&dst->__refcnt);
+-}
+-
+-static void __xfrm_garbage_collect(struct net *net)
+-{
+- xfrm_prune_bundles(net, unused_bundle);
+-}
+-
+-static int xfrm_flush_bundles(struct net *net)
+-{
+- xfrm_prune_bundles(net, stale_bundle);
+- return 0;
+-}
+-
+ static void xfrm_init_pmtu(struct dst_entry *dst)
+ {
+ do {
+@@ -2256,7 +2300,9 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
+ return 0;
+ if (dst->xfrm->km.state != XFRM_STATE_VALID)
+ return 0;
+- if (xdst->genid != dst->xfrm->genid)
++ if (xdst->xfrm_genid != dst->xfrm->genid)
++ return 0;
++ if (xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
+ return 0;
+
+ if (strict && fl &&
+@@ -2383,7 +2429,7 @@ static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void
+
+ switch (event) {
+ case NETDEV_DOWN:
+- xfrm_flush_bundles(dev_net(dev));
++ __xfrm_garbage_collect(dev_net(dev));
+ }
+ return NOTIFY_DONE;
+ }
+@@ -2714,7 +2760,6 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ struct xfrm_migrate *m, int num_migrate)
+ {
+ struct xfrm_migrate *mp;
+- struct dst_entry *dst;
+ int i, j, n = 0;
+
+ write_lock_bh(&pol->lock);
+@@ -2739,10 +2784,7 @@ static int xfrm_policy_migrate(struct xfrm_policy *pol,
+ sizeof(pol->xfrm_vec[i].saddr));
+ pol->xfrm_vec[i].encap_family = mp->new_family;
+ /* flush bundles */
+- while ((dst = pol->bundles) != NULL) {
+- pol->bundles = dst->next;
+- dst_free(dst);
+- }
++ atomic_inc(&pol->genid);
+ }
+ }
+
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0016-xfrm-remove-policy-garbage-collection.patch b/main/linux-pae/0016-xfrm-remove-policy-garbage-collection.patch
new file mode 100644
index 00000000..4a45c7f4
--- /dev/null
+++ b/main/linux-pae/0016-xfrm-remove-policy-garbage-collection.patch
@@ -0,0 +1,91 @@
+From 4c53c9239069f48ec9a86f8e596c163b72e8bc4d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 7 Apr 2010 00:30:06 +0000
+Subject: [PATCH 16/18] xfrm: remove policy garbage collection
+
+Policies are now properly reference counted and destroyed from
+all code paths. The delayed gc is just an overhead now and can
+be removed.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 285ead175c5dd5075cab5b6c94f35a3e6c0a3ae6)
+---
+ net/xfrm/xfrm_policy.c | 39 +++++----------------------------------
+ 1 files changed, 5 insertions(+), 34 deletions(-)
+
+diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+index 0379d82..5606841 100644
+--- a/net/xfrm/xfrm_policy.c
++++ b/net/xfrm/xfrm_policy.c
+@@ -46,9 +46,6 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
+
+ static struct kmem_cache *xfrm_dst_cache __read_mostly;
+
+-static HLIST_HEAD(xfrm_policy_gc_list);
+-static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
+-
+ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
+ static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
+ static void xfrm_init_pmtu(struct dst_entry *dst);
+@@ -288,32 +285,6 @@ void xfrm_policy_destroy(struct xfrm_policy *policy)
+ }
+ EXPORT_SYMBOL(xfrm_policy_destroy);
+
+-static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
+-{
+- atomic_inc(&policy->genid);
+-
+- if (del_timer(&policy->timer))
+- atomic_dec(&policy->refcnt);
+-
+- xfrm_pol_put(policy);
+-}
+-
+-static void xfrm_policy_gc_task(struct work_struct *work)
+-{
+- struct xfrm_policy *policy;
+- struct hlist_node *entry, *tmp;
+- struct hlist_head gc_list;
+-
+- spin_lock_bh(&xfrm_policy_gc_lock);
+- gc_list.first = xfrm_policy_gc_list.first;
+- INIT_HLIST_HEAD(&xfrm_policy_gc_list);
+- spin_unlock_bh(&xfrm_policy_gc_lock);
+-
+- hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
+- xfrm_policy_gc_kill(policy);
+-}
+-static DECLARE_WORK(xfrm_policy_gc_work, xfrm_policy_gc_task);
+-
+ /* Rule must be locked. Release descentant resources, announce
+ * entry dead. The rule must be unlinked from lists to the moment.
+ */
+@@ -322,11 +293,12 @@ static void xfrm_policy_kill(struct xfrm_policy *policy)
+ {
+ policy->walk.dead = 1;
+
+- spin_lock_bh(&xfrm_policy_gc_lock);
+- hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
+- spin_unlock_bh(&xfrm_policy_gc_lock);
++ atomic_inc(&policy->genid);
+
+- schedule_work(&xfrm_policy_gc_work);
++ if (del_timer(&policy->timer))
++ xfrm_pol_put(policy);
++
++ xfrm_pol_put(policy);
+ }
+
+ static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
+@@ -2535,7 +2507,6 @@ static void xfrm_policy_fini(struct net *net)
+ audit_info.sessionid = -1;
+ audit_info.secid = 0;
+ xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);
+- flush_work(&xfrm_policy_gc_work);
+
+ WARN_ON(!list_empty(&net->xfrm.policy_all));
+
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0017-flow-delayed-deletion-of-flow-cache-entries.patch b/main/linux-pae/0017-flow-delayed-deletion-of-flow-cache-entries.patch
new file mode 100644
index 00000000..7d17d41a
--- /dev/null
+++ b/main/linux-pae/0017-flow-delayed-deletion-of-flow-cache-entries.patch
@@ -0,0 +1,231 @@
+From fede05e99e2d860e97bc877b8b77fb9e63f55cc8 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Wed, 7 Apr 2010 00:30:07 +0000
+Subject: [PATCH 17/18] flow: delayed deletion of flow cache entries
+
+Speed up lookups by freeing flow cache entries later. After
+virtualizing flow cache entry operations, the flow cache may now
+end up calling policy or bundle destructor which can be slowish.
+
+As gc_list is more effective with double linked list, the flow cache
+is converted to use common hlist and list macroes where appropriate.
+
+Signed-off-by: Timo Teras <timo.teras@iki.fi>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit 8e4795605d1e1b39113818ad7c147b8a867a1f6a)
+---
+ net/core/flow.c | 100 ++++++++++++++++++++++++++++++++++++++-----------------
+ 1 files changed, 69 insertions(+), 31 deletions(-)
+
+diff --git a/net/core/flow.c b/net/core/flow.c
+index 521df52..1619006 100644
+--- a/net/core/flow.c
++++ b/net/core/flow.c
+@@ -26,7 +26,10 @@
+ #include <linux/security.h>
+
+ struct flow_cache_entry {
+- struct flow_cache_entry *next;
++ union {
++ struct hlist_node hlist;
++ struct list_head gc_list;
++ } u;
+ u16 family;
+ u8 dir;
+ u32 genid;
+@@ -35,7 +38,7 @@ struct flow_cache_entry {
+ };
+
+ struct flow_cache_percpu {
+- struct flow_cache_entry **hash_table;
++ struct hlist_head *hash_table;
+ int hash_count;
+ u32 hash_rnd;
+ int hash_rnd_recalc;
+@@ -62,6 +65,9 @@ atomic_t flow_cache_genid = ATOMIC_INIT(0);
+ static struct flow_cache flow_cache_global;
+ static struct kmem_cache *flow_cachep;
+
++static DEFINE_SPINLOCK(flow_cache_gc_lock);
++static LIST_HEAD(flow_cache_gc_list);
++
+ #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
+ #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
+
+@@ -86,38 +92,66 @@ static int flow_entry_valid(struct flow_cache_entry *fle)
+ return 1;
+ }
+
+-static void flow_entry_kill(struct flow_cache *fc,
+- struct flow_cache_percpu *fcp,
+- struct flow_cache_entry *fle)
++static void flow_entry_kill(struct flow_cache_entry *fle)
+ {
+ if (fle->object)
+ fle->object->ops->delete(fle->object);
+ kmem_cache_free(flow_cachep, fle);
+- fcp->hash_count--;
++}
++
++static void flow_cache_gc_task(struct work_struct *work)
++{
++ struct list_head gc_list;
++ struct flow_cache_entry *fce, *n;
++
++ INIT_LIST_HEAD(&gc_list);
++ spin_lock_bh(&flow_cache_gc_lock);
++ list_splice_tail_init(&flow_cache_gc_list, &gc_list);
++ spin_unlock_bh(&flow_cache_gc_lock);
++
++ list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
++ flow_entry_kill(fce);
++}
++static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
++
++static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
++ int deleted, struct list_head *gc_list)
++{
++ if (deleted) {
++ fcp->hash_count -= deleted;
++ spin_lock_bh(&flow_cache_gc_lock);
++ list_splice_tail(gc_list, &flow_cache_gc_list);
++ spin_unlock_bh(&flow_cache_gc_lock);
++ schedule_work(&flow_cache_gc_work);
++ }
+ }
+
+ static void __flow_cache_shrink(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp,
+ int shrink_to)
+ {
+- struct flow_cache_entry *fle, **flp;
+- int i;
++ struct flow_cache_entry *fle;
++ struct hlist_node *entry, *tmp;
++ LIST_HEAD(gc_list);
++ int i, deleted = 0;
+
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+ int saved = 0;
+
+- flp = &fcp->hash_table[i];
+- while ((fle = *flp) != NULL) {
++ hlist_for_each_entry_safe(fle, entry, tmp,
++ &fcp->hash_table[i], u.hlist) {
+ if (saved < shrink_to &&
+ flow_entry_valid(fle)) {
+ saved++;
+- flp = &fle->next;
+ } else {
+- *flp = fle->next;
+- flow_entry_kill(fc, fcp, fle);
++ deleted++;
++ hlist_del(&fle->u.hlist);
++ list_add_tail(&fle->u.gc_list, &gc_list);
+ }
+ }
+ }
++
++ flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ }
+
+ static void flow_cache_shrink(struct flow_cache *fc,
+@@ -182,7 +216,8 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ {
+ struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache_percpu *fcp;
+- struct flow_cache_entry *fle, **head;
++ struct flow_cache_entry *fle, *tfle;
++ struct hlist_node *entry;
+ struct flow_cache_object *flo;
+ unsigned int hash;
+
+@@ -200,12 +235,13 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+ flow_new_hash_rnd(fc, fcp);
+
+ hash = flow_hash_code(fc, fcp, key);
+- head = &fcp->hash_table[hash];
+- for (fle = *head; fle; fle = fle->next) {
+- if (fle->family == family &&
+- fle->dir == dir &&
+- flow_key_compare(key, &fle->key) == 0)
++ hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) {
++ if (tfle->family == family &&
++ tfle->dir == dir &&
++ flow_key_compare(key, &tfle->key) == 0) {
++ fle = tfle;
+ break;
++ }
+ }
+
+ if (unlikely(!fle)) {
+@@ -214,12 +250,11 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir,
+
+ fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC);
+ if (fle) {
+- fle->next = *head;
+- *head = fle;
+ fle->family = family;
+ fle->dir = dir;
+ memcpy(&fle->key, key, sizeof(*key));
+ fle->object = NULL;
++ hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
+ fcp->hash_count++;
+ }
+ } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+@@ -262,23 +297,26 @@ static void flow_cache_flush_tasklet(unsigned long data)
+ struct flow_flush_info *info = (void *)data;
+ struct flow_cache *fc = info->cache;
+ struct flow_cache_percpu *fcp;
+- int i;
++ struct flow_cache_entry *fle;
++ struct hlist_node *entry, *tmp;
++ LIST_HEAD(gc_list);
++ int i, deleted = 0;
+
+ fcp = per_cpu_ptr(fc->percpu, smp_processor_id());
+ for (i = 0; i < flow_cache_hash_size(fc); i++) {
+- struct flow_cache_entry *fle;
+-
+- fle = fcp->hash_table[i];
+- for (; fle; fle = fle->next) {
++ hlist_for_each_entry_safe(fle, entry, tmp,
++ &fcp->hash_table[i], u.hlist) {
+ if (flow_entry_valid(fle))
+ continue;
+
+- if (fle->object)
+- fle->object->ops->delete(fle->object);
+- fle->object = NULL;
++ deleted++;
++ hlist_del(&fle->u.hlist);
++ list_add_tail(&fle->u.gc_list, &gc_list);
+ }
+ }
+
++ flow_cache_queue_garbage(fcp, deleted, &gc_list);
++
+ if (atomic_dec_and_test(&info->cpuleft))
+ complete(&info->completion);
+ }
+@@ -320,7 +358,7 @@ void flow_cache_flush(void)
+ static void __init flow_cache_cpu_prepare(struct flow_cache *fc,
+ struct flow_cache_percpu *fcp)
+ {
+- fcp->hash_table = (struct flow_cache_entry **)
++ fcp->hash_table = (struct hlist_head *)
+ __get_free_pages(GFP_KERNEL|__GFP_ZERO, fc->order);
+ if (!fcp->hash_table)
+ panic("NET: failed to allocate flow cache order %lu\n", fc->order);
+@@ -354,7 +392,7 @@ static int flow_cache_init(struct flow_cache *fc)
+
+ for (order = 0;
+ (PAGE_SIZE << order) <
+- (sizeof(struct flow_cache_entry *)*flow_cache_hash_size(fc));
++ (sizeof(struct hlist_head)*flow_cache_hash_size(fc));
+ order++)
+ /* NOTHING */;
+ fc->order = order;
+--
+1.7.0.2
+
diff --git a/main/linux-pae/0018-xfrm-Fix-crashes-in-xfrm_lookup.patch b/main/linux-pae/0018-xfrm-Fix-crashes-in-xfrm_lookup.patch
new file mode 100644
index 00000000..6f0dc912
--- /dev/null
+++ b/main/linux-pae/0018-xfrm-Fix-crashes-in-xfrm_lookup.patch
@@ -0,0 +1,46 @@
+From e0c0800740cdf64fe7b121c2ef235c01f1957af0 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Timo=20Ter=C3=A4s?= <timo.teras@iki.fi>
+Date: Thu, 8 Apr 2010 11:27:42 -0700
+Subject: [PATCH 18/18] xfrm: Fix crashes in xfrm_lookup()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Timo Teräs <timo.teras@iki.fi>
+
+Happens because CONFIG_XFRM_SUB_POLICY is not enabled, and one of
+the helper functions I used did unexpected things in that case.
+
+Signed-off-by: David S. Miller <davem@davemloft.net>
+(cherry picked from commit e4077e018b5ead3de9951fc01d8bf12eeeeeefed)
+---
+ include/net/xfrm.h | 7 -------
+ 1 files changed, 0 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index d51ef61..280f46f 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -738,19 +738,12 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy)
+ xfrm_policy_destroy(policy);
+ }
+
+-#ifdef CONFIG_XFRM_SUB_POLICY
+ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
+ {
+ int i;
+ for (i = npols - 1; i >= 0; --i)
+ xfrm_pol_put(pols[i]);
+ }
+-#else
+-static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
+-{
+- xfrm_pol_put(pols[0]);
+-}
+-#endif
+
+ extern void __xfrm_state_destroy(struct xfrm_state *);
+
+--
+1.7.0.2
+
diff --git a/main/linux-pae/APKBUILD b/main/linux-pae/APKBUILD
index 971800f4..95d20f1a 100644
--- a/main/linux-pae/APKBUILD
+++ b/main/linux-pae/APKBUILD
@@ -4,7 +4,7 @@ _flavor=pae
pkgname=linux-${_flavor}
pkgver=2.6.32.11
_kernver=2.6.32
-pkgrel=0
+pkgrel=1
pkgdesc="Linux kernel with PAE enabled"
url=http://www.kernel.org
depends="mkinitfs linux-firmware"
@@ -14,12 +14,23 @@ _config=${config:-kernelconfig.${CARCH:-x86}}
install=
source="ftp://ftp.kernel.org/pub/linux/kernel/v2.6/linux-$_kernver.tar.bz2
ftp://ftp.kernel.org/pub/linux/kernel/v2.6/patch-$pkgver.bz2
- ip_gre.patch
- ip_gre2.patch
- arp.patch
- xfrm-cache-size-revert.patch
- net-git-78f1cd-r8169-fix-broken-register-writes.patch
- net-git-c0cd88-r8169-offical-fix-for-CVE-2009-4537-overlength-frame-DMAs.patch
+ 0002-gre-fix-hard-header-destination-address-checking.patch
+ 0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch
+ 0004-arp-flush-arp-cache-on-device-change.patch
+ 0005-r8169-fix-broken-register-writes.patch
+ 0006-r8169-offical-fix-for-CVE-2009-4537-overlength-frame.patch
+ 0007-r8169-Fix-rtl8169_rx_interrupt.patch
+ 0008-r8169-clean-up-my-printk-uglyness.patch
+ 0009-ipsec-Fix-bogus-bundle-flowi.patch
+ 0010-xfrm-Remove-xfrm_state_genid.patch
+ 0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch
+ 0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch
+ 0013-flow-structurize-flow-cache.patch
+ 0014-flow-virtualize-flow-cache-entry-methods.patch
+ 0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch
+ 0016-xfrm-remove-policy-garbage-collection.patch
+ 0017-flow-delayed-deletion-of-flow-cache-entries.patch
+ 0018-xfrm-Fix-crashes-in-xfrm_lookup.patch
kernelconfig.x86
"
subpackages="$pkgname-dev"
@@ -42,7 +53,7 @@ prepare() {
mkdir -p "$srcdir"/build
cp "$srcdir"/$_config "$srcdir"/build/.config
echo "-${_flavor}" > "$srcdir"/linux-$_kernver/localversion-${_flavor}
- make -C "$srcdir"/linux-$_kernver O="$srcdir"/build HOSTCC="$CC" \
+ make -C "$srcdir"/linux-$_kernver O="$srcdir"/build HOSTCC="${CC:-gcc}" \
silentoldconfig
}
@@ -55,7 +66,7 @@ menuconfig() {
build() {
cd "$srcdir"/build
- make CC="$CC" || return 1
+ make CC="${CC:-gcc}" || return 1
}
package() {
@@ -88,7 +99,7 @@ dev() {
# external modules, and create the scripts
mkdir -p "$dir"
cp "$srcdir"/$_config "$dir"/.config
- make -j1 -C "$srcdir"/linux-$_kernver O="$dir" HOSTCC="$CC" \
+ make -j1 -C "$srcdir"/linux-$_kernver O="$dir" HOSTCC="${CC:-gcc}" \
silentoldconfig prepare scripts
# remove the stuff that poits to real sources. we want 3rd party
@@ -121,10 +132,21 @@ dev() {
md5sums="260551284ac224c3a43c4adac7df4879 linux-2.6.32.tar.bz2
855c248334a71ef5ca3d8cb89d51334f patch-2.6.32.11.bz2
-3ef822f3a2723b9a80c3f12954457225 ip_gre.patch
-13ca9e91700e459da269c957062bbea7 ip_gre2.patch
-4c39a161d918e7f274292ecfd168b891 arp.patch
-329fcab881425e001d3243caa4648478 xfrm-cache-size-revert.patch
-21ed38773d846097b7315e1e0801d87a net-git-78f1cd-r8169-fix-broken-register-writes.patch
-962a6dd7c639612fc8bdaeb836388b0b net-git-c0cd88-r8169-offical-fix-for-CVE-2009-4537-overlength-frame-DMAs.patch
+437317f88ec13ace8d39c31983a41696 0002-gre-fix-hard-header-destination-address-checking.patch
+151b29a161178ed39d62a08f21f3484d 0003-ip_gre-include-route-header_len-in-max_headroom-calc.patch
+776adeeb5272093574f8836c5037dd7d 0004-arp-flush-arp-cache-on-device-change.patch
+afa06334c81f21c20571286a83d3d928 0005-r8169-fix-broken-register-writes.patch
+c538c0f735d79fd71b47dde02bf1f790 0006-r8169-offical-fix-for-CVE-2009-4537-overlength-frame.patch
+5f8b9a76d95319c5b1aa26b54a42e6b5 0007-r8169-Fix-rtl8169_rx_interrupt.patch
+f878c802700e3babd03be3505119c5c2 0008-r8169-clean-up-my-printk-uglyness.patch
+cf168620efa63479a6e03da78906e32f 0009-ipsec-Fix-bogus-bundle-flowi.patch
+3af4b5ae1afae3278b0070f585b874e3 0010-xfrm-Remove-xfrm_state_genid.patch
+9f284c3fd5ab38cef4544efc1f50c6ba 0011-xfrm_user-verify-policy-direction-at-XFRM_MSG_POLEXP.patch
+b035114e893883cf67530350678e00f5 0012-xfrm-remove-policy-lock-when-accessing-policy-walk.d.patch
+9dea03ec19aaf9a384e4f56f57009257 0013-flow-structurize-flow-cache.patch
+fc9ab26abbfec0d3f20000b5e695620b 0014-flow-virtualize-flow-cache-entry-methods.patch
+c09b82b89a49ba2a3836a0bc3a3312f4 0015-xfrm-cache-bundles-instead-of-policies-for-outgoing-.patch
+41618efb65ab9ddacfb59a1cde9b4edd 0016-xfrm-remove-policy-garbage-collection.patch
+3b83f0972ab715819d1119b120a987e7 0017-flow-delayed-deletion-of-flow-cache-entries.patch
+45a676c7a1759fec60b724d557b4e295 0018-xfrm-Fix-crashes-in-xfrm_lookup.patch
bf15e3ee69e03319dab0d59e08b67195 kernelconfig.x86"
diff --git a/main/linux-pae/arp.patch b/main/linux-pae/arp.patch
deleted file mode 100644
index d2682690..00000000
--- a/main/linux-pae/arp.patch
+++ /dev/null
@@ -1,14 +0,0 @@
-diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
-index c95cd93..71ab56f 100644
---- a/net/ipv4/arp.c
-+++ b/net/ipv4/arp.c
-@@ -1200,6 +1200,9 @@ static int arp_netdev_event(struct notifier_block *this, unsigned long event, vo
- neigh_changeaddr(&arp_tbl, dev);
- rt_cache_flush(dev_net(dev), 0);
- break;
-+ case NETDEV_CHANGE:
-+ neigh_changeaddr(&arp_tbl, dev);
-+ break;
- default:
- break;
- }
diff --git a/main/linux-pae/ip_gre.patch b/main/linux-pae/ip_gre.patch
deleted file mode 100644
index ba5f19b3..00000000
--- a/main/linux-pae/ip_gre.patch
+++ /dev/null
@@ -1,15 +0,0 @@
---- a/net/ipv4/ip_gre.c.orig
-+++ b/net/ipv4/ip_gre.c
-@@ -1137,11 +1137,8 @@
-
- if (saddr)
- memcpy(&iph->saddr, saddr, 4);
--
-- if (daddr) {
-+ if (daddr)
- memcpy(&iph->daddr, daddr, 4);
-- return t->hlen;
-- }
- if (iph->daddr && !ipv4_is_multicast(iph->daddr))
- return t->hlen;
-
diff --git a/main/linux-pae/ip_gre2.patch b/main/linux-pae/ip_gre2.patch
deleted file mode 100644
index 52c44076..00000000
--- a/main/linux-pae/ip_gre2.patch
+++ /dev/null
@@ -1,17 +0,0 @@
---- linux-2.6.32/net/ipv4/ip_gre.c.orig
-+++ linux-2.6.32/net/ipv4/ip_gre.c
-@@ -803,11 +803,13 @@
- tunnel->err_count = 0;
- }
-
-- max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
-+ max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + rt->u.dst.header_len;
-
- if (skb_headroom(skb) < max_headroom || skb_shared(skb)||
- (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
- struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
-+ if (max_headroom > dev->needed_headroom)
-+ dev->needed_headroom = max_headroom;
- if (!new_skb) {
- ip_rt_put(rt);
- stats->tx_dropped++;
diff --git a/main/linux-pae/xfrm-cache-size-revert.patch b/main/linux-pae/xfrm-cache-size-revert.patch
deleted file mode 100644
index c8fcbd0d..00000000
--- a/main/linux-pae/xfrm-cache-size-revert.patch
+++ /dev/null
@@ -1,12 +0,0 @@
-diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
-index 74fb2eb..e158860 100644
---- a/net/ipv4/xfrm4_policy.c
-+++ b/net/ipv4/xfrm4_policy.c
-@@ -308,7 +308,6 @@ void __init xfrm4_init(int rt_max_size)
- * That will let us store an ipsec connection per route table entry,
- * and start cleaning when were 1/2 full
- */
-- xfrm4_dst_ops.gc_thresh = rt_max_size/2;
- #ifdef CONFIG_SYSCTL
- sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path,
- xfrm4_policy_table);