path: root/testing/ipt-netflow-grsec
author    Natanael Copa <ncopa@alpinelinux.org>   2014-08-11 07:28:54 +0000
committer Natanael Copa <ncopa@alpinelinux.org>   2014-08-11 07:34:22 +0000
commit    ee56bcc1215273fd67c646ac0ee241f1fe66f28f (patch)
tree      9d1a43ec136a8201a3bdc94552617ab1240af6f4 /testing/ipt-netflow-grsec
parent    f60c24a9b29efb20a2652a64aa6e03a885d9c701 (diff)
download  aports-ee56bcc1215273fd67c646ac0ee241f1fe66f28f.tar.bz2
          aports-ee56bcc1215273fd67c646ac0ee241f1fe66f28f.tar.xz
testing/ipt-netflow-grsec: rebuild against kernel 3.14.16-r0 and upgrade to 2.0
Diffstat (limited to 'testing/ipt-netflow-grsec')
-rw-r--r--   testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch     32
-rw-r--r--   testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch      28
-rw-r--r--   testing/ipt-netflow-grsec/APKBUILD                                          16
-rw-r--r--   testing/ipt-netflow-grsec/git.patch                                       4308
4 files changed, 6 insertions, 4378 deletions
diff --git a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch b/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch
deleted file mode 100644
index 99ebffe38..000000000
--- a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From b1588f736edbeeb2b3de6081e92dde8840cef66d Mon Sep 17 00:00:00 2001
-From: Natanael Copa <ncopa@alpinelinux.org>
-Date: Mon, 5 Aug 2013 12:56:19 +0000
-Subject: [PATCH] Fix compilation with 3.10.y kernel
-
-use proc_create instead of create_proc_entry
----
- ipt_NETFLOW.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
-index 4e1b9f3..758bccc 100644
---- a/ipt_NETFLOW.c
-+++ b/ipt_NETFLOW.c
-@@ -1432,12 +1432,12 @@ static int __init ipt_netflow_init(void)
- }
-
- #ifdef CONFIG_PROC_FS
-- proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat));
-+ proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat),
-+ &nf_seq_fops);
- if (!proc_stat) {
- printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
- goto err_free_netflow_slab;
- }
-- proc_stat->proc_fops = &nf_seq_fops;
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
- proc_stat->owner = THIS_MODULE;
- #endif
---
-1.8.3.4
-
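For context on the backport being dropped here: from kernel 3.10 the file_operations pointer is passed directly to proc_create() instead of being assigned to the result of create_proc_entry(). A minimal, hypothetical module (names are illustrative, not code from this repository) showing that interface:

    #include <linux/module.h>
    #include <linux/stat.h>
    #include <linux/proc_fs.h>
    #include <linux/seq_file.h>
    #include <net/net_namespace.h>

    static int demo_show(struct seq_file *seq, void *v)
    {
            seq_printf(seq, "demo statistics\n");
            return 0;
    }

    static int demo_open(struct inode *inode, struct file *file)
    {
            return single_open(file, demo_show, NULL);
    }

    static const struct file_operations demo_fops = {
            .owner   = THIS_MODULE,
            .open    = demo_open,
            .read    = seq_read,
            .llseek  = seq_lseek,
            .release = single_release,
    };

    static int __init demo_init(void)
    {
            /* Since 3.10 the fops are passed directly; the old pattern of
             * create_proc_entry() followed by assigning ->proc_fops no longer
             * builds, because proc_dir_entry is opaque to modules. */
            if (!proc_create("demo_stat", S_IRUGO, init_net.proc_net_stat, &demo_fops))
                    return -ENOMEM;
            return 0;
    }

    static void __exit demo_exit(void)
    {
            remove_proc_entry("demo_stat", init_net.proc_net_stat);
    }

    module_init(demo_init);
    module_exit(demo_exit);
    MODULE_LICENSE("GPL");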
diff --git a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch b/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch
deleted file mode 100644
index f3cbfae42..000000000
--- a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 2f4b540bb2e60aca47263cf0fd2b3abc0a499d87 Mon Sep 17 00:00:00 2001
-From: Natanael Copa <ncopa@alpinelinux.org>
-Date: Tue, 14 May 2013 07:25:47 +0000
-Subject: [PATCH] Fix compilation with 3.9.y kernel
-
-use new hlist_for_each api
----
- ipt_NETFLOW.c | 4 +---
- 1 file changed, 1 insertion(+), 3 deletions(-)
-
-diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
-index d4c91e1..4e1b9f3 100644
---- a/ipt_NETFLOW.c
-+++ b/ipt_NETFLOW.c
-@@ -854,9 +854,7 @@ static struct ipt_netflow *
- ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
- {
- struct ipt_netflow *nf;
-- struct hlist_node *pos;
--
-- hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
-+ hlist_for_each_entry(nf, &ipt_netflow_hash[hash], hlist) {
- if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
- nf->nr_bytes < FLOW_FULL_WATERMARK) {
- NETFLOW_STAT_INC(found);
---
-1.8.2.2
-
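Likewise, the 3.9 API change that this removed backport addressed is the three-argument hlist_for_each_entry(), which dropped the separate struct hlist_node cursor. A minimal sketch with hypothetical types, not taken from ipt_NETFLOW itself:

    #include <linux/types.h>
    #include <linux/list.h>

    struct demo_flow {
            struct hlist_node hlist;
            u32 saddr;
            u32 daddr;
    };

    static struct hlist_head demo_hash[256];

    static void demo_insert(struct demo_flow *f, unsigned int hash)
    {
            hlist_add_head(&f->hlist, &demo_hash[hash]);
    }

    static struct demo_flow *demo_find(u32 saddr, u32 daddr, unsigned int hash)
    {
            struct demo_flow *f;

            /* 3.9+ form: the entry pointer itself is the cursor; there is no
             * separate struct hlist_node *pos argument anymore. */
            hlist_for_each_entry(f, &demo_hash[hash], hlist) {
                    if (f->saddr == saddr && f->daddr == daddr)
                            return f;
            }
            return NULL;
    }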
diff --git a/testing/ipt-netflow-grsec/APKBUILD b/testing/ipt-netflow-grsec/APKBUILD
index 03e4f3fae..fc3a3ec82 100644
--- a/testing/ipt-netflow-grsec/APKBUILD
+++ b/testing/ipt-netflow-grsec/APKBUILD
@@ -2,11 +2,11 @@
_flavor=grsec
_kpkg=linux-$_flavor
-_kver=3.14.15
-_kpkgrel=1
+_kver=3.14.16
+_kpkgrel=0
# when chaning _ver we *must* bump _mypkgrel
-_ver=1.8
+_ver=2.0
_mypkgrel=0
@@ -30,7 +30,6 @@ url="http://ipt-netflow.sourceforge.net/"
arch="all"
license=GPL3+
source="ipt-netflow-$_ver.tar.gz::https://github.com/aabc/ipt-netflow/archive/v$_ver.tar.gz
- git.patch
"
depends="$_kpkg-dev=$_kpkgver"
makedepends="linux-${_flavor}-dev=$_kpkgver iptables-dev bash"
@@ -64,9 +63,6 @@ dev() {
default_dev
}
-md5sums="922a7d9dd17f28a2c3551a1c5e9849eb ipt-netflow-1.8.tar.gz
-77875a5979589bca8c49a8331e7cc22b git.patch"
-sha256sums="eefe766eda6d4d918e4cc919b5d5acd2b681a258246017ab7948b80f0cb95112 ipt-netflow-1.8.tar.gz
-75ea3afe9532699d4fd1bef7f198dfaa97b3a310234a86b48371522ae0704a60 git.patch"
-sha512sums="c413515deeac1d604e36a53b39edcf340d6c3f78c29e53979fede8aa013e324ada4622b571ac5270f7495ab6982096cd2bd9e9283c9cc2cc7360aa5c3954792d ipt-netflow-1.8.tar.gz
-3fade2ceb00ddae15dd6b1eacebc36480efe0c5c2d050e050e0aef37382ad72ca454f4d23080b7263ab2f078f72db4572e2514d3faca113d8e437437f7c3afcf git.patch"
+md5sums="d1b40ee12b563edc4faae5e3234c3436 ipt-netflow-2.0.tar.gz"
+sha256sums="547ca6c2e8d82fc7d3d113d2ab3f602106d1efa7739167f8c7850a43e5bbe46e ipt-netflow-2.0.tar.gz"
+sha512sums="7bf330ac665180218ea4cc42fe6ac6d365b45632039ff9601559f7a4675738c409853433b15dd431dba68cfe36269d6992754154411e2761041fec021e19bf8d ipt-netflow-2.0.tar.gz"
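The APKBUILD change above bumps the kernel the module is built against and the upstream version, replacing the checksum variables accordingly. A minimal sketch of the usual abuild steps for such an upgrade (assuming a configured abuild environment; not part of this commit):

    cd aports/testing/ipt-netflow-grsec
    # fetch the new source tarball and rewrite md5sums/sha256sums/sha512sums in APKBUILD
    abuild checksum
    # build the package, installing missing makedepends as needed
    abuild -r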
diff --git a/testing/ipt-netflow-grsec/git.patch b/testing/ipt-netflow-grsec/git.patch
deleted file mode 100644
index faa49ffdc..000000000
--- a/testing/ipt-netflow-grsec/git.patch
+++ /dev/null
@@ -1,4308 +0,0 @@
-diff --git a/Makefile.in b/Makefile.in
-index 30ebbfe..0a82c67 100644
---- a/Makefile.in
-+++ b/Makefile.in
-@@ -1,43 +1,73 @@
--#
-+# Edit Makefile.in and run ./configure
-
- KVERSION = @KVERSION@
- KDIR = @KDIR@
-+KINSTDIR = $(shell dirname @KDIR@)
- IPTABLES_CFLAGS = @IPTABLES_CFLAGS@
- IPTABLES_MODULES = @IPTABLES_MODULES@
-+DEPMOD = depmod -a
-
-+# https://www.kernel.org/doc/Documentation/kbuild/modules.txt
-+# https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt
- obj-m = ipt_NETFLOW.o
-
--ipt_NETFLOW.ko: ipt_NETFLOW.c ipt_NETFLOW.h
-+all: ipt_NETFLOW.ko libipt_NETFLOW.so libip6t_NETFLOW.so
-+ipt_NETFLOW.ko: version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile
- @echo Compiling for kernel $(KVERSION)
- make -C $(KDIR) M=$(CURDIR) modules
--all: ipt_NETFLOW.ko libipt_NETFLOW.so
--minstall:
-- make -C $(KDIR) M=$(CURDIR) modules_install
-+ @touch $@
-+sparse: | version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile
-+ @rm -f ipt_NETFLOW.ko ipt_NETFLOW.o
-+ @echo Compiling for kernel $(KVERSION)
-+ make -C $(KDIR) M=$(CURDIR) modules C=1
-+ @touch ipt_NETFLOW.ko
-+minstall: | ipt_NETFLOW.ko
-+ make -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR)
-+ $(DEPMOD)
- mclean:
- make -C $(KDIR) M=$(CURDIR) clean
- lclean:
- -rm -f *.so *_sh.o
- clean: mclean lclean
-- -rm -f *.so *.o modules.order
-+ -rm -f *.so *.o modules.order version.h
-+
-+%_sh.o: libipt_NETFLOW.c
-+ gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c
-+
-+%.so: %_sh.o
-+ gcc -shared -o $@ $<
-
--libipt_NETFLOW.so: libipt_NETFLOW.c
-- gcc -O2 -Wall -Wunused -I$(KDIR)/include $(IPTABLES_CFLAGS) -fPIC -o libipt_NETFLOW_sh.o -c libipt_NETFLOW.c
-- gcc -shared -o libipt_NETFLOW.so libipt_NETFLOW_sh.o
-+version.h: ipt_NETFLOW.c ipt_NETFLOW.h Makefile
-+ @if [ -d .git ] && type git >/dev/null 2>&1; then \
-+ echo "#define GITVERSION \"`git describe --dirty`\""; \
-+ fi > version.h
-
--linstall: ipt_NETFLOW.ko libipt_NETFLOW.so
-- cp -a libipt_NETFLOW.so $(IPTABLES_MODULES)
-+linstall: | libipt_NETFLOW.so libip6t_NETFLOW.so
-+ install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
-+ install -D libip6t_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so
-
- install: minstall linstall
-
-+uninstall:
-+ -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
-+ -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so
-+ -rm -f $(DESTDIR)$(KINSTDIR)/extra/ipt_NETFLOW.ko
-+
- Makefile: Makefile.in configure
- ./configure --make
-
- load: all
-- insmod ipt_NETFLOW.ko active_timeout=5
-- iptables -A OUTPUT -d 0/0 -j NETFLOW
-- iptables -A INPUT -d 0/0 -j NETFLOW
-+ -insmod ipt_NETFLOW.ko active_timeout=5 protocol=9
-+ -iptables -I OUTPUT -j NETFLOW
-+ -iptables -I INPUT -j NETFLOW
-+ -ip6tables -I OUTPUT -j NETFLOW
-+ -ip6tables -I INPUT -j NETFLOW
-
- unload:
-- iptables -D OUTPUT -d 0/0 -j NETFLOW
-- iptables -D INPUT -d 0/0 -j NETFLOW
-- rmmod ipt_NETFLOW.ko
-+ -iptables -D OUTPUT -j NETFLOW
-+ -iptables -D INPUT -j NETFLOW
-+ -ip6tables -D OUTPUT -j NETFLOW
-+ -ip6tables -D INPUT -j NETFLOW
-+ -rmmod ipt_NETFLOW.ko
-+
-+reload: unload load
-diff --git a/README b/README
-index 213f02c..56a4fde 100644
---- a/README
-+++ b/README
-@@ -1,10 +1,17 @@
--ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008
-+ipt_NETFLOW linux 2.6.x-3.x kernel module by <abc@telekom.ru> -- 2008-2013.
-+
-+ High performance NetFlow v5, v9, IPFIX flow data export module for Linux
-+ kernel. Supporting IPv4 and IPv6. Created to be useful for highly loaded
-+ linux router. It should be used as iptables target. Also can export NAT
-+ translation events using NetFlow Event Logging (NEL) for v9, IPFIX, or
-+ specially crafted v5 flows.
-+
-
- ============================
- = OBTAINING LATEST VERSION =
- ============================
-
-- $ git clone git://ipt-netflow.git.sourceforge.net/gitroot/ipt-netflow/ipt-netflow
-+ $ git clone git://git.code.sf.net/p/ipt-netflow/code ipt-netflow
- $ cd ipt-netflow
-
-
-@@ -12,94 +19,220 @@ ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008
- = INSTALLATION =
- ================
-
--1. Besides kernel you will need iptables/netfilter source matching your
-- installation or just fresh install from there: ftp://ftp.netfilter.org/pub/iptables/snapshot/
-- I have this: ftp://ftp.netfilter.org/pub/iptables/snapshot/iptables-1.3.7-20070329.tar.bz2
-- Unpack it somewhere and build with make.
-+ Four easy steps.
-+
-+** 1. Prepare Kernel source
-+
-+ If you have package system install kernel-devel package, otherwise install
-+ raw kernel source from http://kernel.org matching _exactly_ version of your
-+ installed kernel.
-+
-+ a) What to do for Centos:
-+
-+ ~# yum install kernel-devel
-+
-+ b) What to do for Debian:
-+
-+ ~# apt-get install module-assistant
-+ ~# m-a prepare
-+
-+ c) Otherwise, if you downloaded raw kernel sources don't forget to create
-+ .config by copying it from your distribution's kernel. Its copy could reside
-+ in /boot or sometimes in /proc, examples:
-+
-+ kernel-src-dir/# cp /boot/config-`uname -r` .config
-+ or
-+ kernel-src-dir/# zcat /proc/config.gz > .config
-+
-+ Assuming you unpacked kernel source into `kernel-src-dir/' directory.
-+ Then run:
-+
-+ kernel-src-dir/# make oldconfig
-+
-+ After that you'll need to prepare kernel for modules build:
-+
-+ kernel-src-dir/# make prepare modules_prepare
-+
-+ Note: Don't try to `make prepare' in Centos kernel-devel package directory
-+ (which is usually something like /usr/src/kernels/2.6.32-431.el6.x86_64)
-+ as this is wrong and meaningless.
-+
-+** 2. Prepare Iptables
-+
-+ Before this step it also would be useful to install pkg-config if don't
-+ already have.
-+
-+ If you have package system just install iptables-devel (or iptables-dev)
-+ package, otherwise install iptables source matching version of your
-+ installation from ftp://ftp.netfilter.org/pub/iptables/
-+
-+ a) What to do for Centos:
-+
-+ # yum install iptables-devel
-+
-+ b) What to do for Debian:
-+
-+ # apt-get install iptables-dev pkg-config
-
--2. Run ./configure script and it will create Makefile
-+ c) Otherwise, for raw iptables source build it and make install.
-
--3. make all install; depmod
-- This will install kernel module and iptable specific library.
-+** 3. Now, to actually build the module run:
-
--Troubleshooting:
-- 1) Sometimes you will want to add CC=gcc-3 to make command.
-- Example: make CC=gcc-3.3
-+ ~/ipt-netflow# ./configure
-+ ~/ipt-netflow# make all install
-+ ~/ipt-netflow# depmod
-
-- 2) Compile module with actual kernel source compiled.
-- I.e. first compile kernel and boot into it, and then compile module.
-+ This will install kernel module and iptables specific library.
-
-- 3) For autoloading module after reboot: set net.netflow.destination (or load
-- module, if idestination set on load) after interfaces are up. Becasue module
-- needs exporting interface (usually lo) to establish export connection.
-+ Troubleshooting:
-
--4. After this point you should be able to load module
-- and use -j NETFLOW target in your iptables. See next section.
-+ a) Sometimes you will want to add CC=gcc-3 to make command.
-+ Example: make CC=gcc-3.3
-+
-+ b) Compile module with actual kernel source compiled.
-+ I.e. first compile kernel and boot into it, and then compile module.
-+ If you are using kernel-devel package check that its version matches
-+ your kernel package.
-+
-+ c) If you have sources in non-standard places or configure isn't able to
-+ find something run ./configure --help to see how to specify paths manually.
-+
-+** 4. After this point you should be able to load module and
-+ use -j NETFLOW target in your iptables. See next section.
-
-
- ===========
- = RUNNING =
- ===========
-
--1. You can load module by insmod like this:
-- # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1
-+1. You can load module directly by insmod like this:
-+
-+ # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1
-
- Or if properly installed (make install; depmod) by this:
-- # modprobe ipt_NETFLOW destination=127.0.0.1:2055
-+
-+ # modprobe ipt_NETFLOW destination=127.0.0.1:2055
-
- See, you may add options in insmod/modprobe command line, or add
-- them in /etc/ to modules.conf or modprobe.conf like thus:
-- options ipt_NETFLOW destination=127.0.0.1:2055
-+ them in /etc/modprobe.conf or /etc/modprobe.d/ipt_NETFLOW.conf
-+ like thus:
-+
-+ options ipt_NETFLOW destination=127.0.0.1:2055 protocol=9 natevents=1
-
- 2. Statistics is in /proc/net/stat/ipt_netflow
-- To view slab statistics: grep ipt_netflow /proc/slabinfo
-+ To view boring slab statistics: grep ipt_netflow /proc/slabinfo
-
- 3. You can view parameters and control them via sysctl, example:
-- # sysctl -w net.netflow.hashsize=32768
-
--4. Example of directing all traffic into module:
-- # iptables -A FORWARD -j NETFLOW
-- # iptables -A INPUT -j NETFLOW
-- # iptables -A OUTPUT -j NETFLOW
-+ # sysctl net.netflow
-+ # sysctl net.netflow.hashsize=32768
-+
-+ Note: For after-reboot configuration I recommend to store module parameters
-+ in modprobe configs instead of storing them in /etc/sysctl.conf, as it's
-+ less clear when init process will apply sysctl.conf, before of after
-+ module's load.
-+
-+4. Example of directing all IPv4 traffic into the module:
-+
-+ # iptables -I FORWARD -j NETFLOW
-+ # iptables -I INPUT -j NETFLOW
-+ # iptables -I OUTPUT -j NETFLOW
-+
-+ Note: It is preferable (because easier to understand) to _insert_
-+ NETFLOW target at the top of the chain, otherwise not all traffic may
-+ reach NETFLOW if your iptables configuration is complicated and some
-+ other rule inadvertently consume the traffic (dropping or acepting before
-+ NETFLOW is reached). It's always good to test your configuration.
-+ Use iptables -L -nvx to check pkts/bytes counters on the rules.
-+
-+5. If you want to account IPv6 traffic you should use protocol 9 or 10.
-+ Example of directing all IPv6 traffic into the module:
-
-+ # sysctl net.netflow.protocol=10
-+ # ip6tables -I FORWARD -j NETFLOW
-+ # ip6tables -I INPUT -j NETFLOW
-+ # ip6tables -I OUTPUT -j NETFLOW
-+
-+ Note: First enable right version of protocol and after that add ip6tables
-+ rules, otherwise you will get errors in dmesg.
-+
-+6. If you want to account NAT events (NEL):
-+
-+ # sysctl net.netflow.natevents=1
-+
-+ Note that natevents feature is completely independent from traffic accounting
-+ (it's using so called conntrack events), thus you don't need to set or change
-+ any iptables rules to use that. You may need to enable kernel config option
-+ CONFIG_NF_CONNTRACK_EVENTS though (if it isn't already enabled).
-+ For details on how they are exported for different protocol versions see
-+ below.
-
- ===========
- = OPTIONS =
- ===========
-
-+ protocol=5
-+ - what version of NetFlow protocol to use. Default is 5.
-+ You can choose from 5, 9, or 10 (where 10 is IPFIX). If you plan
-+ to account IPv6 traffic you should use protocol 9 or 10 (IPFIX),
-+ because NetFlow v5 isn't compatible with IPv6.
-+
- destination=127.0.0.1:2055
- - where to export netflow, to this ip address
- You will see this connection in netstat like this:
- udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED
-
- destination=127.0.0.1:2055,192.0.0.1:2055
-- - mirror flows to two (can be more) addresses,
-- separate addresses with comma.
-+ - mirror flows to two (can be more) addresses, separate addresses
-+ with comma.
-+
-+ natevents=1
-+ - Collect and send NAT translation events as NetFlow Event Logging (NEL)
-+ for NetFlow v9/IPFIX, or as dummy flows compatible with NetFlow v5.
-+ Default is 0 (don't send).
-+
-+ For NetFlow v5 protocol meaning of fields in dummy flows are such:
-+ Src IP, Src Port is Pre-nat source address.
-+ Dst IP, Dst Port is Post-nat destination address.
-+ - These two fields made equal to data flows catched in FORWARD chain.
-+ Nexthop, Src AS is Post-nat source address for SNAT. Or,
-+ Nexthop, Dst AS is Pre-nat destination address for DNAT.
-+ TCP Flags is SYN+SCK for start event, RST+FIN for stop event.
-+ Pkt/Traffic size is 0 (zero), so it won't interfere with accounting.
-
- inactive_timeout=15
- - export flow after it's inactive 15 seconds. Default value is 15.
-
- active_timeout=1800
-- - export flow after it's active 1800 seconds (30 minutes). Default value is 1800.
-+ - export flow after it's active 1800 seconds (30 minutes). Default valuae
-+ is 1800.
-+
-+ refresh-rate=20
-+ - for NetFlow v9 and IPFIX it's rate how frequently to re-send templates
-+ (per packets). You probably don't need to change default (which is 20).
-+
-+ timeout-rate=30
-+ - for NetFlow v9 and IPFIX it's rate when to re-send old templates (in
-+ minutes). No need to change it.
-
- debug=0
- - debug level (none).
-
- sndbuf=number
-- - size of output socket buffer in bytes. Recommend you to put
-+ - size of output socket buffer in bytes. I recommend you to put
- higher value if you experience netflow packet drops (can be
- seen in statistics as 'sock: fail' number.)
- Default value is system default.
-
- hashsize=number
- - Hash table bucket size. Used for performance tuning.
-- Abstractly speaking, it should be two times bigger than flows
-+ Abstractly speaking, it should be minimum two times bigger than flows
- you usually have, but not need to.
- Default is system memory dependent small enough value.
-
- maxflows=2000000
-- - Maximum number of flows to account. It's here to prevent DOS attacks. After
-- this limit reached new flows will not be accounted. Default is
-+ - Maximum number of flows to account. It's here to prevent DOS attacks.
-+ After this limit reached new flows will not be accounted. Default is
- 2000000, zero is unlimited.
-
- aggregation=string..
-@@ -130,14 +263,15 @@ Troubleshooting:
- = HOW TO READ STAT =
- ====================
-
-- Statistics is your friend to fine tune and understand netflow module performance.
-+ Statistics is your friend to fine tune and understand netflow module
-+ performance.
-
- To see stat:
- # cat /proc/net/stat/ipt_netflow
-
- How to interpret the data:
-
--> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K
-+> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K, worker delay 100/1000.
-
- active X: currently active flows in memory cache.
- - for optimum CPU performance it is recommended to set hash table size to
-@@ -146,8 +280,9 @@ Troubleshooting:
- mem XK: how much kilobytes of memory currently taken by active flows.
- - one active flow taking 56 bytes of memory.
- - there is system limit on cache size too.
-+ worker delay X/HZ: how frequently exporter scan flows table per second.
-
--> Hash: size 8192 (mem 32K), metric 1.0, 1.0, 1.0, 1.0. MemTraf: 1420 pkt, 364 K (pdu 0, 0).
-+> Hash: size 8192 (mem 32K), metric 1.00, [1.00, 1.00, 1.00]. MemTraf: 1420 pkt, 364 K (pdu 0, 0).
-
- Hash: size X: current hash size/limit.
- - you can control this by sysctl net.netflow.hashsize variable.
-@@ -156,18 +291,22 @@ Troubleshooting:
- - optimal value is twice of average of active flows.
- mem XK: how much memory occupied by hash table.
- - hash table is fixed size by nature, taking 4 bytes per entry.
-- metric X, X, X, X: how optimal is your hash table being used.
-+ metric X, [X, X, X]: how optimal is your hash table being used.
- - lesser value mean more optimal hash table use, min is 1.0.
-- - this is moving average (EWMA) of hash table access divided
-- by match rate (searches / matches) for 4sec, and 1, 5, 15 minutes.
-- Sort of hash table load average.
-+ - last three numbers in squares is moving average (EWMA) of hash table
-+ access divided by match rate (searches / matches) for 4sec, and 1, 5, and
-+ 15 minutes. Sort of hash table load average. First value is instantaneous.
-+ You can try to increase hashsize if averages more than 1 (increase
-+ certainly if >= 2).
- MemTraf: X pkt, X K: how much traffic accounted for flows that are in memory.
- - these flows that are residing in internal hash table.
- pdu X, X: how much traffic in flows preparing to be exported.
- - it is included already in aforementioned MemTraf total.
-
--> Timeout: active 1800, inactive 15. Maxflows 2000000
-+> Protocol version 10 (ipfix), refresh-rate 20, timeout-rate 30, (templates 2, active 2). Timeouts: active 5, inactive 15. Maxflows 2000000
-
-+ Protocol version currently in use. Refresh-rate and timeout-rate
-+ for v9 and IPFIX. Total templates generated and currently active.
- Timeout: active X: how much seconds to wait before exporting active flow.
- - same as sysctl net.netflow.active_timeout variable.
- inactive X: how much seconds to wait before exporting inactive flow.
-@@ -180,20 +319,22 @@ Troubleshooting:
-
- - Module throughput values for 1 second, 1 minute, and 5 minutes.
-
--> cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>
--> cpu0 stat: 980540 10473 180600, 0 0 0 0, sock: 4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K
-+> cpu# stat: <search found new [metric], trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>
-+> cpu0 stat: 980540 10473 180600 [1.03], 0 0 0 0, sock: 4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K
-
- cpu#: this is Total and per CPU statistics for:
- stat: <search found new, trunc frag alloc maxflows>: internal stat for:
- search found new: hash table searched, found, and not found counters.
-- trunc: how much truncated packets is ignored
-+ [metric]: average hash metric since module load.
-+ trunc: how much truncated packets are ignored
- - these are that possible don't have valid IP header.
- - accounted in drop packets counter but not in drop bytes.
- frag: how much fragmented packets have seen.
- - kernel always defragments INPUT/OUTPUT chains for us.
- - these packets are not ignored but not reassembled either, so:
-- - if there is no enough data in fragment (ex. tcp ports) it is considered zero.
-- alloc: how much cache memory allocations is failed.
-+ - if there is no enough data in fragment (ex. tcp ports) it is considered
-+ zero.
-+ alloc: how much cache memory allocations are failed.
- - packets ignored and accounted in drop stat.
- - probably increase system memory if this ever happen.
- maxflows: how much packets ignored on maxflows (maximum active flows reached).
-@@ -203,7 +344,8 @@ Troubleshooting:
- sock: <ok fail cberr, bytes>: table of exporting stats for:
- ok: how much Netflow PDUs are exported (i.e. UDP packets sent by module).
- fail: how much socket errors (i.e. packets failed to be sent).
-- - packets dropped and their internal statistics cumulatively accounted in drop stat.
-+ - packets dropped and their internal statistics cumulatively accounted in
-+ drop stat.
- cberr: how much connection refused ICMP errors we got from export target.
- - probably you not launched collector software on destination,
- - or specified wrong destination address.
-@@ -225,20 +367,34 @@ Troubleshooting:
- packet is for new flow but maxflows is already reached,
- all flows in export packets that got socket error.
-
--> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, other 0
-+> Natevents disabled, count start 0, stop 0.
-+
-+ - Natevents mode disabled or enabled, and how much start or stop events
-+ are reported.
-+
-+> sock0: 10.0.0.2:2055 unconnected (1 attempts).
-+
-+ If socket is unconnected (for example if module loaded before interfaces is
-+ up) it shows now much connection attempts was failed. It will try to connect
-+ until success.
-+
-+> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, connect 0, other 0
-
- sockX: per destination stats for:
- X.X.X.X:Y: destination ip address and port.
- - controlled by sysctl net.netflow.destination variable.
- sndbuf X: how much data socket can hold in buffers.
- - controlled by sysctl net.netflow.sndbuf variable.
-- - if you have packet drops due to sndbuf reached (error -11) increase this value.
-+ - if you have packet drops due to sndbuf reached (error -11) increase this
-+ value.
- filled X: how much data in socket buffers right now.
- peak X: peak value of how much data in socket buffers was.
- - you will be interested to keep it below sndbuf value.
- err: how much packets are dropped due to errors.
- - all flows from them will be accounted in drop stat.
-- sndbuf reached X: how much packets dropped due to sndbuf being too small (error -11).
-+ sndbuf reached X: how much packets dropped due to sndbuf being too small
-+ (error -11).
-+ connect X: how much connection attempts was failed.
- other X: dropped due to other possible errors.
-
- > aggr0: ...
-diff --git a/README.promisc b/README.promisc
-index 60ca922..31d774f 100644
---- a/README.promisc
-+++ b/README.promisc
-@@ -2,9 +2,14 @@ Hello,
-
- If you wish to account with netflow module traffic mirrored on switch you may follow this example:
-
--**************
--* Solution 1 *
--**************
-+
-+ Solution 1: General kernel patch.
-+ Solution 2: Alternative w/o kernel patch.
-+
-+
-+ **************
-+ * Solution 1 *
-+ **************
-
- 1. Patch your kernel with `raw_promisc.patch' to enable raw table to see promisc traffic.
-
-@@ -33,17 +38,7 @@ If you wish to account with netflow module traffic mirrored on switch you may fo
- # /sbin/vconfig add eth1 47
- # /sbin/ifconfig eth1.47 up
-
--5. Recompile ipt_netflow module with #define RAW_PROMISC_HACK uncommented:
--
-- Find this line in ipt_NETFLOW.c (should be line 7):
--
--//#define RAW_PROMISC_HACK
--
-- And remove two slashes at beginning of the line, so it become like this:
--
--#define RAW_PROMISC_HACK
--
-- Re-compile module:
-+5. Compile module:
-
- # make clean all install
-
-@@ -55,13 +50,14 @@ If you wish to account with netflow module traffic mirrored on switch you may fo
-
- # /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW
-
--
- Voila.
-
-+ps. For Debian Squeeze instructions look at raw_promisc_debian_squeeze6.patch
-
--**************
--* Solution 2 *
--**************
-+
-+ **************
-+ * Solution 2 *
-+ **************
-
- By Anonymous.
-
-@@ -81,4 +77,3 @@ Sometimes you may need to run:
-
- for this scheme to work.
-
--
-diff --git a/configure b/configure
-index 677dd7f..3f10e2a 100755
---- a/configure
-+++ b/configure
-@@ -3,7 +3,7 @@
- PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin
-
- error() {
-- echo "! Error: $@"
-+ echo -e "! Error: $@"
- exit 1
- }
-
-@@ -56,19 +56,20 @@ get_lib_from_lib() {
- }
-
- iptables_inc() {
-- echo -n "Iptables include path: "
-+ echo -n "Iptables include flags: "
- if [ "$IPTINC" ]; then
-- echo "$IPTINC (user specified)"
- IPTINC="-I$IPTINC"
-+ echo "$IPTINC (user specified)"
-+ elif [ "$PKGVER" ]; then
-+ IPTINC="$PKGINC"
-+ echo "$IPTINC (pkg-config)"
-+ elif [ "$NOIPTSRC" ]; then
-+ IPTINC=
-+ echo "none (default)"
- else
-- if [ "$PKGINC" ]; then
-- IPTINC="$PKGINC"
-- echo "$IPTINC (pkg-config)"
-- else
-- IPTINC="$IPTSRC/include"
-- echo "$IPTINC (from source)"
-- IPTINC="-I$IPTINC"
-- fi
-+ IPTINC="$IPTSRC/include"
-+ IPTINC="-I$IPTINC"
-+ echo "$IPTINC (from source)"
- fi
- }
-
-@@ -109,7 +110,16 @@ try_dir2() {
- test -d "$1" && try_dir `dirname $1` && return 0
- }
-
--iptables_ver() {
-+check_pkg_config() {
-+ test "$PKGWARN" && return 1
-+ if ! which pkg-config >/dev/null 2>&1; then
-+ echo "! You don't have pkg-config, it may be useful to install it."
-+ PKGWARN=1
-+ return 1
-+ fi
-+ return 0
-+}
-+iptables_find_version() {
- echo -n "Iptables binary version: "
- if [ "$IPTVER" ]; then
- echo "$IPTVER (user specified)"
-@@ -121,6 +131,7 @@ iptables_ver() {
- else
- echo "no iptables binary found"
- fi
-+ check_pkg_config
- PKGVER=`pkg-config --modversion xtables 2>/dev/null`
- if [ "$PKGVER" ]; then
- IPTVER="$PKGVER"
-@@ -131,44 +142,90 @@ iptables_ver() {
- fi
- }
-
--iptables_dir() {
-- test "$IPTINC" && return 1
-- test "$PKGINC" && return 1
--
-- VER="iptables-$IPTVER"
-- if [ "$IPTSRC" ]; then
-- echo "User specified source directory: $IPTSRC"
-- try_dir $IPTSRC || error "Specified directory is not iptables source.."
-+compile_libitp_test() {
-+ echo -n "Checking for presence of $@... "
-+ echo "
-+#define __EXPORTED_HEADERS__
-+#include <$*>" > test.c
-+ gcc -c test.c >/dev/null 2>&1
-+ RET=$?
-+ if [ $RET = 0 ]; then
-+ echo Yes;
- else
-- echo "Searching for $VER sources.."
-- try_dir "./$VER" && return 0
-- try_dir "../$VER" && return 0
-- try_dir "/usr/src/$VER" && return 0
-- try_dirg "iptables" && return 0
-- try_dirg "../iptables" && return 0
-- try_dirg "/usr/src/iptables" && return 0
-- try_dir2 `locate $VER/extensions | head -1` && return 0
-- error "Can not find iptables source directory, try setting it with --ipt-src="
-+ echo No;
- fi
-+ rm -f test.c test.o
-+ return $RET
- }
-
--iptables_pkg_config() {
-+iptables_try_pkgconfig() {
- if [ ! "$PKGVER" ]; then
-+ check_pkg_config
-+ PKGVER=`pkg-config --modversion xtables 2>/dev/null`
-+ TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null`
- echo -n "pkg-config for version $IPTVER exists: "
-- PKGVER=`pkg-config --exact-version=$IPTVER --modversion xtables 2>/dev/null`
-+ pkg-config --exact-version=$IPTVER xtables 2>/dev/null
- if [ $? = 0 ]; then
- echo "Yes"
-+ PKGVER=$TRYPKGVER
- else
-- echo "No (reported: $PKGVER)"
-- unset PKGVER
-+ if [ "$TRYPKGVER" ]; then
-+ echo "No (reported: $TRYPKGVER)"
-+ else
-+ echo "No"
-+ fi
- fi
- fi
- if [ "$PKGVER" ]; then
-+ check_pkg_config
-+ PKGVER=`pkg-config --modversion xtables 2>/dev/null`
- PKGINC=`pkg-config --cflags xtables`
- PKGLIB=`pkg-config --variable=xtlibdir xtables`
-- IPTCFLAGS="-DXTABLES"
- else
-- IPTCFLAGS="-DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
-+ # Newer versions of iptables should not have -I/kernel/include!
-+ # So I assume that newer version will have correct pkg-config set up
-+ # and if not, then it's older who need it.
-+ IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
-+ fi
-+ if compile_libitp_test xtables.h; then
-+ IPTCFLAGS="-DXTABLES $IPTCFLAGS"
-+ elif ! compile_libitp_test iptables.h; then
-+ echo "! Iptables headers not found. You may need to specify --ipt-inc=..."
-+ if [ -s /etc/debian_version ]; then
-+ echo "! "
-+ echo "! Under Debian simply run this:"
-+ echo "! root# apt-get install iptables-dev pkg-config"
-+ elif [ -s /etc/redhat-release ]; then
-+ echo "! "
-+ arch=.`uname -m`
-+ echo "! Under Centos simply run this:"
-+ echo "! root# yum install iptables-devel$arch pkgconfig"
-+ fi
-+ exit 1
-+ fi
-+
-+}
-+
-+iptables_find_src() {
-+ test "$IPTINC" && return 1
-+ test "$PKGVER" && return 1
-+
-+ VER="iptables-$IPTVER"
-+ if [ "$IPTSRC" ]; then
-+ echo "User specified source directory: $IPTSRC"
-+ try_dir $IPTSRC || error "Specified directory is not iptables source.."
-+ else
-+ echo "Searching for $VER sources.."
-+ try_dir "./$VER" && return 0
-+ try_dir "../$VER" && return 0
-+ try_dir "/usr/src/$VER" && return 0
-+ try_dirg "iptables" && return 0
-+ try_dirg "../iptables" && return 0
-+ try_dirg "/usr/src/iptables" && return 0
-+ try_dir2 `locate $VER/extensions 2>/dev/null | head -1` && return 0
-+ echo "! Can not find iptables source directory, you may try setting it with --ipt-src="
-+ echo "! This is not fatal error, yet. Will be just using default include dir."
-+ NOIPTSRC=1
- fi
- }
-
-@@ -206,18 +263,110 @@ do
- esac
- done
-
--test "$KVERSION" || KVERSION=`uname -r`
--echo Kernel version: $KVERSION
-+kernel_find_version() {
-+ KHOW=requested
-+ test "$KVERSION" && return 0
-+
-+ if grep -q '#.*Debian' /proc/version; then
-+ KHOW=proc
-+ KVERSION=`sed -n 's/.*#.*Debian \([0-9\.]\+\)-.*/\1/p' /proc/version`
-+ KLIBMOD=`uname -r`
-+ else
-+ KHOW=uname
-+ KVERSION=`uname -r`
-+ fi
-+ test "$KDIR" || return 0
-+
-+ test -s $KDIR/Makefile || return 1
-+ test -s $KDIR/include/config/kernel.release || return 1
-+ KVERSION=`cat $KDIR/include/config/kernel.release`
-+ KHOW=sources
-+}
-+
-+kernel_check_src() {
-+ if [ -s "$1/Makefile" ]; then
-+ KDIR="$1"
-+ return 0
-+ fi
-+ return 1
-+}
-+
-+kernel_find_source() {
-+ KSHOW=requested
-+ test "$KDIR" && return 0
-+ KSHOW=found
-+ kernel_check_src /lib/modules/$KLIBMOD/build && return 0
-+ kernel_check_src /lib/modules/$KVERSION/build && return 0
-+ kernel_check_src /usr/src/kernels/$KVERSION && return 0
-+ kernel_check_src /usr/src/linux-$KVERSION && return 0
-+ echo "! Linux source not found. Don't panic. You may specify kernel source"
-+ echo "! directory with --kdir=..., or try to install kernel-devel package,"
-+ echo "! or just raw sources for linux-$KVERSION from kernel.org."
-+ if grep -q -i centos /proc/version 2>/dev/null; then
-+ echo "! "
-+ arch=.`uname -m`
-+ echo "! Under Centos simply run this:"
-+ echo "! root# yum install kernel-devel iptables-devel$arch pkgconfig"
-+ fi
-+ if grep -q -i debian /proc/version 2>/dev/null; then
-+ echo "! "
-+ echo "! Under Debian simply run this:"
-+ echo "! root# apt-get install module-assistant iptables-dev pkg-config"
-+ echo "! root# m-a prepare"
-+ fi
-+ exit 1
-+}
-+
-+kernel_check_consistency() {
-+ if test -s $KDIR/include/config/kernel.release; then
-+ SRCVER=`cat $KDIR/include/config/kernel.release`
-+ test "$KVERSION" != "$SRCVER" && error "$KHOW kernel version ($KVERSION) and $KSHOW version of kernel source ($SRCVER) doesn't match!\n!" \
-+ "You may try to specify only kernel source tree with --kdir=$KDIR\n!" \
-+ "and configure will pick up version properly."
-+ else
-+ test -e "$KDIR/.config" || error ".config in kernel source not found, run make menuconfig in $KDIR"
-+ test -d "$KDIR/include/config" || error "kernel is not prepared, run make prepare modules_prepare in $KDIR"
-+ fi
-+}
-+
-+kconfig() {
-+ KCONFIG=$KDIR/.config
-+ if ! grep -q "^$1=" $KCONFIG 2>/dev/null; then
-+ if [ "$KCONFIGREPORTED" != true ]; then
-+ KCONFIGREPORTED=true
-+ echo Kernel config file checked: $KCONFIG
-+ echo
-+ fi
-+ echo "! Attention: $1 is undefined in your kernel configuration"
-+ echo "! Without this option enabled $2 will not work."
-+ echo
-+ fi
-+}
-+
-+kernel_check_config() {
-+ kconfig CONFIG_SYSCTL "sysctl interface"
-+ kconfig CONFIG_PROC_FS "proc interface"
-+ kconfig CONFIG_NF_NAT_NEEDED "natevents"
-+ kconfig CONFIG_NF_CONNTRACK_EVENTS "natevents"
-+ kconfig CONFIG_NF_CONNTRACK_MARK "connmark tracking"
-+ kconfig CONFIG_IPV6 "IPv6"
-+ kconfig CONFIG_IP6_NF_IPTABLES "ip6tables target"
-+}
-
--test "$KDIR" || KDIR=/lib/modules/$KVERSION/build
--echo Kernel sources: $KDIR
-+kernel_find_version #KVERSION
-+test "$KLIBMOD" || KLIBMOD=$KVERSION
-+echo "Kernel version: $KVERSION ($KHOW)"
-+kernel_find_source #KDIR
-+echo "Kernel sources: $KDIR ($KSHOW)"
-+kernel_check_consistency
-+kernel_check_config
-
- test "$IPTBIN" || IPTBIN=`which iptables`
-
--iptables_ver #IPTVER
--iptables_pkg_config
--iptables_dir #IPTSRC
--iptables_src_version #check IPTSRC match to IPTVER
-+iptables_find_version #IPTVER
-+iptables_try_pkgconfig #try to configure from pkg-config
-+iptables_find_src #IPTSRC
-+iptables_src_version #check that IPTSRC match to IPTVER
- iptables_inc #IPTINC
- iptables_modules #IPTLIB
-
-@@ -225,7 +374,6 @@ REPLACE="\
- s!@KVERSION@!$KVERSION!;\
- s!@KDIR@!$KDIR!;\
- s!@IPTABLES_VERSION@!$IPTVER!;\
--s!@IPTABLES_INCLUDES@!$IPTINC!;\
- s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\
- s!@IPTABLES_MODULES@!$IPTLIB!"
-
-diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
-index d4c91e1..ad974c5 100644
---- a/ipt_NETFLOW.c
-+++ b/ipt_NETFLOW.c
-@@ -1,6 +1,6 @@
- /*
- * This is NetFlow exporting module (NETFLOW target) for linux
-- * (c) 2008-2012 <abc@telekom.ru>
-+ * (c) 2008-2013 <abc@telekom.ru>
- *
- *
- * This program is free software: you can redistribute it and/or modify
-@@ -18,8 +18,6 @@
- *
- */
-
--//#define RAW_PROMISC_HACK
--
- #include <linux/module.h>
- #include <linux/skbuff.h>
- #include <linux/proc_fs.h>
-@@ -31,16 +29,26 @@
- #include <linux/icmp.h>
- #include <linux/igmp.h>
- #include <linux/inetdevice.h>
--#include <linux/jhash.h>
-+#include <linux/hash.h>
-+#include <linux/delay.h>
-+#include <linux/spinlock_types.h>
- #include <net/icmp.h>
- #include <net/ip.h>
-+#include <net/ipv6.h>
- #include <net/tcp.h>
- #include <net/route.h>
-+#include <net/ip6_fib.h>
- #include <net/dst.h>
- #include <linux/netfilter_ipv4/ip_tables.h>
-+#if defined(CONFIG_NF_NAT_NEEDED) || defined(CONFIG_NF_CONNTRACK_MARK)
-+#include <linux/notifier.h>
-+#include <net/netfilter/nf_conntrack.h>
-+#include <net/netfilter/nf_conntrack_core.h>
-+#endif
- #include <linux/version.h>
- #include <asm/unaligned.h>
- #include "ipt_NETFLOW.h"
-+#include "murmur3.h"
- #ifdef CONFIG_BRIDGE_NETFILTER
- #include <linux/netfilter_bridge.h>
- #endif
-@@ -74,41 +82,66 @@
- #define ipt_target xt_target
- #endif
-
--#define IPT_NETFLOW_VERSION "1.8"
-+#define IPT_NETFLOW_VERSION "1.8.2" /* Note that if you are using git, you
-+ will see version in other format. */
-+#include "version.h"
-+#ifdef GITVERSION
-+#undef IPT_NETFLOW_VERSION
-+#define IPT_NETFLOW_VERSION GITVERSION
-+#endif
-
- MODULE_LICENSE("GPL");
- MODULE_AUTHOR("<abc@telekom.ru>");
- MODULE_DESCRIPTION("iptables NETFLOW target module");
- MODULE_VERSION(IPT_NETFLOW_VERSION);
-+MODULE_ALIAS("ip6t_NETFLOW");
-
- #define DST_SIZE 256
- static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
- static char *destination = destination_buf;
--module_param(destination, charp, 0400);
-+module_param(destination, charp, 0444);
- MODULE_PARM_DESC(destination, "export destination ipaddress:port");
-
- static int inactive_timeout = 15;
--module_param(inactive_timeout, int, 0600);
-+module_param(inactive_timeout, int, 0644);
- MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");
-
- static int active_timeout = 30 * 60;
--module_param(active_timeout, int, 0600);
-+module_param(active_timeout, int, 0644);
- MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");
-
- static int debug = 0;
--module_param(debug, int, 0600);
-+module_param(debug, int, 0644);
- MODULE_PARM_DESC(debug, "debug verbosity level");
-
- static int sndbuf;
--module_param(sndbuf, int, 0400);
-+module_param(sndbuf, int, 0444);
- MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");
-
-+static int protocol = 5;
-+module_param(protocol, int, 0444);
-+MODULE_PARM_DESC(protocol, "netflow protocol version (5, 9, 10)");
-+
-+static unsigned int refresh_rate = 20;
-+module_param(refresh_rate, uint, 0644);
-+MODULE_PARM_DESC(refresh_rate, "NetFlow v9/IPFIX refresh rate (packets)");
-+
-+static unsigned int timeout_rate = 30;
-+module_param(timeout_rate, uint, 0644);
-+MODULE_PARM_DESC(timeout_rate, "NetFlow v9/IPFIX timeout rate (minutes)");
-+
-+#ifdef CONFIG_NF_NAT_NEEDED
-+static int natevents = 0;
-+module_param(natevents, int, 0444);
-+MODULE_PARM_DESC(natevents, "send NAT Events");
-+#endif
-+
- static int hashsize;
--module_param(hashsize, int, 0400);
-+module_param(hashsize, int, 0444);
- MODULE_PARM_DESC(hashsize, "hash table size");
-
- static int maxflows = 2000000;
--module_param(maxflows, int, 0600);
-+module_param(maxflows, int, 0644);
- MODULE_PARM_DESC(maxflows, "maximum number of flows");
- static int peakflows = 0;
- static unsigned long peakflows_at;
-@@ -121,22 +154,52 @@ MODULE_PARM_DESC(aggregation, "aggregation ruleset");
-
- static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat);
- static LIST_HEAD(usock_list);
--static DEFINE_RWLOCK(sock_lock);
-+static DEFINE_MUTEX(sock_lock);
-
-+#define LOCK_COUNT (1<<8)
-+#define LOCK_COUNT_MASK (LOCK_COUNT-1)
-+static spinlock_t htable_locks[LOCK_COUNT] = {
-+ [0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(htable_locks)
-+};
-+static DEFINE_RWLOCK(htable_rwlock); /* global lock to protect htable_locks change */
- static unsigned int ipt_netflow_hash_rnd;
--struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
-+static struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
- static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */
- static LIST_HEAD(ipt_netflow_list); /* all flows */
-+static DEFINE_SPINLOCK(hlist_lock); /* should almost always be locked w/o _bh */
- static LIST_HEAD(aggr_n_list);
- static LIST_HEAD(aggr_p_list);
- static DEFINE_RWLOCK(aggr_lock);
-+#ifdef CONFIG_NF_NAT_NEEDED
-+static LIST_HEAD(nat_list); /* nat events */
-+static DEFINE_SPINLOCK(nat_lock);
-+static unsigned long nat_events_start = 0;
-+static unsigned long nat_events_stop = 0;
-+#endif
- static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */
- static atomic_t ipt_netflow_count = ATOMIC_INIT(0);
--static DEFINE_SPINLOCK(ipt_netflow_lock); /* hash table lock */
-
--static long long pdu_packets = 0, pdu_traf = 0;
--static struct netflow5_pdu pdu;
--static unsigned long pdu_ts_mod;
-+static long long pdu_packets = 0, pdu_traf = 0; /* how much accounted traffic in pdu */
-+static unsigned int pdu_count = 0;
-+static unsigned int pdu_seq = 0;
-+static unsigned int pdu_data_records = 0;
-+static unsigned int pdu_tpl_records = 0;
-+static unsigned long pdu_ts_mod; /* ts of last flow */
-+static union {
-+ struct netflow5_pdu v5;
-+ struct netflow9_pdu v9;
-+ struct ipfix_pdu ipfix;
-+} pdu;
-+static int engine_id = 0; /* Observation Domain */
-+static __u8 *pdu_data_used;
-+static __u8 *pdu_high_wm; /* high watermark */
-+static unsigned int pdu_max_size; /* sizeof pdu */
-+static struct flowset_data *pdu_flowset = NULL; /* current data flowset */
-+
-+static void (*netflow_export_flow)(struct ipt_netflow *nf);
-+static void (*netflow_export_pdu)(void); /* called on timeout */
-+static void netflow_switch_version(int ver);
-+
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
- static void netflow_work_fn(void *work);
- static DECLARE_WORK(netflow_work, netflow_work_fn, NULL);
-@@ -146,19 +209,26 @@ static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn);
- #endif
- static struct timer_list rate_timer;
-
-+#define TCP_SYN_ACK 0x12
- #define TCP_FIN_RST 0x05
-
- static long long sec_prate = 0, sec_brate = 0;
- static long long min_prate = 0, min_brate = 0;
- static long long min5_prate = 0, min5_brate = 0;
--static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */
-+static unsigned int metric = 100, min15_metric = 100, min5_metric = 100, min_metric = 100; /* hash metrics */
-
- static int set_hashsize(int new_size);
- static void destination_removeall(void);
- static int add_destinations(char *ptr);
- static void aggregation_remove(struct list_head *list);
- static int add_aggregation(char *ptr);
--static void netflow_scan_and_export(int flush);
-+static int netflow_scan_and_export(int flush);
-+enum {
-+ DONT_FLUSH, AND_FLUSH
-+};
-+static int template_ids = FLOWSET_DATA_FIRST;
-+static int tpl_count = 0; /* how much active templates */
-+
-
- static inline __be32 bits2mask(int bits) {
- return (bits? 0xffffffff << (32 - bits) : 0);
-@@ -175,28 +245,46 @@ static inline int mask2bits(__be32 mask) {
- /* under that lock worker is always stopped and not rescheduled,
- * and we can call worker sub-functions manually */
- static DEFINE_MUTEX(worker_lock);
--static inline void __start_scan_worker(void)
-+#define MIN_DELAY 1
-+#define MAX_DELAY (HZ / 10)
-+static int worker_delay = HZ / 10;
-+static inline void _schedule_scan_worker(const int status)
- {
-- schedule_delayed_work(&netflow_work, HZ / 10);
-+ /* rudimentary congestion avoidance */
-+ if (status > 0)
-+ worker_delay -= status;
-+ else if (status < 0)
-+ worker_delay /= 2;
-+ else
-+ worker_delay++;
-+ if (worker_delay < MIN_DELAY)
-+ worker_delay = MIN_DELAY;
-+ else if (worker_delay > MAX_DELAY)
-+ worker_delay = MAX_DELAY;
-+ schedule_delayed_work(&netflow_work, worker_delay);
- }
-
--static inline void start_scan_worker(void)
-+/* This is only called soon after pause_scan_worker. */
-+static inline void cont_scan_worker(void)
- {
-- __start_scan_worker();
-+ _schedule_scan_worker(0);
- mutex_unlock(&worker_lock);
- }
-
--/* we always stop scanner before write_lock(&sock_lock)
-- * to let it never hold that spin lock */
--static inline void __stop_scan_worker(void)
-+static inline void _unschedule_scan_worker(void)
- {
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
-+ cancel_rearming_delayed_work(&netflow_work);
-+#else
- cancel_delayed_work_sync(&netflow_work);
-+#endif
- }
-
--static inline void stop_scan_worker(void)
-+/* This is only used for quick pause (in procctl). */
-+static inline void pause_scan_worker(void)
- {
- mutex_lock(&worker_lock);
-- __stop_scan_worker();
-+ _unschedule_scan_worker();
- }
-
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
-@@ -223,11 +311,14 @@ static int nf_seq_show(struct seq_file *seq, void *v)
- int snum = 0;
- int peak = (jiffies - peakflows_at) / HZ;
-
-- seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n",
-+ seq_printf(seq, "ipt_NETFLOW version " IPT_NETFLOW_VERSION ", srcversion %s\n",
-+ THIS_MODULE->srcversion);
-+ seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK, worker delay %d/%d.\n",
- nr_flows,
- peakflows,
- peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60,
-- (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10));
-+ (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10),
-+ worker_delay, HZ);
-
- for_each_present_cpu(cpu) {
- struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
-@@ -252,93 +343,123 @@ static int nf_seq_show(struct seq_file *seq, void *v)
- }
-
- #define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec
-- seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n",
-- ipt_netflow_hash_size,
-- (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
-- FFLOAT(metric, 10),
-- FFLOAT(min_metric, 10),
-- FFLOAT(min5_metric, 10),
-- FFLOAT(min15_metric, 10),
-- pkt_total - pkt_out + pdu_packets,
-- (traf_total - traf_out + pdu_traf) >> 10,
-- pdu_packets,
-- pdu_traf);
--
-- seq_printf(seq, "Timeout: active %d, inactive %d. Maxflows %u\n",
-- active_timeout,
-- inactive_timeout,
-- maxflows);
--
-- seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
-- sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
--
-- seq_printf(seq, "cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
--
-- seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
-- (unsigned long long)searched,
-- (unsigned long long)found,
-- (unsigned long long)notfound,
-- truncated, frags, alloc_err, maxflows_err,
-- send_success, send_failed, sock_errors,
-- (unsigned long long)exported_size >> 10,
-- (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
-- (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
-+ seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%02d [%d.%02d, %d.%02d, %d.%02d]."
-+ " MemTraf: %llu pkt, %llu K (pdu %llu, %llu), Out %llu pkt, %llu K.\n",
-+ ipt_netflow_hash_size,
-+ (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
-+ FFLOAT(metric, 100),
-+ FFLOAT(min_metric, 100),
-+ FFLOAT(min5_metric, 100),
-+ FFLOAT(min15_metric, 100),
-+ pkt_total - pkt_out + pdu_packets,
-+ (traf_total - traf_out + pdu_traf) >> 10,
-+ pdu_packets,
-+ pdu_traf,
-+ pkt_out,
-+ traf_out >> 10);
-+
-+ seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec;"
-+ " Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
-+ sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
-+
-+ seq_printf(seq, "cpu# stat: <search found new [metric], trunc frag alloc maxflows>,"
-+ " sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
-+
-+#define SAFEDIV(x,y) ((y)? ({ u64 __tmp = x; do_div(__tmp, y); (int)__tmp; }) : 0)
-+ seq_printf(seq, "Total stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
-+ " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
-+ searched,
-+ (unsigned long long)found,
-+ (unsigned long long)notfound,
-+ FFLOAT(SAFEDIV(100LL * (searched + found + notfound), (found + notfound)), 100),
-+ truncated, frags, alloc_err, maxflows_err,
-+ send_success, send_failed, sock_errors,
-+ (unsigned long long)exported_size >> 10,
-+ (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
-+ (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
-
- if (num_present_cpus() > 1) {
- for_each_present_cpu(cpu) {
- struct ipt_netflow_stat *st;
-
- st = &per_cpu(ipt_netflow_stat, cpu);
-- seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
-- cpu,
-- (unsigned long long)st->searched,
-- (unsigned long long)st->found,
-- (unsigned long long)st->notfound,
-- st->truncated, st->frags, st->alloc_err, st->maxflows_err,
-- st->send_success, st->send_failed, st->sock_errors,
-- (unsigned long long)st->exported_size >> 10,
-- (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
-- (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
-+ seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
-+ " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
-+ cpu,
-+ (unsigned long long)st->searched,
-+ (unsigned long long)st->found,
-+ (unsigned long long)st->notfound,
-+ FFLOAT(SAFEDIV(100LL * (st->searched + st->found + st->notfound), (st->found + st->notfound)), 100),
-+ st->truncated, st->frags, st->alloc_err, st->maxflows_err,
-+ st->send_success, st->send_failed, st->sock_errors,
-+ (unsigned long long)st->exported_size >> 10,
-+ (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
-+ (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
- }
- }
-
-- read_lock(&sock_lock);
-+ seq_printf(seq, "Protocol version %d", protocol);
-+ if (protocol == 10)
-+ seq_printf(seq, " (ipfix)");
-+ else
-+ seq_printf(seq, " (netflow)");
-+ if (protocol >= 9)
-+ seq_printf(seq, ", refresh-rate %u, timeout-rate %u, (templates %d, active %d)",
-+ refresh_rate, timeout_rate, template_ids - FLOWSET_DATA_FIRST, tpl_count);
-+
-+ seq_printf(seq, ". Timeouts: active %d, inactive %d. Maxflows %u\n",
-+ active_timeout,
-+ inactive_timeout,
-+ maxflows);
-+
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ seq_printf(seq, "Natevents %s, count start %lu, stop %lu.\n", natevents? "enabled" : "disabled",
-+ nat_events_start, nat_events_stop);
-+#endif
-+
-+ mutex_lock(&sock_lock);
- list_for_each_entry(usock, &usock_list, list) {
-- struct sock *sk = usock->sock->sk;
--
-- seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n",
-- snum,
-- usock->ipaddr >> 24,
-- (usock->ipaddr >> 16) & 255,
-- (usock->ipaddr >> 8) & 255,
-- usock->ipaddr & 255,
-- usock->port,
-- sk->sk_sndbuf,
-- atomic_read(&sk->sk_wmem_alloc),
-- atomic_read(&usock->wmem_peak),
-- atomic_read(&usock->err_full),
-- atomic_read(&usock->err_other));
-+ seq_printf(seq, "sock%d: %u.%u.%u.%u:%u",
-+ snum,
-+ HIPQUAD(usock->ipaddr),
-+ usock->port);
-+ if (usock->sock) {
-+ struct sock *sk = usock->sock->sk;
-+
-+ seq_printf(seq, ", sndbuf %u, filled %u, peak %u;"
-+ " err: sndbuf reached %u, connect %u, other %u\n",
-+ sk->sk_sndbuf,
-+ atomic_read(&sk->sk_wmem_alloc),
-+ atomic_read(&usock->wmem_peak),
-+ atomic_read(&usock->err_full),
-+ atomic_read(&usock->err_connect),
-+ atomic_read(&usock->err_other));
-+ } else
-+ seq_printf(seq, " unconnected (%u attempts).\n",
-+ atomic_read(&usock->err_connect));
- snum++;
- }
-- read_unlock(&sock_lock);
-+ mutex_unlock(&sock_lock);
-
- read_lock_bh(&aggr_lock);
- snum = 0;
- list_for_each_entry(aggr_n, &aggr_n_list, list) {
-- seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n",
-- snum,
-- HIPQUAD(aggr_n->addr),
-- mask2bits(aggr_n->mask),
-- mask2bits(aggr_n->aggr_mask));
-+ seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d (usage %u)\n",
-+ snum,
-+ HIPQUAD(aggr_n->addr),
-+ mask2bits(aggr_n->mask),
-+ mask2bits(aggr_n->aggr_mask),
-+ atomic_read(&aggr_n->usage));
- snum++;
- }
- snum = 0;
- list_for_each_entry(aggr_p, &aggr_p_list, list) {
-- seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n",
-- snum,
-- aggr_p->port1,
-- aggr_p->port2,
-- aggr_p->aggr_port);
-+ seq_printf(seq, "aggr#%d port: ports %u-%u replace %u (usage %u)\n",
-+ snum,
-+ aggr_p->port1,
-+ aggr_p->port2,
-+ aggr_p->aggr_port,
-+ atomic_read(&aggr_p->usage));
- snum++;
- }
- read_unlock_bh(&aggr_lock);
-@@ -367,8 +488,13 @@ static struct file_operations nf_seq_fops = {
- #define BEFORE2632(x,y)
- #endif
-
-+/* PAX need to know that we are allowed to write */
-+#ifndef CONSTIFY_PLUGIN
-+#define ctl_table_no_const ctl_table
-+#endif
-+
- /* sysctl /proc/sys/net/netflow */
--static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
-+static int hsize_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
- void __user *buffer, size_t *lenp, loff_t *fpos)
- {
- void *orig = ctl->data;
-@@ -386,20 +512,21 @@ static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp
- return ret;
- }
-
--static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
-+static int sndbuf_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
- void __user *buffer, size_t *lenp, loff_t *fpos)
- {
- int ret;
- struct ipt_netflow_sock *usock;
--
-- read_lock(&sock_lock);
-+
-+ mutex_lock(&sock_lock);
- if (list_empty(&usock_list)) {
-- read_unlock(&sock_lock);
-+ mutex_unlock(&sock_lock);
- return -ENOENT;
- }
- usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
-- sndbuf = usock->sock->sk->sk_sndbuf;
-- read_unlock(&sock_lock);
-+ if (usock->sock)
-+ sndbuf = usock->sock->sk->sk_sndbuf;
-+ mutex_unlock(&sock_lock);
-
- ctl->data = &sndbuf;
- ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
-@@ -407,13 +534,14 @@ static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *fil
- return ret;
- if (sndbuf < SOCK_MIN_SNDBUF)
- sndbuf = SOCK_MIN_SNDBUF;
-- stop_scan_worker();
-- write_lock(&sock_lock);
-+ pause_scan_worker();
-+ mutex_lock(&sock_lock);
- list_for_each_entry(usock, &usock_list, list) {
-- usock->sock->sk->sk_sndbuf = sndbuf;
-+ if (usock->sock)
-+ usock->sock->sk->sk_sndbuf = sndbuf;
- }
-- write_unlock(&sock_lock);
-- start_scan_worker();
-+ mutex_unlock(&sock_lock);
-+ cont_scan_worker();
- return ret;
- }
-
-@@ -424,10 +552,10 @@ static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file
-
- ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
- if (ret >= 0 && write) {
-- stop_scan_worker();
-+ pause_scan_worker();
- destination_removeall();
- add_destinations(destination_buf);
-- start_scan_worker();
-+ cont_scan_worker();
- }
- return ret;
- }
-@@ -446,13 +574,12 @@ static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file
- return ret;
- }
-
--static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
-+static int flush_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
- void __user *buffer, size_t *lenp, loff_t *fpos)
- {
- int ret;
-- int val;
-+ int val = 0;
-
-- val = 0;
- ctl->data = &val;
- ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
-
-@@ -461,14 +588,67 @@ static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp
-
- if (val > 0) {
- printk(KERN_INFO "ipt_NETFLOW: forced flush\n");
-- stop_scan_worker();
-- netflow_scan_and_export(1);
-- start_scan_worker();
-+ pause_scan_worker();
-+ netflow_scan_and_export(AND_FLUSH);
-+ cont_scan_worker();
-+ }
-+
-+ return ret;
-+}
-+
-+static int protocol_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
-+ void __user *buffer, size_t *lenp, loff_t *fpos)
-+{
-+ int ret;
-+ int ver = protocol;
-+
-+ ctl->data = &ver;
-+ ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
-+
-+ if (!write)
-+ return ret;
-+
-+ switch (ver) {
-+ case 5:
-+ case 9:
-+ case 10:
-+ printk(KERN_INFO "ipt_NETFLOW: forced flush (protocol version change)\n");
-+ pause_scan_worker();
-+ netflow_scan_and_export(AND_FLUSH);
-+ netflow_switch_version(ver);
-+ cont_scan_worker();
-+ break;
-+ default:
-+ return -EPERM;
- }
-
- return ret;
- }
-
-+#ifdef CONFIG_NF_NAT_NEEDED
-+static void register_ct_events(void);
-+static void unregister_ct_events(void);
-+static int natevents_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
-+ void __user *buffer, size_t *lenp, loff_t *fpos)
-+{
-+ int ret;
-+ int val = natevents;
-+
-+ ctl->data = &val;
-+ ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
-+
-+ if (!write)
-+ return ret;
-+
-+ if (natevents && !val)
-+ unregister_ct_events();
-+ else if (!natevents && val)
-+ register_ct_events();
-+
-+ return ret;
-+}
-+#endif
-+
- static struct ctl_table_header *netflow_sysctl_header;
-
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-@@ -547,6 +727,38 @@ static struct ctl_table netflow_sysctl_table[] = {
- .maxlen = sizeof(int),
- .proc_handler = &flush_procctl,
- },
-+ {
-+ _CTL_NAME(10)
-+ .procname = "protocol",
-+ .mode = 0644,
-+ .maxlen = sizeof(int),
-+ .proc_handler = &protocol_procctl,
-+ },
-+ {
-+ _CTL_NAME(11)
-+ .procname = "refresh-rate",
-+ .mode = 0644,
-+ .data = &refresh_rate,
-+ .maxlen = sizeof(int),
-+ .proc_handler = &proc_dointvec,
-+ },
-+ {
-+ _CTL_NAME(12)
-+ .procname = "timeout-rate",
-+ .mode = 0644,
-+ .data = &timeout_rate,
-+ .maxlen = sizeof(int),
-+ .proc_handler = &proc_dointvec,
-+ },
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ {
-+ _CTL_NAME(13)
-+ .procname = "natevents",
-+ .mode = 0644,
-+ .maxlen = sizeof(int),
-+ .proc_handler = &natevents_procctl,
-+ },
-+#endif
- { }
- };
-
-@@ -588,18 +800,69 @@ static struct ctl_path netflow_sysctl_path[] = {
- static void sk_error_report(struct sock *sk)
- {
- /* clear connection refused errors if any */
-- write_lock_bh(&sk->sk_callback_lock);
- if (debug > 1)
-- printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err);
-+ printk(KERN_INFO "ipt_NETFLOW: socket error <%d>\n", sk->sk_err);
- sk->sk_err = 0;
- NETFLOW_STAT_INC(sock_errors);
-- write_unlock_bh(&sk->sk_callback_lock);
- return;
- }
-
-+static struct socket *_usock_alloc(const __be32 ipaddr, const unsigned short port)
-+{
-+ struct sockaddr_in sin;
-+ struct socket *sock;
-+ int error;
-+
-+ if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
-+ printk(KERN_ERR "ipt_NETFLOW: sock_create_kern error %d\n", -error);
-+ return NULL;
-+ }
-+ sock->sk->sk_allocation = GFP_ATOMIC;
-+ sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
-+ sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
-+ if (sndbuf)
-+ sock->sk->sk_sndbuf = sndbuf;
-+ else
-+ sndbuf = sock->sk->sk_sndbuf;
-+ memset(&sin, 0, sizeof(sin));
-+ sin.sin_family = AF_INET;
-+ sin.sin_addr.s_addr = htonl(ipaddr);
-+ sin.sin_port = htons(port);
-+ if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
-+ sizeof(sin), 0)) < 0) {
-+ printk(KERN_ERR "ipt_NETFLOW: error connecting UDP socket %d,"
-+ " don't worry, will try reconnect later.\n", -error);
-+ /* ENETUNREACH when no interfaces */
-+ sock_release(sock);
-+ return NULL;
-+ }
-+ return sock;
-+}
-+
-+static void usock_connect(struct ipt_netflow_sock *usock, const int sendmsg)
-+{
-+ usock->sock = _usock_alloc(usock->ipaddr, usock->port);
-+ if (usock->sock) {
-+ if (sendmsg || debug)
-+ printk(KERN_INFO "ipt_NETFLOW: connected %u.%u.%u.%u:%u\n",
-+ HIPQUAD(usock->ipaddr),
-+ usock->port);
-+ } else {
-+ atomic_inc(&usock->err_connect);
-+ if (debug)
-+ printk(KERN_INFO "ipt_NETFLOW: connect to %u.%u.%u.%u:%u failed%s.\n",
-+ HIPQUAD(usock->ipaddr),
-+ usock->port,
-+ (sendmsg)? " (pdu lost)" : "");
-+ }
-+ atomic_set(&usock->wmem_peak, 0);
-+ atomic_set(&usock->err_full, 0);
-+ atomic_set(&usock->err_other, 0);
-+}
-+
- // return number of sends succeeded, 0 if none
- /* only called in scan worker path */
--static int netflow_send_pdu(void *buffer, int len)
-+static void netflow_sendmsg(void *buffer, const int len)
- {
- struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
- struct kvec iov = { buffer, len };
-@@ -607,9 +870,16 @@ static int netflow_send_pdu(void *buffer, int len)
- int snum = 0;
- struct ipt_netflow_sock *usock;
-
-+ mutex_lock(&sock_lock);
- list_for_each_entry(usock, &usock_list, list) {
-+ if (!usock->sock)
-+ usock_connect(usock, 1);
-+ if (!usock->sock) {
-+ NETFLOW_STAT_INC_ATOMIC(send_failed);
-+ continue;
-+ }
- if (debug)
-- printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n",
-+ printk(KERN_INFO "netflow_sendmsg: sendmsg(%d, %d) [%u %u]\n",
- snum,
- len,
- atomic_read(&usock->sock->sk->sk_wmem_alloc),
-@@ -624,7 +894,7 @@ static int netflow_send_pdu(void *buffer, int len)
- suggestion = ": increase sndbuf!";
- } else
- atomic_inc(&usock->err_other);
-- printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n",
-+ printk(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: data loss %llu pkt, %llu bytes%s\n",
- snum, ret, pdu_packets, pdu_traf, suggestion);
- } else {
- unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc);
-@@ -636,98 +906,67 @@ static int netflow_send_pdu(void *buffer, int len)
- }
- snum++;
- }
-- return retok;
-+ mutex_unlock(&sock_lock);
-+ if (retok == 0) {
-+		/* not even one send succeeded, account stat for dropped packets */
-+ NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
-+ NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
-+ }
- }
-
--static void usock_free(struct ipt_netflow_sock *usock)
-+static void usock_close_free(struct ipt_netflow_sock *usock)
- {
-- printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n",
-+ printk(KERN_INFO "ipt_NETFLOW: removed destination %u.%u.%u.%u:%u\n",
- HIPQUAD(usock->ipaddr),
-- usock->port,
-- usock->sock);
-+ usock->port);
- if (usock->sock)
- sock_release(usock->sock);
- usock->sock = NULL;
-- vfree(usock);
-+ vfree(usock);
- }
-
- static void destination_removeall(void)
- {
-- write_lock(&sock_lock);
-+ mutex_lock(&sock_lock);
- while (!list_empty(&usock_list)) {
- struct ipt_netflow_sock *usock;
-
- usock = list_entry(usock_list.next, struct ipt_netflow_sock, list);
- list_del(&usock->list);
-- write_unlock(&sock_lock);
-- usock_free(usock);
-- write_lock(&sock_lock);
-+ mutex_unlock(&sock_lock);
-+ usock_close_free(usock);
-+ mutex_lock(&sock_lock);
- }
-- write_unlock(&sock_lock);
-+ mutex_unlock(&sock_lock);
- }
-
- static void add_usock(struct ipt_netflow_sock *usock)
- {
- struct ipt_netflow_sock *sk;
-
-- /* don't need empty sockets */
-- if (!usock->sock) {
-- usock_free(usock);
-- return;
-- }
--
-- write_lock(&sock_lock);
-+ mutex_lock(&sock_lock);
- /* don't need duplicated sockets */
- list_for_each_entry(sk, &usock_list, list) {
- if (sk->ipaddr == usock->ipaddr &&
- sk->port == usock->port) {
-- write_unlock(&sock_lock);
-- usock_free(usock);
-+ mutex_unlock(&sock_lock);
-+ usock_close_free(usock);
- return;
- }
- }
- list_add_tail(&usock->list, &usock_list);
-- printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n",
-+ printk(KERN_INFO "ipt_NETFLOW: added destination %u.%u.%u.%u:%u%s\n",
- HIPQUAD(usock->ipaddr),
-- usock->port);
-- write_unlock(&sock_lock);
--}
--
--static struct socket *usock_alloc(__be32 ipaddr, unsigned short port)
--{
-- struct sockaddr_in sin;
-- struct socket *sock;
-- int error;
--
-- if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
-- printk(KERN_ERR "netflow: sock_create_kern error %d\n", error);
-- return NULL;
-- }
-- sock->sk->sk_allocation = GFP_ATOMIC;
-- sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
-- sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
-- if (sndbuf)
-- sock->sk->sk_sndbuf = sndbuf;
-- else
-- sndbuf = sock->sk->sk_sndbuf;
-- memset(&sin, 0, sizeof(sin));
-- sin.sin_family = AF_INET;
-- sin.sin_addr.s_addr = htonl(ipaddr);
-- sin.sin_port = htons(port);
-- if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
-- sizeof(sin), 0)) < 0) {
-- printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error);
-- sock_release(sock);
-- return NULL;
-- }
-- return sock;
-+ usock->port,
-+ (!usock->sock)? " (unconnected)" : "");
-+ mutex_unlock(&sock_lock);
- }
-
- #define SEPARATORS " ,;\t\n"
- static int add_destinations(char *ptr)
- {
- while (ptr) {
-- unsigned char ip[4];
-+ unsigned char ip[4];
- unsigned short port;
-
- ptr += strspn(ptr, SEPARATORS);
-@@ -737,17 +976,15 @@ static int add_destinations(char *ptr)
- struct ipt_netflow_sock *usock;
-
- if (!(usock = vmalloc(sizeof(*usock)))) {
-- printk(KERN_ERR "netflow: can't vmalloc socket\n");
-+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc socket\n");
- return -ENOMEM;
- }
-
- memset(usock, 0, sizeof(*usock));
-+ atomic_set(&usock->err_connect, 0);
- usock->ipaddr = ntohl(*(__be32 *)ip);
- usock->port = port;
-- usock->sock = usock_alloc(usock->ipaddr, port);
-- atomic_set(&usock->wmem_peak, 0);
-- atomic_set(&usock->err_full, 0);
-- atomic_set(&usock->err_other, 0);
-+ usock_connect(usock, 0);
- add_usock(usock);
- } else
- break;
-@@ -781,7 +1018,7 @@ static int add_aggregation(char *ptr)
- LIST_HEAD(old_aggr_list);
-
- while (ptr && *ptr) {
-- unsigned char ip[4];
-+ unsigned char ip[4];
- unsigned int mask;
- unsigned int port1, port2;
- unsigned int aggr_to;
-@@ -792,16 +1029,16 @@ static int add_aggregation(char *ptr)
- ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) {
-
- if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) {
-- printk(KERN_ERR "netflow: can't vmalloc aggr\n");
-+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
- return -ENOMEM;
- }
- memset(aggr_n, 0, sizeof(*aggr_n));
-
-- aggr_n->addr = ntohl(*(__be32 *)ip);
- aggr_n->mask = bits2mask(mask);
-+ aggr_n->addr = ntohl(*(__be32 *)ip) & aggr_n->mask;
- aggr_n->aggr_mask = bits2mask(aggr_to);
- aggr_n->prefix = mask;
-- printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n",
-+ printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u.%u.%u.%u/%u=%u]\n",
- HIPQUAD(aggr_n->addr), mask, aggr_to);
- list_add_tail(&aggr_n->list, &new_aggr_n_list);
-
-@@ -809,7 +1046,7 @@ static int add_aggregation(char *ptr)
- sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) {
-
- if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) {
-- printk(KERN_ERR "netflow: can't vmalloc aggr\n");
-+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
- return -ENOMEM;
- }
- memset(aggr_p, 0, sizeof(*aggr_p));
-@@ -817,11 +1054,11 @@ static int add_aggregation(char *ptr)
- aggr_p->port1 = port1;
- aggr_p->port2 = port2;
- aggr_p->aggr_port = aggr_to;
-- printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n",
-+ printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u-%u=%u]\n",
- port1, port2, aggr_to);
- list_add_tail(&aggr_p->list, &new_aggr_p_list);
- } else {
-- printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr);
-+ printk(KERN_ERR "ipt_NETFLOW: bad aggregation rule: %s (ignoring)\n", ptr);
- break;
- }
-
-@@ -846,17 +1083,23 @@ static int add_aggregation(char *ptr)
-
- static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple)
- {
-- /* tuple is rounded to u32s */
-- return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
-+ return murmur3(tuple, sizeof(struct ipt_netflow_tuple), ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
- }
-
- static struct ipt_netflow *
--ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
-+ipt_netflow_find(const struct ipt_netflow_tuple *tuple, const unsigned int hash)
- {
- struct ipt_netflow *nf;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
-+#define compat_hlist_for_each_entry hlist_for_each_entry
-+#define compat_hlist_for_each_entry_safe hlist_for_each_entry_safe
- struct hlist_node *pos;
-+#else /* since 3.9.0 */
-+#define compat_hlist_for_each_entry(a,pos,c,d) hlist_for_each_entry(a,c,d)
-+#define compat_hlist_for_each_entry_safe(a,pos,c,d,e) hlist_for_each_entry_safe(a,c,d,e)
-+#endif
-
-- hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
-+ compat_hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
- if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
- nf->nr_bytes < FLOW_FULL_WATERMARK) {
- NETFLOW_STAT_INC(found);
-@@ -868,7 +1111,7 @@ ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
- return NULL;
- }
-
--static struct hlist_head *alloc_hashtable(int size)
-+static struct hlist_head *alloc_hashtable(const int size)
- {
- struct hlist_head *hash;
-
-@@ -879,19 +1122,18 @@ static struct hlist_head *alloc_hashtable(int size)
- for (i = 0; i < size; i++)
- INIT_HLIST_HEAD(&hash[i]);
- } else
-- printk(KERN_ERR "netflow: unable to vmalloc hash table.\n");
-+ printk(KERN_ERR "ipt_NETFLOW: unable to vmalloc hash table.\n");
-
- return hash;
- }
-
--static int set_hashsize(int new_size)
-+static int set_hashsize(const int new_size)
- {
- struct hlist_head *new_hash, *old_hash;
-- unsigned int hash;
- struct ipt_netflow *nf;
- int rnd;
-
-- printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n",
-+ printk(KERN_INFO "ipt_NETFLOW: allocating new hash table %u -> %u buckets\n",
- ipt_netflow_hash_size, new_size);
- new_hash = alloc_hashtable(new_size);
- if (!new_hash)
-@@ -900,19 +1142,24 @@ static int set_hashsize(int new_size)
- get_random_bytes(&rnd, 4);
-
- /* rehash */
-- spin_lock_bh(&ipt_netflow_lock);
-+ write_lock_bh(&htable_rwlock);
- old_hash = ipt_netflow_hash;
- ipt_netflow_hash = new_hash;
- ipt_netflow_hash_size = new_size;
- ipt_netflow_hash_rnd = rnd;
- /* hash_netflow() is dependent on ipt_netflow_hash_* values */
-+ spin_lock(&hlist_lock);
- list_for_each_entry(nf, &ipt_netflow_list, list) {
-+ unsigned int hash;
-+
- hash = hash_netflow(&nf->tuple);
- /* hlist_add_head overwrites hlist pointers for this node
- * so it's good */
- hlist_add_head(&nf->hlist, &new_hash[hash]);
-+ nf->lock = &htable_locks[hash & LOCK_COUNT_MASK];
- }
-- spin_unlock_bh(&ipt_netflow_lock);
-+ spin_unlock(&hlist_lock);
-+ write_unlock_bh(&htable_rwlock);
-
- vfree(old_hash);
-
-@@ -920,14 +1167,14 @@ static int set_hashsize(int new_size)
- }
-
- static struct ipt_netflow *
--ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
-+ipt_netflow_alloc(const struct ipt_netflow_tuple *tuple)
- {
- struct ipt_netflow *nf;
- long count;
-
- nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
- if (!nf) {
-- printk(KERN_ERR "Can't allocate netflow.\n");
-+ printk(KERN_ERR "ipt_NETFLOW: Can't allocate flow.\n");
- return NULL;
- }
-
-@@ -945,13 +1192,15 @@ ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
-
- static void ipt_netflow_free(struct ipt_netflow *nf)
- {
-+ if (IS_DUMMY_FLOW(nf))
-+ return;
- atomic_dec(&ipt_netflow_count);
- kmem_cache_free(ipt_netflow_cachep, nf);
- }
-
- static struct ipt_netflow *
--init_netflow(struct ipt_netflow_tuple *tuple,
-- struct sk_buff *skb, unsigned int hash)
-+init_netflow(const struct ipt_netflow_tuple *tuple,
-+ const struct sk_buff *skb, const unsigned int hash)
- {
- struct ipt_netflow *nf;
-
-@@ -959,93 +1208,774 @@ init_netflow(struct ipt_netflow_tuple *tuple,
- if (!nf)
- return NULL;
-
-+ nf->lock = &htable_locks[hash & LOCK_COUNT_MASK];
- hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]);
-+ spin_lock(&hlist_lock);
- list_add(&nf->list, &ipt_netflow_list);
-+ spin_unlock(&hlist_lock);
-
- return nf;
- }
-
- /* cook pdu, send, and clean */
- /* only called in scan worker path */
--static void netflow_export_pdu(void)
-+static void netflow_export_pdu_v5(void)
- {
- struct timeval tv;
- int pdusize;
-
-- if (!pdu.nr_records)
-+ if (!pdu_data_records)
- return;
-
- if (debug > 1)
-- printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records);
-- do_gettimeofday(&tv);
--
-- pdu.version = htons(5);
-- pdu.ts_uptime = htonl(jiffies_to_msecs(jiffies));
-- pdu.ts_usecs = htonl(tv.tv_sec);
-- pdu.ts_unsecs = htonl(tv.tv_usec);
-- //pdu.eng_type = 0;
-- //pdu.eng_id = 0;
-- //pdu.padding = 0;
-+ printk(KERN_INFO "netflow_export_pdu_v5 with %d records\n", pdu_data_records);
-
-- pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records;
--
-- /* especially fix nr_records before export */
-- pdu.nr_records = htons(pdu.nr_records);
-+ pdu.v5.version = htons(5);
-+ pdu.v5.nr_records = htons(pdu_data_records);
-+ pdu.v5.ts_uptime = htonl(jiffies_to_msecs(jiffies));
-+ do_gettimeofday(&tv);
-+ pdu.v5.ts_usecs = htonl(tv.tv_sec);
-+ pdu.v5.ts_unsecs = htonl(tv.tv_usec);
-+ pdu.v5.seq = htonl(pdu_seq);
-+ //pdu.v5.eng_type = 0;
-+ pdu.v5.eng_id = engine_id;
-+ //pdu.v5.padding = 0;
-
-- if (netflow_send_pdu(&pdu, pdusize) == 0) {
-- /* not least one send succeded, account stat for dropped packets */
-- NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
-- NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
-- }
-+ pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu_data_records;
-
-- pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records));
-+ netflow_sendmsg(&pdu.v5, pdusize);
-
-- pdu.nr_records = 0;
- pdu_packets = 0;
-- pdu_traf = 0;
-+ pdu_traf = 0;
-+
-+ pdu_seq += pdu_data_records;
-+ pdu_count++;
-+ pdu_data_records = 0;
- }
-
- /* only called in scan worker path */
--static void netflow_export_flow(struct ipt_netflow *nf)
-+static void netflow_export_flow_v5(struct ipt_netflow *nf)
- {
- struct netflow5_record *rec;
-
-- if (debug > 2)
-- printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records);
-+ if (unlikely(debug > 2))
-+ printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records);
-
- pdu_packets += nf->nr_packets;
- pdu_traf += nf->nr_bytes;
- pdu_ts_mod = jiffies;
-- rec = &pdu.flow[pdu.nr_records++];
-+ rec = &pdu.v5.flow[pdu_data_records++];
-
- /* make V5 flow record */
-- rec->s_addr = nf->tuple.s_addr;
-- rec->d_addr = nf->tuple.d_addr;
-- //rec->nexthop = 0;
-+ rec->s_addr = nf->tuple.src.ip;
-+ rec->d_addr = nf->tuple.dst.ip;
-+ rec->nexthop = nf->nh.ip;
- rec->i_ifc = htons(nf->tuple.i_ifc);
- rec->o_ifc = htons(nf->o_ifc);
- rec->nr_packets = htonl(nf->nr_packets);
- rec->nr_octets = htonl(nf->nr_bytes);
-- rec->ts_first = htonl(jiffies_to_msecs(nf->ts_first));
-- rec->ts_last = htonl(jiffies_to_msecs(nf->ts_last));
-+ rec->first_ms = htonl(jiffies_to_msecs(nf->ts_first));
-+ rec->last_ms = htonl(jiffies_to_msecs(nf->ts_last));
- rec->s_port = nf->tuple.s_port;
- rec->d_port = nf->tuple.d_port;
-- //rec->reserved = 0;
-+ //rec->reserved = 0; /* pdu is always zeroized for v5 in netflow_switch_version */
- rec->tcp_flags = nf->tcp_flags;
- rec->protocol = nf->tuple.protocol;
- rec->tos = nf->tuple.tos;
-- //rec->s_as = 0;
-- //rec->d_as = 0;
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ rec->s_as = nf->s_as;
-+ rec->d_as = nf->d_as;
-+#endif
- rec->s_mask = nf->s_mask;
- rec->d_mask = nf->d_mask;
- //rec->padding = 0;
- ipt_netflow_free(nf);
-
-- if (pdu.nr_records == NETFLOW5_RECORDS_MAX)
-+ if (pdu_data_records == NETFLOW5_RECORDS_MAX)
-+ netflow_export_pdu_v5();
-+}
-+
-+/* pdu is initially blank, export current pdu, and prepare next for filling. */
-+static void netflow_export_pdu_v9(void)
-+{
-+ struct timeval tv;
-+ int pdusize;
-+
-+ if (pdu_data_used <= pdu.v9.data)
-+ return;
-+
-+ if (debug > 1)
-+ printk(KERN_INFO "netflow_export_pdu_v9 with %d records\n",
-+ pdu_data_records + pdu_tpl_records);
-+
-+ pdu.v9.version = htons(9);
-+ pdu.v9.nr_records = htons(pdu_data_records + pdu_tpl_records);
-+ pdu.v9.sys_uptime_ms = htonl(jiffies_to_msecs(jiffies));
-+ do_gettimeofday(&tv);
-+ pdu.v9.export_time_s = htonl(tv.tv_sec);
-+ pdu.v9.seq = htonl(pdu_seq);
-+ pdu.v9.source_id = engine_id;
-+
-+ pdusize = pdu_data_used - (unsigned char *)&pdu.v9;
-+
-+ netflow_sendmsg(&pdu.v9, pdusize);
-+
-+ pdu_packets = 0;
-+ pdu_traf = 0;
-+
-+ pdu_seq++;
-+ pdu_count++;
-+ pdu_data_records = pdu_tpl_records = 0;
-+ pdu_data_used = pdu.v9.data;
-+ pdu_flowset = NULL;
-+}
-+
-+static void netflow_export_pdu_ipfix(void)
-+{
-+ struct timeval tv;
-+ int pdusize;
-+
-+ if (pdu_data_used <= pdu.ipfix.data)
-+ return;
-+
-+ if (debug > 1)
-+ printk(KERN_INFO "netflow_export_pduX with %d records\n",
-+ pdu_data_records);
-+
-+ pdu.ipfix.version = htons(10);
-+ do_gettimeofday(&tv);
-+ pdu.ipfix.export_time_s = htonl(tv.tv_sec);
-+ pdu.ipfix.seq = htonl(pdu_seq);
-+ pdu.ipfix.odomain_id = engine_id;
-+ pdusize = pdu_data_used - (unsigned char *)&pdu;
-+ pdu.ipfix.length = htons(pdusize);
-+
-+ netflow_sendmsg(&pdu.ipfix, pdusize);
-+
-+ pdu_packets = 0;
-+ pdu_traf = 0;
-+
-+ pdu_seq += pdu_data_records;
-+ pdu_count++;
-+ pdu_data_records = pdu_tpl_records = 0;
-+ pdu_data_used = pdu.ipfix.data;
-+ pdu_flowset = NULL;
-+}
-+
-+static inline int pdu_have_space(const size_t size)
-+{
-+ return ((pdu_data_used + size) <= pdu_high_wm);
-+}
-+
-+static inline unsigned char *pdu_grab_space(const size_t size)
-+{
-+ unsigned char *ptr = pdu_data_used;
-+ pdu_data_used += size;
-+ return ptr;
-+}
-+
-+// allocate data space in pdu, or fail if pdu is reallocated.
-+static inline unsigned char *pdu_alloc_fail(const size_t size)
-+{
-+ if (!pdu_have_space(size)) {
- netflow_export_pdu();
-+ return NULL;
-+ }
-+ return pdu_grab_space(size);
-+}
-+
-+/* doesn't fail, but can provide empty pdu. */
-+static unsigned char *pdu_alloc(const size_t size)
-+{
-+ return pdu_alloc_fail(size) ?: pdu_grab_space(size);
-+}
-+
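/* A minimal usage sketch (illustrative only; the real caller is
 * netflow_export_flow_tpl() further below): records that must stay inside
 * the currently open flowset try pdu_alloc_fail() first and, when it
 * returns NULL (the pdu has just been exported and reset), open a fresh
 * flowset with pdu_alloc(), which never fails:
 *
 *	unsigned char *ptr = pdu_alloc_fail(rec_size);
 *	if (!ptr)
 *		ptr = pdu_alloc(sizeof(struct flowset_data) + rec_size);
 */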
-+/* global table of sizes of template field types */
-+static u_int8_t tpl_element_sizes[] = {
-+ [IN_BYTES] = 4,
-+ [IN_PKTS] = 4,
-+ [PROTOCOL] = 1,
-+ [TOS] = 1,
-+ [TCP_FLAGS] = 1,
-+ [L4_SRC_PORT] = 2,
-+ [IPV4_SRC_ADDR] = 4,
-+ [SRC_MASK] = 1,
-+ [INPUT_SNMP] = 2,
-+ [L4_DST_PORT] = 2,
-+ [IPV4_DST_ADDR] = 4,
-+ [DST_MASK] = 1,
-+ [OUTPUT_SNMP] = 2,
-+ [IPV4_NEXT_HOP] = 4,
-+ //[SRC_AS] = 2,
-+ //[DST_AS] = 2,
-+ //[BGP_IPV4_NEXT_HOP] = 4,
-+ //[MUL_DST_PKTS] = 4,
-+ //[MUL_DST_BYTES] = 4,
-+ [LAST_SWITCHED] = 4,
-+ [FIRST_SWITCHED]= 4,
-+ [IPV6_SRC_ADDR] = 16,
-+ [IPV6_DST_ADDR] = 16,
-+ [IPV6_FLOW_LABEL] = 3,
-+ [ICMP_TYPE] = 2,
-+ [MUL_IGMP_TYPE] = 1,
-+ //[TOTAL_BYTES_EXP] = 4,
-+ //[TOTAL_PKTS_EXP] = 4,
-+ //[TOTAL_FLOWS_EXP] = 4,
-+ [IPV6_NEXT_HOP] = 16,
-+ [IPV6_OPTION_HEADERS] = 2,
-+ [commonPropertiesId] = 4,
-+ [ipv4Options] = 4,
-+ [tcpOptions] = 4,
-+ [postNATSourceIPv4Address] = 4,
-+ [postNATDestinationIPv4Address] = 4,
-+ [postNAPTSourceTransportPort] = 2,
-+ [postNAPTDestinationTransportPort] = 2,
-+ [natEvent] = 1,
-+ [postNATSourceIPv6Address] = 16,
-+ [postNATDestinationIPv6Address] = 16,
-+ [IPSecSPI] = 4,
-+ [observationTimeMilliseconds] = 8,
-+ [observationTimeMicroseconds] = 8,
-+ [observationTimeNanoseconds] = 8,
-+};
-+
-+#define TEMPLATES_HASH_BSIZE 8
-+#define TEMPLATES_HASH_SIZE (1<<TEMPLATES_HASH_BSIZE)
-+static struct hlist_head templates_hash[TEMPLATES_HASH_SIZE];
-+
-+struct base_template {
-+ int length; /* number of elements in template */
-+ u_int16_t types[]; /* {type, size} pairs */
-+};
-+
-+/* base templates */
-+#define BTPL_BASE 0x00000001 /* base stat */
-+#define BTPL_IP4 0x00000002 /* IPv4 */
-+#define BTPL_MASK4 0x00000004 /* Aggregated */
-+#define BTPL_PORTS 0x00000008 /* UDP&TCP */
-+#define BTPL_IP6 0x00000010 /* IPv6 */
-+#define BTPL_ICMP 0x00000020 /* ICMP */
-+#define BTPL_IGMP 0x00000040 /* IGMP */
-+#define BTPL_IPSEC 0x00000080 /* AH&ESP */
-+#define BTPL_NAT4 0x00000100 /* NAT IPv4 */
-+#define BTPL_MARK 0x00000400 /* connmark */
-+#define BTPL_LABEL6 0x00000800 /* IPv6 flow label */
-+#define BTPL_OPTIONS4 0x00001000 /* IPv4 Options */
-+#define BTPL_OPTIONS6 0x00002000 /* IPv6 Options */
-+#define BTPL_TCPOPTIONS 0x00004000 /* TCP Options */
-+#define BTPL_MAX 32
-+
-+static struct base_template template_base = {
-+ .types = {
-+ INPUT_SNMP,
-+ OUTPUT_SNMP,
-+ IN_PKTS,
-+ IN_BYTES,
-+ FIRST_SWITCHED,
-+ LAST_SWITCHED,
-+ PROTOCOL,
-+ TOS,
-+ 0
-+ }
-+};
-+static struct base_template template_ipv4 = {
-+ .types = {
-+ IPV4_SRC_ADDR,
-+ IPV4_DST_ADDR,
-+ IPV4_NEXT_HOP,
-+ 0
-+ }
-+};
-+static struct base_template template_options4 = {
-+ .types = { ipv4Options, 0 }
-+};
-+static struct base_template template_tcpoptions = {
-+ .types = { tcpOptions, 0 }
-+};
-+static struct base_template template_ipv6 = {
-+ .types = {
-+ IPV6_SRC_ADDR,
-+ IPV6_DST_ADDR,
-+ IPV6_NEXT_HOP,
-+ 0
-+ }
-+};
-+static struct base_template template_options6 = {
-+ .types = { IPV6_OPTION_HEADERS, 0 }
-+};
-+static struct base_template template_label6 = {
-+ .types = { IPV6_FLOW_LABEL, 0 }
-+};
-+static struct base_template template_ipv4_mask = {
-+ .types = {
-+ SRC_MASK,
-+ DST_MASK,
-+ 0
-+ }
-+};
-+static struct base_template template_ports = {
-+ .types = {
-+ L4_SRC_PORT,
-+ L4_DST_PORT,
-+ TCP_FLAGS,
-+ 0
-+ }
-+};
-+static struct base_template template_icmp = {
-+ .types = { ICMP_TYPE, 0 }
-+};
-+static struct base_template template_igmp = {
-+ .types = { MUL_IGMP_TYPE, 0 }
-+};
-+static struct base_template template_ipsec = {
-+ .types = { IPSecSPI, 0 }
-+};
-+static struct base_template template_nat4 = {
-+ .types = {
-+ observationTimeMilliseconds,
-+ IPV4_SRC_ADDR,
-+ IPV4_DST_ADDR,
-+ postNATSourceIPv4Address,
-+ postNATDestinationIPv4Address,
-+ L4_SRC_PORT,
-+ L4_DST_PORT,
-+ postNAPTSourceTransportPort,
-+ postNAPTDestinationTransportPort,
-+ PROTOCOL,
-+ natEvent,
-+ 0
-+ }
-+};
-+static struct base_template template_mark = {
-+ .types = { commonPropertiesId, 0 }
-+};
-+
-+struct data_template {
-+ struct hlist_node hlist;
-+ int tpl_mask;
-+
-+ int length; /* number of elements in template */
-+ int tpl_size; /* summary size of template with flowset header */
-+ int rec_size; /* summary size of all recods of template (w/o flowset header) */
-+ int template_id_n; /* assigned from template_ids, network order. */
-+ int exported_cnt;
-+ unsigned long exported_ts; /* jiffies */
-+ u_int16_t fields[]; /* {type, size} pairs */
-+} __attribute__ ((packed));
-+
-+#define TPL_FIELD_NSIZE 4 /* one complete template field's network size */
-+
-+static void free_templates(void)
-+{
-+ int i;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
-+ struct hlist_node *pos;
-+#endif
-+ struct hlist_node *tmp;
-+
-+ for (i = 0; i < TEMPLATES_HASH_SIZE; i++) {
-+ struct hlist_head *thead = &templates_hash[i];
-+ struct data_template *tpl;
-+
-+ compat_hlist_for_each_entry_safe(tpl, pos, tmp, thead, hlist)
-+ kfree(tpl);
-+ INIT_HLIST_HEAD(thead);
-+ }
-+ tpl_count = 0;
-+}
-+
-+/* create combined template from mask */
-+static struct data_template *get_template(const int tmask)
-+{
-+ struct base_template *tlist[BTPL_MAX];
-+ struct data_template *tpl;
-+ int tnum;
-+ int length;
-+ int i, j, k;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
-+ struct hlist_node *pos;
-+#endif
-+ int hash = hash_long(tmask, TEMPLATES_HASH_BSIZE);
-+
-+ compat_hlist_for_each_entry(tpl, pos, &templates_hash[hash], hlist)
-+ if (tpl->tpl_mask == tmask)
-+ return tpl;
-+
-+ tnum = 0;
-+ if (tmask & BTPL_IP4) {
-+ tlist[tnum++] = &template_ipv4;
-+ if (tmask & BTPL_OPTIONS4)
-+ tlist[tnum++] = &template_options4;
-+ if (tmask & BTPL_MASK4)
-+ tlist[tnum++] = &template_ipv4_mask;
-+ } else if (tmask & BTPL_IP6) {
-+ tlist[tnum++] = &template_ipv6;
-+ if (tmask & BTPL_LABEL6)
-+ tlist[tnum++] = &template_label6;
-+ if (tmask & BTPL_OPTIONS6)
-+ tlist[tnum++] = &template_options6;
-+ } else if (tmask & BTPL_NAT4)
-+ tlist[tnum++] = &template_nat4;
-+ if (tmask & BTPL_PORTS)
-+ tlist[tnum++] = &template_ports;
-+ if (tmask & BTPL_BASE)
-+ tlist[tnum++] = &template_base;
-+ if (tmask & BTPL_TCPOPTIONS)
-+ tlist[tnum++] = &template_tcpoptions;
-+ if (tmask & BTPL_ICMP)
-+ tlist[tnum++] = &template_icmp;
-+ if (tmask & BTPL_IGMP)
-+ tlist[tnum++] = &template_igmp;
-+ if (tmask & BTPL_IPSEC)
-+ tlist[tnum++] = &template_ipsec;
-+ if (tmask & BTPL_MARK)
-+ tlist[tnum++] = &template_mark;
-+
-+ /* calc memory size */
-+ length = 0;
-+ for (i = 0; i < tnum; i++) {
-+ if (!tlist[i]->length) {
-+ for (k = 0; tlist[i]->types[k]; k++);
-+ tlist[i]->length = k;
-+ }
-+ length += tlist[i]->length;
-+ }
-+	/* elements are pairs + one terminator */
-+ tpl = kmalloc(sizeof(struct data_template) + (length * 2 + 1) * sizeof(u_int16_t), GFP_KERNEL);
-+ if (!tpl) {
-+ printk(KERN_ERR "ipt_NETFLOW: unable to kmalloc template.\n");
-+ return NULL;
-+ }
-+ tpl->tpl_mask = tmask;
-+ tpl->length = length;
-+ tpl->tpl_size = sizeof(struct flowset_template);
-+ tpl->rec_size = 0;
-+ tpl->template_id_n = htons(template_ids++);
-+ tpl->exported_cnt = 0;
-+ tpl->exported_ts = 0;
-+
-+ j = 0;
-+ for (i = 0; i < tnum; i++) {
-+ struct base_template *btpl = tlist[i];
-+
-+ for (k = 0; k < btpl->length; k++) {
-+ int size;
-+ int type = btpl->types[k];
-+
-+ tpl->fields[j++] = type;
-+ size = tpl_element_sizes[type];
-+ tpl->fields[j++] = size;
-+ tpl->rec_size += size;
-+ }
-+ tpl->tpl_size += btpl->length * TPL_FIELD_NSIZE;
-+ }
-+ tpl->fields[j++] = 0;
-+
-+ hlist_add_head(&tpl->hlist, &templates_hash[hash]);
-+ tpl_count++;
-+
-+ return tpl;
-+}
-+
-+static void pdu_add_template(struct data_template *tpl)
-+{
-+ int i;
-+ unsigned char *ptr;
-+ struct flowset_template *ntpl;
-+ __be16 *sptr;
-+
-+ ptr = pdu_alloc(tpl->tpl_size);
-+ ntpl = (struct flowset_template *)ptr;
-+ ntpl->flowset_id = protocol == 9? htons(FLOWSET_TEMPLATE) : htons(IPFIX_TEMPLATE);
-+ ntpl->length = htons(tpl->tpl_size);
-+ ntpl->template_id = tpl->template_id_n;
-+ ntpl->field_count = htons(tpl->length);
-+ ptr += sizeof(struct flowset_template);
-+ sptr = (__be16 *)ptr;
-+ for (i = 0; ; ) {
-+ int type = tpl->fields[i++];
-+ if (!type)
-+ break;
-+ *sptr++ = htons(type);
-+ *sptr++ = htons(tpl->fields[i++]);
-+ }
-+
-+ tpl->exported_cnt = pdu_count;
-+ tpl->exported_ts = jiffies;
-+
-+ pdu_flowset = NULL;
-+ pdu_tpl_records++;
-+}
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
-+static inline s64 portable_ktime_to_ms(const ktime_t kt)
-+{
-+ struct timeval tv = ktime_to_timeval(kt);
-+ return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
-+}
-+#define ktime_to_ms portable_ktime_to_ms
-+#endif
-+
-+/* encode one field */
-+typedef struct in6_addr in6_t;
-+static inline void add_ipv4_field(__u8 *ptr, const int type, const struct ipt_netflow *nf)
-+{
-+ switch (type) {
-+ case IN_BYTES: *(__be32 *)ptr = htonl(nf->nr_bytes); break;
-+ case IN_PKTS: *(__be32 *)ptr = htonl(nf->nr_packets); break;
-+ case FIRST_SWITCHED: *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_first)); break;
-+ case LAST_SWITCHED: *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_last)); break;
-+ case IPV4_SRC_ADDR: *(__be32 *)ptr = nf->tuple.src.ip; break;
-+ case IPV4_DST_ADDR: *(__be32 *)ptr = nf->tuple.dst.ip; break;
-+ case IPV4_NEXT_HOP: *(__be32 *)ptr = nf->nh.ip; break;
-+ case L4_SRC_PORT: *(__be16 *)ptr = nf->tuple.s_port; break;
-+ case L4_DST_PORT: *(__be16 *)ptr = nf->tuple.d_port; break;
-+ case INPUT_SNMP: *(__be16 *)ptr = htons(nf->tuple.i_ifc); break;
-+ case OUTPUT_SNMP: *(__be16 *)ptr = htons(nf->o_ifc); break;
-+ case PROTOCOL: *ptr = nf->tuple.protocol; break;
-+ case TCP_FLAGS: *ptr = nf->tcp_flags; break;
-+ case TOS: *ptr = nf->tuple.tos; break;
-+ case IPV6_SRC_ADDR: *(in6_t *)ptr = nf->tuple.src.in6; break;
-+ case IPV6_DST_ADDR: *(in6_t *)ptr = nf->tuple.dst.in6; break;
-+ case IPV6_NEXT_HOP: *(in6_t *)ptr = nf->nh.in6; break;
-+ case IPV6_FLOW_LABEL: *ptr++ = nf->flow_label >> 16;
-+ *(__be16 *)ptr = nf->flow_label;
-+ break;
-+ case tcpOptions: *(__be32 *)ptr = htonl(nf->tcpoptions); break;
-+ case ipv4Options: *(__be32 *)ptr = htonl(nf->options); break;
-+ case IPV6_OPTION_HEADERS: *(__be16 *)ptr = htons(nf->options); break;
-+#ifdef CONFIG_NF_CONNTRACK_MARK
-+ case commonPropertiesId:
-+ *(__be32 *)ptr = htonl(nf->mark); break;
-+#endif
-+ case SRC_MASK: *ptr = nf->s_mask; break;
-+ case DST_MASK: *ptr = nf->d_mask; break;
-+ case ICMP_TYPE: *(__be16 *)ptr = nf->tuple.d_port; break;
-+ case MUL_IGMP_TYPE: *ptr = nf->tuple.d_port; break;
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ case postNATSourceIPv4Address: *(__be32 *)ptr = nf->nat->post.s_addr; break;
-+ case postNATDestinationIPv4Address: *(__be32 *)ptr = nf->nat->post.d_addr; break;
-+ case postNAPTSourceTransportPort: *(__be16 *)ptr = nf->nat->post.s_port; break;
-+ case postNAPTDestinationTransportPort: *(__be16 *)ptr = nf->nat->post.d_port; break;
-+ case natEvent: *ptr = nf->nat->nat_event; break;
-+#endif
-+ case IPSecSPI: *(__u32 *)ptr = (nf->tuple.s_port << 16) | nf->tuple.d_port; break;
-+ case observationTimeMilliseconds:
-+ *(__be64 *)ptr = cpu_to_be64(ktime_to_ms(nf->ts_obs)); break;
-+ case observationTimeMicroseconds:
-+ *(__be64 *)ptr = cpu_to_be64(ktime_to_us(nf->ts_obs)); break;
-+ case observationTimeNanoseconds:
-+ *(__be64 *)ptr = cpu_to_be64(ktime_to_ns(nf->ts_obs)); break;
-+ default:
-+ memset(ptr, 0, tpl_element_sizes[type]);
-+ }
-+}
-+
-+#define PAD_SIZE 4 /* rfc prescribes flowsets to be padded */
-+
-+/* cache timeout_rate in jiffies */
-+static inline unsigned long timeout_rate_j(void)
-+{
-+ static unsigned int t_rate = 0;
-+ static unsigned long t_rate_j = 0;
-+
-+ if (unlikely(timeout_rate != t_rate)) {
-+ struct timeval tv = { .tv_sec = timeout_rate * 60, .tv_usec = 0 };
-+
-+ t_rate = timeout_rate;
-+ t_rate_j = timeval_to_jiffies(&tv);
-+ }
-+ return t_rate_j;
-+}
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-+#define IPPROTO_UDPLITE 136
-+#endif
-+
-+#ifndef time_is_before_jiffies
-+#define time_is_before_jiffies(a) time_after(jiffies, a)
-+#endif
-+
-+static void netflow_export_flow_tpl(struct ipt_netflow *nf)
-+{
-+ unsigned char *ptr;
-+ int i;
-+ struct data_template *tpl;
-+ int tpl_mask = BTPL_BASE;
-+
-+ if (unlikely(debug > 2))
-+ printk(KERN_INFO "adding flow to export (%d)\n",
-+ pdu_data_records + pdu_tpl_records);
-+
-+ if (likely(nf->tuple.l3proto == AF_INET)) {
-+ tpl_mask |= BTPL_IP4;
-+ if (unlikely(nf->options))
-+ tpl_mask |= BTPL_OPTIONS4;
-+ } else {
-+ tpl_mask |= BTPL_IP6;
-+ if (unlikely(nf->options))
-+ tpl_mask |= BTPL_OPTIONS6;
-+ if (unlikely(nf->flow_label))
-+ tpl_mask |= BTPL_LABEL6;
-+ }
-+ if (unlikely(nf->tcpoptions))
-+ tpl_mask |= BTPL_TCPOPTIONS;
-+ if (unlikely(nf->s_mask || nf->d_mask))
-+ tpl_mask |= BTPL_MASK4;
-+ if (likely(nf->tuple.protocol == IPPROTO_TCP ||
-+ nf->tuple.protocol == IPPROTO_UDP ||
-+ nf->tuple.protocol == IPPROTO_SCTP ||
-+ nf->tuple.protocol == IPPROTO_UDPLITE))
-+ tpl_mask |= BTPL_PORTS;
-+ else if (nf->tuple.protocol == IPPROTO_ICMP)
-+ tpl_mask |= BTPL_ICMP;
-+ else if (nf->tuple.protocol == IPPROTO_IGMP)
-+ tpl_mask |= BTPL_IGMP;
-+#ifdef CONFIG_NF_CONNTRACK_MARK
-+ if (nf->mark)
-+ tpl_mask |= BTPL_MARK;
-+#endif
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ if (nf->nat)
-+ tpl_mask = BTPL_NAT4;
-+#endif
-+
-+ tpl = get_template(tpl_mask);
-+ if (unlikely(!tpl)) {
-+ printk(KERN_INFO "ipt_NETFLOW: template allocation failed.\n");
-+ NETFLOW_STAT_INC(alloc_err);
-+ NETFLOW_STAT_ADD_ATOMIC(pkt_drop, nf->nr_packets);
-+ NETFLOW_STAT_ADD_ATOMIC(traf_drop, nf->nr_bytes);
-+ ipt_netflow_free(nf);
-+ return;
-+ }
-+
-+ if (unlikely(!pdu_flowset ||
-+ pdu_flowset->flowset_id != tpl->template_id_n ||
-+ !(ptr = pdu_alloc_fail(tpl->rec_size)))) {
-+
-+ /* if there was previous data template we should pad it to 4 bytes */
-+ if (pdu_flowset) {
-+ int padding = (PAD_SIZE - ntohs(pdu_flowset->length) % PAD_SIZE) % PAD_SIZE;
-+ if (padding && (ptr = pdu_alloc_fail(padding))) {
-+ pdu_flowset->length = htons(ntohs(pdu_flowset->length) + padding);
-+ for (; padding; padding--)
-+ *ptr++ = 0;
-+ }
-+ }
-+
-+ if (!tpl->exported_ts ||
-+ pdu_count > (tpl->exported_cnt + refresh_rate) ||
-+ time_is_before_jiffies(tpl->exported_ts + timeout_rate_j())) {
-+ pdu_add_template(tpl);
-+ }
-+
-+ ptr = pdu_alloc(sizeof(struct flowset_data) + tpl->rec_size);
-+ pdu_flowset = (struct flowset_data *)ptr;
-+ pdu_flowset->flowset_id = tpl->template_id_n;
-+ pdu_flowset->length = htons(sizeof(struct flowset_data));
-+ ptr += sizeof(struct flowset_data);
-+ }
-+
-+ /* encode all fields */
-+ for (i = 0; ; ) {
-+ int type = tpl->fields[i++];
-+
-+ if (!type)
-+ break;
-+ add_ipv4_field(ptr, type, nf);
-+ ptr += tpl->fields[i++];
-+ }
-+
-+ pdu_data_records++;
-+ pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size);
-+
-+ pdu_packets += nf->nr_packets;
-+ pdu_traf += nf->nr_bytes;
-+
-+ ipt_netflow_free(nf);
-+ pdu_ts_mod = jiffies;
-+}
-+
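/* Worked example of the flowset padding arithmetic used above, assuming
 * PAD_SIZE == 4 as defined earlier (the RFC requires flowsets to end on a
 * 4-byte boundary):
 *
 *	padding = (4 - flowset_length % 4) % 4;
 *
 *	flowset_length 37  ->  padding 3, flowset exported as 40 bytes
 *	flowset_length 40  ->  padding 0, nothing appended
 */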
-+static void netflow_switch_version(const int ver)
-+{
-+ protocol = ver;
-+ if (protocol == 5) {
-+ memset(&pdu, 0, sizeof(pdu));
-+ netflow_export_flow = &netflow_export_flow_v5;
-+ netflow_export_pdu = &netflow_export_pdu_v5;
-+ } else if (protocol == 9) {
-+ pdu_data_used = pdu.v9.data;
-+ pdu_max_size = sizeof(pdu.v9);
-+ pdu_high_wm = (unsigned char *)&pdu + pdu_max_size;
-+ netflow_export_flow = &netflow_export_flow_tpl;
-+ netflow_export_pdu = &netflow_export_pdu_v9;
-+ } else { /* IPFIX */
-+ pdu_data_used = pdu.ipfix.data;
-+ pdu_max_size = sizeof(pdu.ipfix);
-+ pdu_high_wm = (unsigned char *)&pdu + pdu_max_size;
-+ netflow_export_flow = &netflow_export_flow_tpl;
-+ netflow_export_pdu = &netflow_export_pdu_ipfix;
-+ }
-+ if (protocol != 5)
-+ free_templates();
-+ pdu_data_records = pdu_tpl_records = 0;
-+ pdu_flowset = NULL;
-+ printk(KERN_INFO "ipt_NETFLOW protocol version %d (%s) enabled.\n",
-+ protocol, protocol == 10? "IPFIX" : "NetFlow");
-+}
-+
-+#ifdef CONFIG_NF_NAT_NEEDED
-+static void export_nat_event(struct nat_event *nel)
-+{
-+ static struct ipt_netflow nf = { { NULL } };
-+
-+ nf.tuple.l3proto = AF_INET;
-+ nf.tuple.protocol = nel->protocol;
-+ nf.nat = nel; /* this is also flag of dummy flow */
-+ nf.tcp_flags = (nel->nat_event == NAT_DESTROY)? TCP_FIN_RST : TCP_SYN_ACK;
-+ if (protocol >= 9) {
-+ nf.ts_obs = nel->ts_ktime;
-+ nf.tuple.src.ip = nel->pre.s_addr;
-+ nf.tuple.dst.ip = nel->pre.d_addr;
-+ nf.tuple.s_port = nel->pre.s_port;
-+ nf.tuple.d_port = nel->pre.d_port;
-+ netflow_export_flow(&nf);
-+ } else { /* v5 */
-+ /* The weird v5 packet(s).
-+		 * src and dst will be the same as in the data flow from the FORWARD chain
-+		 * where src is pre-nat src ip and dst is post-nat dst ip.
-+		 * What we are lacking here is the external src ip for SNAT, or the
-+		 * pre-nat dst ip for DNAT. We will put this into the Nexthop field
-+		 * with the port in the src/dst AS field. tcp_flags will distinguish
-+		 * start and stop events. Two flows in case of full nat. */
-+ nf.tuple.src.ip = nel->pre.s_addr;
-+ nf.tuple.s_port = nel->pre.s_port;
-+ nf.tuple.dst.ip = nel->post.d_addr;
-+ nf.tuple.d_port = nel->post.d_port;
-+
-+ nf.ts_first = nel->ts_jiffies;
-+ nf.ts_last = nel->ts_jiffies;
-+ if (nel->pre.s_addr != nel->post.s_addr ||
-+ nel->pre.s_port != nel->post.s_port) {
-+ nf.nh.ip = nel->post.s_addr;
-+ nf.s_as = nel->post.s_port;
-+ nf.d_as = 0;
-+ netflow_export_flow(&nf);
-+ }
-+ if (nel->pre.d_addr != nel->post.d_addr ||
-+ nel->pre.d_port != nel->post.d_port) {
-+ nf.nh.ip = nel->pre.d_addr;
-+ nf.s_as = 0;
-+ nf.d_as = nel->pre.d_port;
-+ netflow_export_flow(&nf);
-+ }
-+ }
-+ kfree(nel);
- }
-+#endif /* CONFIG_NF_NAT_NEEDED */
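/* Worked example of the v5 NAT encoding above (addresses are illustrative
 * values from the documentation ranges): a NAT_CREATE event for an
 * SNAT-only connection with
 *	pre  = 192.0.2.10:12345 -> 198.51.100.1:53
 *	post = 203.0.113.7:40000 -> 198.51.100.1:53
 * emits a single v5 record:
 *	src = 192.0.2.10:12345, dst = 198.51.100.1:53,
 *	nexthop = 203.0.113.7 (post-nat source address),
 *	s_as = 40000 (post-nat source port), d_as = 0,
 *	tcp_flags = SYN|ACK (FIN|RST would indicate NAT_DESTROY).
 * The destination side is unchanged, so the second record is skipped;
 * a full NAT connection would emit both records. */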
-
--static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
-+static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout)
- {
- /* active too long, finishing, or having too much bytes */
- return ((jiffies - nf->ts_first) > a_timeout) ||
-@@ -1057,42 +1987,77 @@ static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
-
- /* could be called with zero to flush cache and pdu */
- /* this function is guaranteed to be called non-concurrently */
--static void netflow_scan_and_export(int flush)
-+/* return -1 if trylock failed, 0 if nothing exported, >=1 if exported something */
-+static int netflow_scan_and_export(const int flush)
- {
- long i_timeout = inactive_timeout * HZ;
- long a_timeout = active_timeout * HZ;
-+ int trylock_failed = 0;
-+ int pdu_c = pdu_count;
-
- if (flush)
- i_timeout = 0;
-
-- spin_lock_bh(&ipt_netflow_lock);
-- while (!list_empty(&ipt_netflow_list)) {
-+ local_bh_disable();
-+ spin_lock(&hlist_lock);
-+	/* This is a different order of locking than elsewhere,
-+ * so we trylock&break to avoid deadlock. */
-+
-+ while (likely(!list_empty(&ipt_netflow_list))) {
- struct ipt_netflow *nf;
--
-+
-+ /* Last entry, which is usually oldest. */
- nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list);
-+ if (!spin_trylock(nf->lock)) {
-+ trylock_failed = 1;
-+ break;
-+ }
- /* Note: i_timeout checked with >= to allow specifying zero timeout
- * to purge all flows on module unload */
- if (((jiffies - nf->ts_last) >= i_timeout) ||
- active_needs_export(nf, a_timeout)) {
- hlist_del(&nf->hlist);
-+ spin_unlock(nf->lock);
-+
- list_del(&nf->list);
-+ spin_unlock(&hlist_lock);
-+ local_bh_enable();
-+
- NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
- NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
-- spin_unlock_bh(&ipt_netflow_lock);
- netflow_export_flow(nf);
-- spin_lock_bh(&ipt_netflow_lock);
-+
-+ local_bh_disable();
-+ spin_lock(&hlist_lock);
- } else {
-+ spin_unlock(nf->lock);
-		/* all flows which need to be exported are always at the tail
-		 * so if there are no more exportable flows we can break */
- break;
- }
- }
-- spin_unlock_bh(&ipt_netflow_lock);
--
-+ spin_unlock(&hlist_lock);
-+ local_bh_enable();
-+
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ spin_lock_bh(&nat_lock);
-+ while (!list_empty(&nat_list)) {
-+ struct nat_event *nel;
-+
-+ nel = list_entry(nat_list.next, struct nat_event, list);
-+ list_del(&nel->list);
-+ spin_unlock_bh(&nat_lock);
-+ export_nat_event(nel);
-+ spin_lock_bh(&nat_lock);
-+ }
-+ spin_unlock_bh(&nat_lock);
-+#endif
-	/* flush flows stored in pdu if there are no new flows for too long */
- /* Note: using >= to allow flow purge on zero timeout */
- if ((jiffies - pdu_ts_mod) >= i_timeout)
- netflow_export_pdu();
-+
-+ return trylock_failed? -1 : pdu_count - pdu_c;
- }
-
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
-@@ -1101,8 +2066,10 @@ static void netflow_work_fn(void *dummy)
- static void netflow_work_fn(struct work_struct *dummy)
- #endif
- {
-- netflow_scan_and_export(0);
-- __start_scan_worker();
-+ int status;
-+
-+ status = netflow_scan_and_export(DONT_FLUSH);
-+ _schedule_scan_worker(status);
- }
-
- #define RATESHIFT 2
-@@ -1154,7 +2121,7 @@ static void rate_timer_calc(unsigned long dummy)
- old_found = found;
- old_notfound = notfound;
- /* if there is no access to hash keep rate steady */
-- metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
-+ metric = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
- CALC_RATE(min15_metric, (unsigned long long)metric, 15);
- CALC_RATE(min5_metric, (unsigned long long)metric, 5);
- CALC_RATE(min_metric, (unsigned long long)metric, 1);
-@@ -1162,6 +2129,262 @@ static void rate_timer_calc(unsigned long dummy)
- mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
- }
-
-+#ifdef CONFIG_NF_NAT_NEEDED
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+static struct nf_ct_event_notifier *saved_event_cb __read_mostly = NULL;
-+static int netflow_conntrack_event(const unsigned int events, struct nf_ct_event *item)
-+#else
-+static int netflow_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr)
-+#endif
-+{
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+ struct nf_conn *ct = item->ct;
-+#else
-+ struct nf_conn *ct = (struct nf_conn *)ptr;
-+#endif
-+ struct nat_event *nel;
-+ const struct nf_conntrack_tuple *t;
-+ int ret = NOTIFY_DONE;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+ struct nf_ct_event_notifier *notifier;
-+
-+ /* Call netlink first. */
-+ notifier = rcu_dereference(saved_event_cb);
-+ if (likely(notifier))
-+ ret = notifier->fcn(events, item);
-+#endif
-+ if (unlikely(!natevents))
-+ return ret;
-+
-+ if (!(events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED) | (1 << IPCT_DESTROY))))
-+ return ret;
-+
-+ if (!(ct->status & IPS_NAT_MASK))
-+ return ret;
-+
-+ if (unlikely(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET ||
-+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num != AF_INET)) {
-+ /* Well, there is no linux NAT for IPv6 anyway. */
-+ return ret;
-+ }
-+
-+ if (!(nel = kmalloc(sizeof(struct nat_event), GFP_ATOMIC))) {
-+ printk(KERN_ERR "ipt_NETFLOW: can't kmalloc nat event\n");
-+ return ret;
-+ }
-+ memset(nel, 0, sizeof(struct nat_event));
-+ nel->ts_ktime = ktime_get_real();
-+ nel->ts_jiffies = jiffies;
-+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-+ nel->protocol = t->dst.protonum;
-+ nel->pre.s_addr = t->src.u3.ip;
-+ nel->pre.d_addr = t->dst.u3.ip;
-+ nel->pre.s_port = t->src.u.all;
-+ nel->pre.d_port = t->dst.u.all;
-+ t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-+ /* reply is reversed */
-+ nel->post.s_addr = t->dst.u3.ip;
-+ nel->post.d_addr = t->src.u3.ip;
-+ nel->post.s_port = t->dst.u.all;
-+ nel->post.d_port = t->src.u.all;
-+ if (events & (1 << IPCT_DESTROY)) {
-+ nel->nat_event = NAT_DESTROY;
-+ nat_events_stop++;
-+ } else {
-+ nel->nat_event = NAT_CREATE;
-+ nat_events_start++;
-+ }
-+
-+ spin_lock_bh(&nat_lock);
-+ list_add_tail(&nel->list, &nat_list);
-+ spin_unlock_bh(&nat_lock);
-+
-+ return ret;
-+}
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
-+static struct notifier_block ctnl_notifier = {
-+ .notifier_call = netflow_conntrack_event
-+};
-+#else
-+static struct nf_ct_event_notifier ctnl_notifier = {
-+ .fcn = netflow_conntrack_event
-+};
-+#endif /* since 2.6.31 */
-+#endif /* CONFIG_NF_NAT_NEEDED */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) && \
-+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
-+static bool
-+#else
-+static int
-+#endif
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-+netflow_target_check(const char *tablename, const void *entry, const struct xt_target *target,
-+ void *targinfo,
-+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
-+ unsigned int targinfosize,
-+#endif
-+ unsigned int hook_mask)
-+{
-+#else
-+netflow_target_check(const struct xt_tgchk_param *par)
-+{
-+ const char *tablename = par->table;
-+ const struct xt_target *target = par->target;
-+#endif
-+ if (strcmp("nat", tablename) == 0) {
-+		/* In the nat table we only see a single packet per flow, which is useless. */
-+ printk(KERN_ERR "%s target: is not valid in %s table\n", target->name, tablename);
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
-+#define CHECK_FAIL 0
-+#define CHECK_OK 1
-+#else
-+#define CHECK_FAIL -EINVAL
-+#define CHECK_OK 0
-+#endif
-+ return CHECK_FAIL;
-+ }
-+ if (target->family == AF_INET6 && protocol == 5) {
-+ printk(KERN_ERR "ip6tables NETFLOW target is meaningful for protocol 9 or 10 only.\n");
-+ return CHECK_FAIL;
-+ }
-+ return CHECK_OK;
-+}
-+
-+#define SetXBit(x) (0x8000 >> (x)) /* Proper bit for htons later. */
-+#ifndef IPPROTO_MH
-+#define IPPROTO_MH 135
-+#endif
-+static inline __u16 observed_hdrs(const __u8 currenthdr)
-+{
-+ switch (currenthdr) {
-+ case IPPROTO_TCP:
-+ case IPPROTO_UDP:
-+ /* For speed, in case switch is not optimized. */
-+ return 0;
-+ case IPPROTO_DSTOPTS: return SetXBit(0);
-+ case IPPROTO_HOPOPTS: return SetXBit(1);
-+ case IPPROTO_ROUTING: return SetXBit(5);
-+ case IPPROTO_MH: return SetXBit(12);
-+ case IPPROTO_ESP: return SetXBit(13);
-+ case IPPROTO_AH: return SetXBit(14);
-+ case IPPROTO_COMP: return SetXBit(15);
-+ case IPPROTO_FRAGMENT: /* Handled elsewhere. */
-+		/* Next are known headers. */
-+ case IPPROTO_ICMPV6:
-+ case IPPROTO_UDPLITE:
-+ case IPPROTO_IPIP:
-+ case IPPROTO_PIM:
-+ case IPPROTO_GRE:
-+ case IPPROTO_SCTP:
-+#ifdef IPPROTO_L2TP
-+ case IPPROTO_L2TP:
-+#endif
-+ case IPPROTO_DCCP:
-+ return 0;
-+ }
-+ return SetXBit(3); /* Unknown header. */
-+}
-+
-+/* http://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml */
-+static const __u8 ip4_opt_table[] = {
-+	[7]	= 0,	/* RR */ /* parsed manually because of 0 */
-+ [134] = 1, /* CIPSO */
-+ [133] = 2, /* E-SEC */
-+ [68] = 3, /* TS */
-+ [131] = 4, /* LSR */
-+ [130] = 5, /* SEC */
-+ [1] = 6, /* NOP */
-+ [0] = 7, /* EOOL */
-+ [15] = 8, /* ENCODE */
-+ [142] = 9, /* VISA */
-+ [205] = 10, /* FINN */
-+ [12] = 11, /* MTUR */
-+ [11] = 12, /* MTUP */
-+ [10] = 13, /* ZSU */
-+ [137] = 14, /* SSR */
-+ [136] = 15, /* SID */
-+ [151] = 16, /* DPS */
-+ [150] = 17, /* NSAPA */
-+ [149] = 18, /* SDB */
-+ [147] = 19, /* ADDEXT */
-+ [148] = 20, /* RTRALT */
-+ [82] = 21, /* TR */
-+ [145] = 22, /* EIP */
-+ [144] = 23, /* IMITD */
-+ [30] = 25, /* EXP */
-+ [94] = 25, /* EXP */
-+ [158] = 25, /* EXP */
-+ [222] = 25, /* EXP */
-+ [25] = 30, /* QS */
-+ [152] = 31, /* UMP */
-+};
-+/* Parse IPv4 Options array into ipv4Options IPFIX value. */
-+static inline __u32 ip4_options(const u_int8_t *p, const unsigned int optsize)
-+{
-+ __u32 ret = 0;
-+ unsigned int i;
-+
-+ for (i = 0; likely(i < optsize); ) {
-+ u_int8_t op = p[i++];
-+
-+ if (op == 7) /* RR: bit 0 */
-+ ret |= 1;
-+ else if (likely(op < ARRAY_SIZE(ip4_opt_table))) {
-+ /* Btw, IANA doc is messed up in a crazy way:
-+ * http://www.ietf.org/mail-archive/web/ipfix/current/msg06008.html (2011)
-+ * I decided to follow IANA _text_ description from
-+ * http://www.iana.org/assignments/ipfix/ipfix.xhtml (2013-09-18)
-+ *
-+ * Set proper bit for htonl later. */
-+ if (ip4_opt_table[op])
-+ ret |= 1 << (32 - ip4_opt_table[op]);
-+ }
-+ if (likely(i >= optsize || op == 0))
-+ break;
-+ else if (unlikely(op == 1))
-+ continue;
-+ else if (unlikely(p[i] < 2))
-+ break;
-+ else
-+ i += p[i] - 1;
-+ }
-+ return ret;
-+}
-+
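/* Worked example for ip4_options() above (traced from the table and shift
 * logic, purely illustrative): a header carrying a NOP option (op 1, slot 6)
 * followed by a Timestamp option (op 68, slot 3) yields
 *
 *	ret = (1 << (32 - 6)) | (1 << (32 - 3))
 *	    = 0x04000000 | 0x20000000 = 0x24000000
 *
 * while a Record Route option (op 7) is special-cased and sets the lowest
 * bit (ret |= 1).  The value is converted with htonl() only later, when
 * add_ipv4_field() writes it into the ipv4Options element. */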
-+#define TCPHDR_MAXSIZE (4 * 15)
-+/* List of options: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml */
-+static inline __u32 tcp_options(const struct sk_buff *skb, const unsigned int ptr, const struct tcphdr *th)
-+{
-+ const unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr);
-+ __u8 _opt[TCPHDR_MAXSIZE];
-+ const u_int8_t *p;
-+ __u32 ret;
-+ unsigned int i;
-+
-+ p = skb_header_pointer(skb, ptr + sizeof(struct tcphdr), optsize, _opt);
-+ if (unlikely(!p))
-+ return 0;
-+ ret = 0;
-+ for (i = 0; likely(i < optsize); ) {
-+ u_int8_t opt = p[i++];
-+
-+ if (likely(opt < 32)) {
-+ /* IANA doc is messed up, see above. */
-+ ret |= 1 << (32 - opt);
-+ }
-+ if (likely(i >= optsize || opt == 0))
-+ break;
-+ else if (unlikely(opt == 1))
-+ continue;
-+ else if (unlikely(p[i] < 2)) /* "silly options" */
-+ break;
-+ else
-+ i += p[i] - 1;
-+ }
-+ return ret;
-+}
- /* packet receiver */
- static unsigned int netflow_target(
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
-@@ -1192,27 +2415,38 @@ static unsigned int netflow_target(
- )
- {
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
-- struct sk_buff *skb = *pskb;
-+ const struct sk_buff *skb = *pskb;
-+#endif
-+ union {
-+ struct iphdr ip;
-+ struct ipv6hdr ip6;
-+ } _iph, *iph;
-+ unsigned int hash;
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-+ const int family = target->family;
-+#else
-+ const int family = par->family;
- #endif
-- struct iphdr _iph, *iph;
- struct ipt_netflow_tuple tuple;
- struct ipt_netflow *nf;
- __u8 tcp_flags;
- struct netflow_aggr_n *aggr_n;
- struct netflow_aggr_p *aggr_p;
- __u8 s_mask, d_mask;
-- unsigned int hash;
--
-- iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb);
--
-- if (iph == NULL) {
-+ unsigned int ptr;
-+ int fragment;
-+ size_t pkt_len;
-+ int options = 0;
-+ int tcpoptions = 0;
-+
-+	iph = skb_header_pointer(skb, 0, (likely(family == AF_INET))? sizeof(_iph.ip) : sizeof(_iph.ip6), &_iph);
-+ if (unlikely(iph == NULL)) {
- NETFLOW_STAT_INC(truncated);
- NETFLOW_STAT_INC(pkt_drop);
- return IPT_CONTINUE;
- }
-
-- tuple.s_addr = iph->saddr;
-- tuple.d_addr = iph->daddr;
-+ tuple.l3proto = family;
- tuple.s_port = 0;
- tuple.d_port = 0;
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
-@@ -1220,30 +2454,118 @@ static unsigned int netflow_target(
- #else
- tuple.i_ifc = par->in? par->in->ifindex : -1;
- #endif
-- tuple.protocol = iph->protocol;
-- tuple.tos = iph->tos;
- tcp_flags = 0; /* Cisco sometimes have TCP ACK for non TCP packets, don't get it */
- s_mask = 0;
- d_mask = 0;
-
-- if (iph->frag_off & htons(IP_OFFSET))
-+ if (likely(family == AF_INET)) {
-+ tuple.src = (union nf_inet_addr){ .ip = iph->ip.saddr };
-+ tuple.dst = (union nf_inet_addr){ .ip = iph->ip.daddr };
-+ tuple.tos = iph->ip.tos;
-+ tuple.protocol = iph->ip.protocol;
-+ fragment = unlikely(iph->ip.frag_off & htons(IP_OFFSET));
-+ ptr = iph->ip.ihl * 4;
-+ pkt_len = ntohs(iph->ip.tot_len);
-+
-+#define IPHDR_MAXSIZE (4 * 15)
-+ if (unlikely(iph->ip.ihl * 4 > sizeof(struct iphdr))) {
-+ u_int8_t _opt[IPHDR_MAXSIZE - sizeof(struct iphdr)];
-+ const u_int8_t *op;
-+ unsigned int optsize = iph->ip.ihl * 4 - sizeof(struct iphdr);
-+
-+ op = skb_header_pointer(skb, sizeof(struct iphdr), optsize, _opt);
-+ if (likely(op))
-+ options = ip4_options(op, optsize);
-+ }
-+ } else {
-+ __u8 currenthdr;
-+
-+ tuple.src.in6 = iph->ip6.saddr;
-+ tuple.dst.in6 = iph->ip6.daddr;
-+ tuple.tos = iph->ip6.priority;
-+ fragment = 0;
-+ ptr = sizeof(struct ipv6hdr);
-+ pkt_len = ntohs(iph->ip6.payload_len) + sizeof(struct ipv6hdr);
-+
-+ currenthdr = iph->ip6.nexthdr;
-+ while (currenthdr != NEXTHDR_NONE && ipv6_ext_hdr(currenthdr)) {
-+ struct ipv6_opt_hdr _hdr;
-+ const struct ipv6_opt_hdr *hp;
-+ unsigned int hdrlen = 0;
-+
-+ options |= observed_hdrs(currenthdr);
-+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
-+ if (hp == NULL) {
-+ /* We have src/dst, so must account something. */
-+ tuple.protocol = currenthdr;
-+ fragment = 3;
-+ goto do_protocols;
-+ }
-+
-+ switch (currenthdr) {
-+ case IPPROTO_FRAGMENT: {
-+ struct frag_hdr _fhdr;
-+ const struct frag_hdr *fh;
-+
-+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
-+ &_fhdr);
-+ if (fh == NULL) {
-+ tuple.protocol = currenthdr;
-+ fragment = 2;
-+ goto do_protocols;
-+ }
-+ fragment = 1;
-+#define FRA0 SetXBit(4) /* Fragment header - first fragment */
-+#define FRA1 SetXBit(6) /* Fragmentation header - not first fragment */
-+ options |= (ntohs(fh->frag_off) & 0xFFF8)? FRA1 : FRA0;
-+ hdrlen = 8;
-+ break;
-+ }
-+ case IPPROTO_AH: {
-+ struct ip_auth_hdr _hdr, *hp;
-+
-+ if (likely(hp = skb_header_pointer(skb, ptr, 8, &_hdr))) {
-+ tuple.s_port = hp->spi >> 16;
-+ tuple.d_port = hp->spi;
-+ }
-+ hdrlen = (hp->hdrlen + 2) << 2;
-+ break;
-+ }
-+ default:
-+ hdrlen = ipv6_optlen(hp);
-+ }
-+ currenthdr = hp->nexthdr;
-+ ptr += hdrlen;
-+ }
-+ tuple.protocol = currenthdr;
-+ options |= observed_hdrs(currenthdr);
-+ }
-+
-+do_protocols:
-+ if (fragment) {
-+ /* if conntrack is enabled it should defrag on pre-routing and local-out */
- NETFLOW_STAT_INC(frags);
-- else {
-+ } else {
- switch (tuple.protocol) {
- case IPPROTO_TCP: {
- struct tcphdr _hdr, *hp;
-
-- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) {
-+ if (likely(hp = skb_header_pointer(skb, ptr, 14, &_hdr))) {
- tuple.s_port = hp->source;
- tuple.d_port = hp->dest;
- tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);
-+
-+ if (unlikely(hp->doff * 4 > sizeof(struct tcphdr)))
-+ tcpoptions = tcp_options(skb, ptr, hp);
- }
- break;
- }
-- case IPPROTO_UDP: {
-+ case IPPROTO_UDP:
-+ case IPPROTO_UDPLITE:
-+ case IPPROTO_SCTP: {
- struct udphdr _hdr, *hp;
-
-- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) {
-+ if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) {
- tuple.s_port = hp->source;
- tuple.d_port = hp->dest;
- }
-@@ -1252,72 +2574,111 @@ static unsigned int netflow_target(
- case IPPROTO_ICMP: {
- struct icmphdr _hdr, *hp;
-
-- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr)))
-- tuple.d_port = (hp->type << 8) | hp->code;
-+ if (likely(family == AF_INET &&
-+ (hp = skb_header_pointer(skb, ptr, 2, &_hdr))))
-+ tuple.d_port = htons((hp->type << 8) | hp->code);
- break;
- }
-+ case IPPROTO_ICMPV6: {
-+ struct icmp6hdr _icmp6h, *ic;
-+
-+ if (likely(family == AF_INET6 &&
-+ (ic = skb_header_pointer(skb, ptr, 2, &_icmp6h))))
-+ tuple.d_port = htons((ic->icmp6_type << 8) | ic->icmp6_code);
-+ break;
-+ }
- case IPPROTO_IGMP: {
-- struct igmphdr *_hdr, *hp;
-+ struct igmphdr _hdr, *hp;
-
-- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr)))
-+ if (likely(hp = skb_header_pointer(skb, ptr, 1, &_hdr)))
- tuple.d_port = hp->type;
- }
- break;
-+ case IPPROTO_AH: { /* IPSEC */
-+ struct ip_auth_hdr _hdr, *hp;
-+
-+ if (likely(family == AF_INET && /* For IPv6 it's parsed above. */
-+ (hp = skb_header_pointer(skb, ptr, 8, &_hdr)))) {
-+ tuple.s_port = hp->spi >> 16;
-+ tuple.d_port = hp->spi;
-+ }
-+ break;
-+ }
-+ case IPPROTO_ESP: {
-+ struct ip_esp_hdr _hdr, *hp;
-+
-+ if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr)))
-+ tuple.s_port = hp->spi >> 16;
-+ tuple.d_port = hp->spi;
-+ }
-+ break;
- }
- } /* not fragmented */
-
- /* aggregate networks */
- read_lock_bh(&aggr_lock);
-- list_for_each_entry(aggr_n, &aggr_n_list, list)
-- if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) {
-- tuple.s_addr &= htonl(aggr_n->aggr_mask);
-- s_mask = aggr_n->prefix;
-- break;
-- }
-- list_for_each_entry(aggr_n, &aggr_n_list, list)
-- if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) {
-- tuple.d_addr &= htonl(aggr_n->aggr_mask);
-- d_mask = aggr_n->prefix;
-- break;
-- }
-+ if (family == AF_INET) {
-+ list_for_each_entry(aggr_n, &aggr_n_list, list)
-+ if (unlikely((ntohl(tuple.src.ip) & aggr_n->mask) == aggr_n->addr)) {
-+ tuple.src.ip &= htonl(aggr_n->aggr_mask);
-+ s_mask = aggr_n->prefix;
-+ atomic_inc(&aggr_n->usage);
-+ break;
-+ }
-+ list_for_each_entry(aggr_n, &aggr_n_list, list)
-+ if (unlikely((ntohl(tuple.dst.ip) & aggr_n->mask) == aggr_n->addr)) {
-+ tuple.dst.ip &= htonl(aggr_n->aggr_mask);
-+ d_mask = aggr_n->prefix;
-+ atomic_inc(&aggr_n->usage);
-+ break;
-+ }
-+ }
-
-- /* aggregate ports */
-- list_for_each_entry(aggr_p, &aggr_p_list, list)
-- if (ntohs(tuple.s_port) >= aggr_p->port1 &&
-- ntohs(tuple.s_port) <= aggr_p->port2) {
-- tuple.s_port = htons(aggr_p->aggr_port);
-- break;
-- }
-+ if (tuple.protocol == IPPROTO_TCP ||
-+ tuple.protocol == IPPROTO_UDP ||
-+ tuple.protocol == IPPROTO_SCTP ||
-+ tuple.protocol == IPPROTO_UDPLITE) {
-+ /* aggregate ports */
-+ list_for_each_entry(aggr_p, &aggr_p_list, list)
-+ if (unlikely(ntohs(tuple.s_port) >= aggr_p->port1 &&
-+ ntohs(tuple.s_port) <= aggr_p->port2)) {
-+ tuple.s_port = htons(aggr_p->aggr_port);
-+ atomic_inc(&aggr_p->usage);
-+ break;
-+ }
-
-- list_for_each_entry(aggr_p, &aggr_p_list, list)
-- if (ntohs(tuple.d_port) >= aggr_p->port1 &&
-- ntohs(tuple.d_port) <= aggr_p->port2) {
-- tuple.d_port = htons(aggr_p->aggr_port);
-- break;
-- }
-+ list_for_each_entry(aggr_p, &aggr_p_list, list)
-+ if (unlikely(ntohs(tuple.d_port) >= aggr_p->port1 &&
-+ ntohs(tuple.d_port) <= aggr_p->port2)) {
-+ tuple.d_port = htons(aggr_p->aggr_port);
-+ atomic_inc(&aggr_p->usage);
-+ break;
-+ }
-+ }
- read_unlock_bh(&aggr_lock);
-
- hash = hash_netflow(&tuple);
-- spin_lock_bh(&ipt_netflow_lock);
-+ read_lock_bh(&htable_rwlock);
-+ spin_lock(&htable_locks[hash & LOCK_COUNT_MASK]);
- /* record */
- nf = ipt_netflow_find(&tuple, hash);
-- if (!nf) {
-- if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) {
-+ if (unlikely(!nf)) {
-+ struct rtable *rt;
-+
-+ if (unlikely(maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows)) {
- /* This is DOS attack prevention */
- NETFLOW_STAT_INC(maxflows_err);
- NETFLOW_STAT_INC(pkt_drop);
-- NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
-- spin_unlock_bh(&ipt_netflow_lock);
-- return IPT_CONTINUE;
-+ NETFLOW_STAT_ADD(traf_drop, pkt_len);
-+ goto unlock_return;
- }
-
- nf = init_netflow(&tuple, skb, hash);
-- if (!nf || IS_ERR(nf)) {
-+ if (unlikely(!nf || IS_ERR(nf))) {
- NETFLOW_STAT_INC(alloc_err);
- NETFLOW_STAT_INC(pkt_drop);
-- NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
-- spin_unlock_bh(&ipt_netflow_lock);
-- return IPT_CONTINUE;
-+ NETFLOW_STAT_ADD(traf_drop, pkt_len);
-+ goto unlock_return;
- }
-
- nf->ts_first = jiffies;
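
This hunk also replaces the single ipt_netflow_lock with a read lock on htable_rwlock plus one spinlock per group of buckets, selected by hash & LOCK_COUNT_MASK, i.e. lock striping. A rough user-space sketch of that striping idea with pthread mutexes follows; LOCK_COUNT and the hash function are placeholders, not the module's values.

/* Sketch: lock striping as used for the flow hash table above.
 * A fixed pool of locks is indexed by the low bits of the bucket hash,
 * so unrelated buckets rarely contend on the same lock.
 * Compile: cc -std=c99 stripe.c -pthread */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>

#define LOCK_COUNT	8		/* must be a power of two */
#define LOCK_COUNT_MASK	(LOCK_COUNT - 1)

static pthread_mutex_t stripe_locks[LOCK_COUNT];

static void stripe_init(void)
{
	for (int i = 0; i < LOCK_COUNT; i++)
		pthread_mutex_init(&stripe_locks[i], NULL);
}

/* Placeholder for hash_netflow(&tuple). */
static uint32_t hash_tuple(uint32_t saddr, uint32_t daddr)
{
	return (saddr * 2654435761u) ^ daddr;
}

static void account_packet(uint32_t saddr, uint32_t daddr, uint32_t bytes)
{
	uint32_t hash = hash_tuple(saddr, daddr);
	pthread_mutex_t *lock = &stripe_locks[hash & LOCK_COUNT_MASK];

	pthread_mutex_lock(lock);
	/* ...look up or insert the flow for this bucket, update counters... */
	printf("bucket %u guarded by stripe %u (+%u bytes)\n",
	       hash, hash & LOCK_COUNT_MASK, bytes);
	pthread_mutex_unlock(lock);
}

int main(void)
{
	stripe_init();
	account_packet(0x0a000001, 0x0a000002, 1500);
	account_packet(0xc0a80001, 0x08080808, 64);
	return 0;
}
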
-@@ -1330,31 +2691,68 @@ static unsigned int netflow_target(
- nf->s_mask = s_mask;
- nf->d_mask = d_mask;
-
-- if (debug > 2)
-- printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
-+ rt = (struct rtable *)skb->dst;
-+#else /* since 2.6.26 */
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
-+ rt = skb->rtable;
-+#else /* since 2.6.31 */
-+ rt = skb_rtable(skb);
-+#endif
-+#endif
-+ if (likely(family == AF_INET)) {
-+ if (rt)
-+ nf->nh.ip = rt->rt_gateway;
-+ } else {
-+ if (rt)
-+ nf->nh.in6 = ((struct rt6_info *)rt)->rt6i_gateway;
-+ nf->flow_label = (iph->ip6.flow_lbl[0] << 16) |
-+ (iph->ip6.flow_lbl[1] << 8) | (iph->ip6.flow_lbl[2]);
-+ }
-+#if 0
-+ if (unlikely(debug > 2))
-+ printk(KERN_INFO "ipt_NETFLOW: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
- atomic_read(&ipt_netflow_count),
- tuple.i_ifc, nf->o_ifc,
- NIPQUAD(tuple.s_addr), ntohs(tuple.s_port),
- NIPQUAD(tuple.d_addr), ntohs(tuple.d_port));
-+#endif
- } else {
- /* ipt_netflow_list is sorted by access time:
- * most recently accessed flows are at head, old flows remain at tail
- * this function bubble up flow to the head */
-+ spin_lock(&hlist_lock);
- list_move(&nf->list, &ipt_netflow_list);
-+ spin_unlock(&hlist_lock);
- }
-
-+#ifdef CONFIG_NF_CONNTRACK_MARK
-+ {
-+ struct nf_conn *ct;
-+ enum ip_conntrack_info ctinfo;
-+ ct = nf_ct_get(skb, &ctinfo);
-+ if (ct)
-+ nf->mark = ct->mark;
-+ }
-+#endif
-+
- nf->nr_packets++;
-- nf->nr_bytes += ntohs(iph->tot_len);
-+ nf->nr_bytes += pkt_len;
- nf->ts_last = jiffies;
- nf->tcp_flags |= tcp_flags;
-+ nf->options |= options;
-+ if (tuple.protocol == IPPROTO_TCP)
-+ nf->tcpoptions |= tcpoptions;
-
- NETFLOW_STAT_INC(pkt_total);
-- NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len));
-+ NETFLOW_STAT_ADD(traf_total, pkt_len);
-
-- if (active_needs_export(nf, active_timeout * HZ)) {
-+ if (likely(active_needs_export(nf, active_timeout * HZ))) {
- /* ok, if this active flow to be exported
- * bubble it to the tail */
-+ spin_lock(&hlist_lock);
- list_move_tail(&nf->list, &ipt_netflow_list);
-+ spin_unlock(&hlist_lock);
-
- /* Blog: I thought about forcing timer to wake up sooner if we have
- * enough exportable flows, but in fact this doesn't have much sense,
-@@ -1363,35 +2761,194 @@ static unsigned int netflow_target(
- * limited size). But yes, this is disputable. */
- }
-
-- spin_unlock_bh(&ipt_netflow_lock);
-+unlock_return:
-+ spin_unlock(&htable_locks[hash & LOCK_COUNT_MASK]);
-+ read_unlock_bh(&htable_rwlock);
-
- return IPT_CONTINUE;
- }
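
netflow_target() above keeps ipt_netflow_list ordered by access time under a dedicated hlist_lock: a freshly updated flow is moved to the list head, and a flow past its active timeout is moved to the tail so the export scan reaches it first. A small stand-alone sketch of that move-to-head/move-to-tail discipline on an intrusive doubly linked list (the type and function names are stand-ins, not the module's):

/* Sketch: access-ordered flow list, as maintained above with
 * list_move()/list_move_tail().  Recently touched flows live at the
 * head; flows ready for export are bubbled to the tail. */
#include <stdio.h>
#include <stddef.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *h) { h->prev = h->next = h; }

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

static void list_add_head(struct list_head *e, struct list_head *h)
{
	e->next = h->next; e->prev = h;
	h->next->prev = e; h->next = e;
}

static void list_add_tail(struct list_head *e, struct list_head *h)
{
	e->prev = h->prev; e->next = h;
	h->prev->next = e; h->prev = e;
}

struct flow {
	struct list_head list;
	const char *name;
};

static void touch(struct flow *f, struct list_head *all)	/* packet seen */
{
	list_del(&f->list);
	list_add_head(&f->list, all);
}

static void mark_exportable(struct flow *f, struct list_head *all)
{
	list_del(&f->list);
	list_add_tail(&f->list, all);
}

int main(void)
{
	struct list_head all;
	struct flow a = { .name = "a" }, b = { .name = "b" };

	list_init(&all);
	list_add_head(&a.list, &all);
	list_add_head(&b.list, &all);
	touch(&a, &all);		/* a becomes most recently used */
	mark_exportable(&b, &all);	/* b goes to the tail for export */

	for (struct list_head *p = all.next; p != &all; p = p->next)
		printf("%s\n", ((struct flow *)
			((char *)p - offsetof(struct flow, list)))->name);
	return 0;
}
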
-
--static struct ipt_target ipt_netflow_reg = {
-- .name = "NETFLOW",
-- .target = netflow_target,
-- .family = AF_INET,
--#ifndef RAW_PROMISC_HACK
-- .table = "filter",
--#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
-- .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
-- (1 << NF_INET_LOCAL_OUT),
--#else
-- .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
-- (1 << NF_IP_LOCAL_OUT),
--#endif /* NF_IP_LOCAL_IN */
-+#ifdef CONFIG_NF_NAT_NEEDED
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+ /* Below 2.6.31 we don't need to handle callback chain manually. */
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
-+#define NET_STRUCT struct net *net
-+#define NET_ARG net,
-+#define nf_conntrack_event_cb net->ct.nf_conntrack_event_cb
- #else
-- .table = "raw",
--#ifndef NF_IP_LOCAL_IN
-- .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
-- (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_PRE_ROUTING),
-+#define NET_STRUCT void
-+#define NET_ARG
-+#endif
-+static int set_notifier_cb(NET_STRUCT)
-+{
-+ struct nf_ct_event_notifier *notifier;
-+
-+ notifier = rcu_dereference(nf_conntrack_event_cb);
-+ if (notifier == NULL) {
-+ /* Polite mode. */
-+ nf_conntrack_register_notifier(NET_ARG &ctnl_notifier);
-+ } else if (notifier != &ctnl_notifier) {
-+ if (!saved_event_cb)
-+ saved_event_cb = notifier;
-+ else if (saved_event_cb != notifier)
-+ printk(KERN_ERR "natevents_net_init: %p != %p (report error.)\n",
-+ saved_event_cb, notifier);
-+ rcu_assign_pointer(nf_conntrack_event_cb, &ctnl_notifier);
-+ } else
-+ printk(KERN_ERR "ipt_NETFLOW: natevents already enabled.\n");
-+ return 0;
-+}
-+static void unset_notifier_cb(NET_STRUCT)
-+{
-+ struct nf_ct_event_notifier *notifier;
-+
-+ notifier = rcu_dereference(nf_conntrack_event_cb);
-+ if (notifier == &ctnl_notifier) {
-+ if (saved_event_cb == NULL)
-+ nf_conntrack_unregister_notifier(NET_ARG &ctnl_notifier);
-+ else
-+ rcu_assign_pointer(nf_conntrack_event_cb, saved_event_cb);
-+ } else
-+ printk(KERN_ERR "ipt_NETFLOW: natevents already disabled.\n");
-+}
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
-+#undef nf_conntrack_event_cb
-+static struct pernet_operations natevents_net_ops = {
-+ .init = set_notifier_cb,
-+ .exit = unset_notifier_cb
-+};
-+#endif
-+#endif /* since 2.6.31 */
-+
-+static DEFINE_MUTEX(events_lock);
-+/* Both functions may be called multiple times. */
-+static void register_ct_events(void)
-+{
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+#define NETLINK_M "nf_conntrack_netlink"
-+ struct module *netlink_m;
-+ static int referenced = 0;
-+#endif
-+
-+ printk(KERN_INFO "ipt_NETFLOW: enable natevents.\n");
-+ mutex_lock(&events_lock);
-+
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+ /* Pre-load netlink module who will be first notifier
-+ * user, and then hijack nf_conntrack_event_cb from it. */
-+ if (
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0)
-+ !rcu_dereference(nf_conntrack_event_cb) ||
-+#endif
-+ !(netlink_m = find_module(NETLINK_M))) {
-+ printk("Loading " NETLINK_M "\n");
-+ request_module(NETLINK_M);
-+ }
-+	/* Reference netlink module to prevent its unsafe unload before us. */
-+ if (!referenced && (netlink_m = find_module(NETLINK_M))) {
-+ referenced++;
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
-+#define use_module ref_module
-+#endif
-+ use_module(THIS_MODULE, netlink_m);
-+ }
-+
-+ /* Register ct events callback. */
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
-+ register_pernet_subsys(&natevents_net_ops);
- #else
-- .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
-- (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_PRE_ROUTING),
--#endif /* NF_IP_LOCAL_IN */
--#endif /* !RAW_PROMISC_HACK */
-- .me = THIS_MODULE
-+ set_notifier_cb();
-+#endif
-+#else /* below v2.6.31 */
-+ if (!natevents && nf_conntrack_register_notifier(&ctnl_notifier) < 0)
-+ printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n");
-+ else
-+#endif
-+ natevents = 1;
-+ mutex_unlock(&events_lock);
-+}
-+
-+static void unregister_ct_events(void)
-+{
-+ printk(KERN_INFO "ipt_NETFLOW: disable natevents.\n");
-+ mutex_lock(&events_lock);
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
-+ unregister_pernet_subsys(&natevents_net_ops);
-+#else /* < v3.2 */
-+ unset_notifier_cb();
-+#endif /* v3.2 */
-+ rcu_assign_pointer(saved_event_cb, NULL);
-+#else /* < v2.6.31 */
-+ nf_conntrack_unregister_notifier(&ctnl_notifier);
-+#endif
-+ natevents = 0;
-+ mutex_unlock(&events_lock);
-+}
-+#endif /* CONFIG_NF_NAT_NEEDED */
-+
-+#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
-+#define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING
-+#define NF_IP_LOCAL_IN NF_INET_LOCAL_IN
-+#define NF_IP_FORWARD NF_INET_FORWARD
-+#define NF_IP_LOCAL_OUT NF_INET_LOCAL_OUT
-+#define NF_IP_POST_ROUTING NF_INET_POST_ROUTING
-+#endif
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
-+/* net/netfilter/x_tables.c */
-+static void xt_unregister_targets(struct xt_target *target, unsigned int n)
-+{
-+ unsigned int i;
-+
-+ for (i = 0; i < n; i++)
-+ xt_unregister_target(&target[i]);
-+}
-+static int xt_register_targets(struct xt_target *target, unsigned int n)
-+{
-+ unsigned int i;
-+
-+ int err = 0;
-+ for (i = 0; i < n; i++)
-+ if ((err = xt_register_target(&target[i])))
-+ goto err;
-+ return err;
-+err:
-+ if (i > 0)
-+ xt_unregister_targets(target, i);
-+ return err;
-+}
-+#endif
-+
-+static struct ipt_target ipt_netflow_reg[] __read_mostly = {
-+ {
-+ .name = "NETFLOW",
-+ .target = netflow_target,
-+ .checkentry = netflow_target_check,
-+ .family = AF_INET,
-+ .hooks =
-+ (1 << NF_IP_PRE_ROUTING) |
-+ (1 << NF_IP_LOCAL_IN) |
-+ (1 << NF_IP_FORWARD) |
-+ (1 << NF_IP_LOCAL_OUT) |
-+ (1 << NF_IP_POST_ROUTING),
-+ .me = THIS_MODULE
-+ },
-+ {
-+ .name = "NETFLOW",
-+ .target = netflow_target,
-+ .checkentry = netflow_target_check,
-+ .family = AF_INET6,
-+ .hooks =
-+ (1 << NF_IP_PRE_ROUTING) |
-+ (1 << NF_IP_LOCAL_IN) |
-+ (1 << NF_IP_FORWARD) |
-+ (1 << NF_IP_LOCAL_OUT) |
-+ (1 << NF_IP_POST_ROUTING),
-+ .me = THIS_MODULE
-+ },
- };
-
- static int __init ipt_netflow_init(void)
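
For kernels older than 2.6.19 the patch backports xt_register_targets()/xt_unregister_targets(): each target in the array is registered in order and, on the first failure, the ones already registered are unwound. The same register-or-roll-back pattern, reduced to a stand-alone sketch with dummy register/unregister functions:

/* Sketch: register an array of items, unwinding on first failure,
 * mirroring the xt_register_targets() backport above. */
#include <stdio.h>

struct target { const char *name; };

/* Stand-ins for xt_register_target()/xt_unregister_target(). */
static int register_one(struct target *t)
{
	printf("register %s\n", t->name);
	return 0;	/* 0 on success, non-zero errno-style value on failure */
}

static void unregister_one(struct target *t)
{
	printf("unregister %s\n", t->name);
}

static void unregister_many(struct target *t, unsigned int n)
{
	for (unsigned int i = 0; i < n; i++)
		unregister_one(&t[i]);
}

static int register_many(struct target *t, unsigned int n)
{
	unsigned int i;
	int err = 0;

	for (i = 0; i < n; i++)
		if ((err = register_one(&t[i])))
			goto rollback;
	return 0;
rollback:
	if (i > 0)
		unregister_many(t, i);	/* only the ones that succeeded */
	return err;
}

int main(void)
{
	struct target reg[] = { { "NETFLOW/ipv4" }, { "NETFLOW/ipv6" } };

	if (register_many(reg, 2) == 0)
		unregister_many(reg, 2);
	return 0;
}
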
-@@ -1399,11 +2956,16 @@ static int __init ipt_netflow_init(void)
- #ifdef CONFIG_PROC_FS
- struct proc_dir_entry *proc_stat;
- #endif
-+ printk(KERN_INFO "ipt_NETFLOW version %s, srcversion %s\n",
-+ IPT_NETFLOW_VERSION, THIS_MODULE->srcversion);
-
- get_random_bytes(&ipt_netflow_hash_rnd, 4);
-
- /* determine hash size (idea from nf_conntrack_core.c) */
- if (!hashsize) {
-+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
-+#define num_physpages totalram_pages
-+#endif
- hashsize = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct hlist_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
-@@ -1411,8 +2973,7 @@ static int __init ipt_netflow_init(void)
- }
- if (hashsize < 16)
- hashsize = 16;
-- printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n",
-- IPT_NETFLOW_VERSION, hashsize);
-+ printk(KERN_INFO "ipt_NETFLOW: hashsize %u\n", hashsize);
-
- ipt_netflow_hash_size = hashsize;
- ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size);
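
The default hash size follows the nf_conntrack heuristic: roughly one bucket per 16 KB of RAM, halved above 1 GB, never fewer than 16 buckets, with num_physpages aliased to totalram_pages on 3.11+ kernels. A user-space sketch of the same arithmetic (PAGE_SIZE and the per-bucket size are assumptions for the example):

/* Sketch: derive the flow hash-table size from physical memory,
 * following the heuristic used above. */
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_SHIFT	12

static unsigned int pick_hashsize(unsigned long num_physpages,
				  size_t bucket_size)
{
	unsigned long hashsize;

	hashsize = ((num_physpages << PAGE_SHIFT) / 16384) / bucket_size;
	if (num_physpages > (1024UL * 1024 * 1024 / PAGE_SIZE))
		hashsize /= 2;		/* more than 1 GB of RAM: be less greedy */
	if (hashsize < 16)
		hashsize = 16;
	return (unsigned int)hashsize;
}

int main(void)
{
	/* 2 GB of RAM, 8-byte hlist_head per bucket (typical on 64-bit). */
	unsigned long pages = (2UL * 1024 * 1024 * 1024) / PAGE_SIZE;

	printf("hashsize = %u buckets\n", pick_hashsize(pages, 8));
	return 0;
}
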
-@@ -1434,12 +2995,18 @@ static int __init ipt_netflow_init(void)
- }
-
- #ifdef CONFIG_PROC_FS
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
- proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat));
-+#else
-+ proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat), &nf_seq_fops);
-+#endif
- if (!proc_stat) {
- printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
- goto err_free_netflow_slab;
- }
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
- proc_stat->proc_fops = &nf_seq_fops;
-+#endif
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
- proc_stat->owner = THIS_MODULE;
- #endif
-@@ -1480,21 +3047,28 @@ static int __init ipt_netflow_init(void)
- }
- add_aggregation(aggregation);
-
-- __start_scan_worker();
-+ netflow_switch_version(protocol);
-+ _schedule_scan_worker(0);
- setup_timer(&rate_timer, rate_timer_calc, 0);
- mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
-
-- if (xt_register_target(&ipt_netflow_reg))
-+ peakflows_at = jiffies;
-+ if (xt_register_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg)))
- goto err_stop_timer;
-
-- peakflows_at = jiffies;
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ if (natevents)
-+ register_ct_events();
-+#endif
-
-- printk(KERN_INFO "ipt_netflow loaded.\n");
-+ printk(KERN_INFO "ipt_NETFLOW is loaded.\n");
- return 0;
-
- err_stop_timer:
-- __stop_scan_worker();
-+ _unschedule_scan_worker();
-+ netflow_scan_and_export(AND_FLUSH);
- del_timer_sync(&rate_timer);
-+ free_templates();
- destination_removeall();
- aggregation_remove(&aggr_n_list);
- aggregation_remove(&aggr_p_list);
-@@ -1506,17 +3080,18 @@ err_free_proc_stat:
- #ifdef CONFIG_PROC_FS
- remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
- err_free_netflow_slab:
--#endif
-+#endif
- kmem_cache_destroy(ipt_netflow_cachep);
- err_free_hash:
- vfree(ipt_netflow_hash);
- err:
-+ printk(KERN_INFO "ipt_NETFLOW is not loaded.\n");
- return -ENOMEM;
- }
-
- static void __exit ipt_netflow_fini(void)
- {
-- printk(KERN_INFO "ipt_netflow unloading..\n");
-+ printk(KERN_INFO "ipt_NETFLOW unloading..\n");
-
- #ifdef CONFIG_SYSCTL
- unregister_sysctl_table(netflow_sysctl_header);
-@@ -1524,14 +3099,18 @@ static void __exit ipt_netflow_fini(void)
- #ifdef CONFIG_PROC_FS
- remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
- #endif
--
-- xt_unregister_target(&ipt_netflow_reg);
-- __stop_scan_worker();
-- netflow_scan_and_export(1);
-+ xt_unregister_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg));
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ if (natevents)
-+ unregister_ct_events();
-+#endif
-+ _unschedule_scan_worker();
-+ netflow_scan_and_export(AND_FLUSH);
- del_timer_sync(&rate_timer);
-
- synchronize_sched();
-
-+ free_templates();
- destination_removeall();
- aggregation_remove(&aggr_n_list);
- aggregation_remove(&aggr_p_list);
-@@ -1539,7 +3118,7 @@ static void __exit ipt_netflow_fini(void)
- kmem_cache_destroy(ipt_netflow_cachep);
- vfree(ipt_netflow_hash);
-
-- printk(KERN_INFO "ipt_netflow unloaded.\n");
-+ printk(KERN_INFO "ipt_NETFLOW unloaded.\n");
- }
-
- module_init(ipt_netflow_init);
-diff --git a/ipt_NETFLOW.h b/ipt_NETFLOW.h
-index 4a7b645..749f985 100644
---- a/ipt_NETFLOW.h
-+++ b/ipt_NETFLOW.h
-@@ -35,8 +35,8 @@ struct netflow5_record {
- __be16 o_ifc;
- __be32 nr_packets;
- __be32 nr_octets;
-- __be32 ts_first;
-- __be32 ts_last;
-+ __be32 first_ms;
-+ __be32 last_ms;
- __be16 s_port;
- __be16 d_port;
- __u8 reserved;
-@@ -54,9 +54,9 @@ struct netflow5_record {
- struct netflow5_pdu {
- __be16 version;
- __be16 nr_records;
-- __be32 ts_uptime;
-- __be32 ts_usecs;
-- __be32 ts_unsecs;
-+ __be32 ts_uptime; /* ms */
-+ __be32 ts_usecs; /* s */
-+ __be32 ts_unsecs; /* ns */
- __be32 seq;
- __u8 eng_type;
- __u8 eng_id;
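
The renamed first_ms/last_ms record fields and the new header comments pin down the NetFlow v5 time semantics: ts_uptime is exporter uptime in milliseconds, ts_usecs the export wall-clock time in whole seconds, ts_unsecs the residual nanoseconds. A hedged user-space sketch of filling those three header fields from POSIX clocks (the kernel module derives them from jiffies and ktime instead):

/* Sketch: fill the NetFlow v5 header time fields with the semantics
 * noted above (uptime in ms, wall clock split into s + residual ns).
 * struct v5_times stands in for the corresponding netflow5_pdu fields. */
#include <stdint.h>
#include <stdio.h>
#include <time.h>
#include <arpa/inet.h>

struct v5_times {
	uint32_t ts_uptime;	/* ms since exporter start */
	uint32_t ts_usecs;	/* export time, seconds since the epoch */
	uint32_t ts_unsecs;	/* residual nanoseconds */
};

static void fill_v5_times(struct v5_times *t)
{
	struct timespec up, now;

	clock_gettime(CLOCK_MONOTONIC, &up);	/* stand-in for jiffies uptime */
	clock_gettime(CLOCK_REALTIME, &now);

	t->ts_uptime = htonl((uint32_t)(up.tv_sec * 1000 + up.tv_nsec / 1000000));
	t->ts_usecs  = htonl((uint32_t)now.tv_sec);
	t->ts_unsecs = htonl((uint32_t)now.tv_nsec);
}

int main(void)
{
	struct v5_times t;

	fill_v5_times(&t);
	printf("uptime %u ms, export time %u s + %u ns\n",
	       ntohl(t.ts_uptime), ntohl(t.ts_usecs), ntohl(t.ts_unsecs));
	return 0;
}
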
-@@ -65,42 +65,185 @@ struct netflow5_pdu {
- } __attribute__ ((packed));
- #define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))
-
-+/* NetFlow v9 RFC http://www.ietf.org/rfc/rfc3954.txt */
-+enum {
-+ IN_BYTES = 1,
-+ IN_PKTS = 2,
-+ PROTOCOL = 4,
-+ TOS = 5,
-+ TCP_FLAGS = 6,
-+ L4_SRC_PORT = 7,
-+ IPV4_SRC_ADDR = 8,
-+ SRC_MASK = 9,
-+ INPUT_SNMP = 10,
-+ L4_DST_PORT = 11,
-+ IPV4_DST_ADDR = 12,
-+ DST_MASK = 13,
-+ OUTPUT_SNMP = 14,
-+ IPV4_NEXT_HOP = 15,
-+ //SRC_AS = 16,
-+ //DST_AS = 17,
-+ //BGP_IPV4_NEXT_HOP = 18,
-+ //MUL_DST_PKTS = 19,
-+ //MUL_DST_BYTES = 20,
-+ LAST_SWITCHED = 21,
-+ FIRST_SWITCHED = 22,
-+ IPV6_SRC_ADDR = 27,
-+ IPV6_DST_ADDR = 28,
-+ IPV6_FLOW_LABEL = 31,
-+ ICMP_TYPE = 32,
-+ MUL_IGMP_TYPE = 33,
-+ //TOTAL_BYTES_EXP = 40,
-+ //TOTAL_PKTS_EXP = 41,
-+ //TOTAL_FLOWS_EXP = 42,
-+ IPV6_NEXT_HOP = 62,
-+ IPV6_OPTION_HEADERS = 64,
-+ commonPropertiesId = 137, /* for MARK */
-+ ipv4Options = 208,
-+ tcpOptions = 209,
-+ postNATSourceIPv4Address = 225,
-+ postNATDestinationIPv4Address = 226,
-+ postNAPTSourceTransportPort = 227,
-+ postNAPTDestinationTransportPort = 228,
-+ natEvent = 230,
-+ postNATSourceIPv6Address = 281,
-+ postNATDestinationIPv6Address = 282,
-+ IPSecSPI = 295,
-+ observationTimeMilliseconds = 323,
-+ observationTimeMicroseconds = 324,
-+ observationTimeNanoseconds = 325,
-+};
-+
-+enum {
-+ FLOWSET_TEMPLATE = 0,
-+ FLOWSET_OPTIONS = 1,
-+ IPFIX_TEMPLATE = 2,
-+ IPFIX_OPTIONS = 3,
-+ FLOWSET_DATA_FIRST = 256,
-+};
-+
-+struct flowset_template {
-+ __be16 flowset_id;
-+ __be16 length;
-+ __be16 template_id;
-+ __be16 field_count;
-+} __attribute__ ((packed));
-+
-+struct flowset_data {
-+ __be16 flowset_id;
-+ __be16 length;
-+} __attribute__ ((packed));
-+
-+/* NetFlow v9 packet. */
-+struct netflow9_pdu {
-+ __be16 version;
-+ __be16 nr_records;
-+ __be32 sys_uptime_ms;
-+ __be32 export_time_s;
-+ __be32 seq;
-+ __be32 source_id; /* Exporter Observation Domain */
-+ __u8 data[1400];
-+} __attribute__ ((packed));
-+
-+/* IPFIX packet. */
-+struct ipfix_pdu {
-+ __be16 version;
-+ __be16 length;
-+ __be32 export_time_s;
-+ __be32 seq;
-+ __be32 odomain_id; /* Observation Domain ID */
-+ __u8 data[1400];
-+} __attribute__ ((packed));
-+
-+/* Maximum bytes a flow can have; once it is reached the flow becomes
-+ * unsearchable and will be exported soon. */
-+#define FLOW_FULL_WATERMARK 0xffefffff
-+
-+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
-+union nf_inet_addr {
-+ __be32 ip;
-+ __be32 ip6[4];
-+ struct in_addr in;
-+ struct in6_addr in6;
-+};
-+#endif
-+
- /* hashed data which identify unique flow */
-+/* 16+16 + 2+2 + 2+1+1+1 = 41 */
- struct ipt_netflow_tuple {
-- __be32 s_addr; // Network byte order
-- __be32 d_addr; // -"-
-- __be16 s_port; // -"-
-+ union nf_inet_addr src;
-+ union nf_inet_addr dst;
-+ __be16 s_port; // Network byte order
- __be16 d_port; // -"-
-- __be16 i_ifc; // Local byte order
-+ __u16 i_ifc; // Host byte order
- __u8 protocol;
- __u8 tos;
-+ __u8 l3proto;
- };
--/* tuple size is rounded to u32s */
--#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4)
--
--/* maximum bytes flow can have, after it reached flow become not searchable and will be exported soon */
--#define FLOW_FULL_WATERMARK 0xffefffff
-
--/* flow entry */
-+/* hlist[2] + tuple[]: 8+8 + 41 = 57 (less than usual cache line, 64) */
- struct ipt_netflow {
- struct hlist_node hlist; // hashtable search chain
-- struct list_head list; // all flows chain
-
- /* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
- struct ipt_netflow_tuple tuple;
-
- /* volatile data */
-- __be16 o_ifc;
-+ union nf_inet_addr nh;
-+ __u16 o_ifc;
- __u8 s_mask;
- __u8 d_mask;
-+ __u8 tcp_flags; /* `OR' of all tcp flags */
-
- /* flow statistics */
- u_int32_t nr_packets;
- u_int32_t nr_bytes;
-- unsigned long ts_first;
-- unsigned long ts_last;
-- __u8 tcp_flags; /* `OR' of all tcp flags */
-+ union {
-+ struct {
-+ unsigned long first;
-+ unsigned long last;
-+ } ts;
-+ ktime_t ts_obs;
-+ } _ts_un;
-+#define ts_first _ts_un.ts.first
-+#define ts_last _ts_un.ts.last
-+#define ts_obs _ts_un.ts_obs
-+ u_int32_t flow_label; /* IPv6 */
-+ u_int32_t options; /* IPv4(16) & IPv6(32) Options */
-+ u_int32_t tcpoptions;
-+#ifdef CONFIG_NF_CONNTRACK_MARK
-+ u_int32_t mark; /* Exported as commonPropertiesId */
-+#endif
-+#ifdef CONFIG_NF_NAT_NEEDED
-+ __be32 s_as;
-+ __be32 d_as;
-+ struct nat_event *nat;
-+#endif
-+ struct list_head list; // all flows chain
-+ spinlock_t *lock;
-+};
-+
-+#ifdef CONFIG_NF_NAT_NEEDED
-+enum {
-+ NAT_CREATE, NAT_DESTROY, NAT_POOLEXHAUSTED
- };
-+struct nat_event {
-+ struct list_head list;
-+ struct {
-+ __be32 s_addr;
-+ __be32 d_addr;
-+ __be16 s_port;
-+ __be16 d_port;
-+ } pre, post;
-+ ktime_t ts_ktime;
-+ unsigned long ts_jiffies;
-+ __u8 protocol;
-+ __u8 nat_event;
-+};
-+#define IS_DUMMY_FLOW(nf) (nf->nat)
-+#else
-+#define IS_DUMMY_FLOW(nf) 0
-+#endif
-
- static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
- const struct ipt_netflow_tuple *t2)
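
The header now defines the NetFlow v9 element IDs from RFC 3954 plus the flowset_template/flowset_data framing used by the v9 and IPFIX exporters. The sketch below shows, in user space, how a template flowset describing a record of a few of those elements is laid out on the wire; it serializes only the template, not a complete export packet, and template_id 256 is simply the first ID permitted for data templates.

/* Sketch: serialize a NetFlow v9 template flowset for a handful of the
 * element IDs defined above.  All fields are big-endian on the wire. */
#include <stdint.h>
#include <stdio.h>
#include <arpa/inet.h>		/* htons */

/* Element IDs from RFC 3954 (subset of the enum above). */
enum { IN_BYTES = 1, IN_PKTS = 2, PROTOCOL = 4,
       L4_SRC_PORT = 7, IPV4_SRC_ADDR = 8,
       L4_DST_PORT = 11, IPV4_DST_ADDR = 12 };

struct field { uint16_t type, length; };	/* element ID, size in bytes */

static size_t build_template(uint16_t *p, uint16_t template_id,
			     const struct field *f, unsigned int n)
{
	size_t len = 8 + n * 4;		/* flowset hdr + template hdr + fields */

	*p++ = htons(0);		/* flowset_id 0 = template flowset */
	*p++ = htons((uint16_t)len);	/* flowset length in bytes */
	*p++ = htons(template_id);	/* data flowsets will reference this */
	*p++ = htons((uint16_t)n);	/* field_count */
	for (unsigned int i = 0; i < n; i++) {
		*p++ = htons(f[i].type);
		*p++ = htons(f[i].length);
	}
	return len;
}

int main(void)
{
	static const struct field fields[] = {
		{ IPV4_SRC_ADDR, 4 }, { IPV4_DST_ADDR, 4 },
		{ L4_SRC_PORT, 2 },   { L4_DST_PORT, 2 },
		{ PROTOCOL, 1 },      { IN_PKTS, 4 }, { IN_BYTES, 4 },
	};
	uint16_t buf[64];
	size_t len = build_template(buf, 256, fields, 7);

	printf("template flowset: %zu bytes\n", len);
	return 0;
}
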
-@@ -115,11 +258,13 @@ struct ipt_netflow_sock {
- unsigned short port;
- atomic_t wmem_peak; // sk_wmem_alloc peak value
- atomic_t err_full; // socket filled error
-+ atomic_t err_connect; // connect errors
- atomic_t err_other; // other socket errors
- };
-
- struct netflow_aggr_n {
- struct list_head list;
-+ atomic_t usage;
- __u32 mask;
- __u32 addr;
- __u32 aggr_mask;
-@@ -128,6 +273,7 @@ struct netflow_aggr_n {
-
- struct netflow_aggr_p {
- struct list_head list;
-+ atomic_t usage;
- __u16 port1;
- __u16 port2;
- __u16 aggr_port;
-diff --git a/libipt_NETFLOW.c b/libipt_NETFLOW.c
-index d85b6d9..a0f9e5d 100644
---- a/libipt_NETFLOW.c
-+++ b/libipt_NETFLOW.c
-@@ -58,24 +58,24 @@
- #define _IPT_IP struct ipt_ip
- #endif
-
-+#ifndef IPTABLES_VERSION
-+#define IPTABLES_VERSION XTABLES_VERSION
-+#endif
-+
- static struct option opts[] = {
-- {0}
-+ { 0 }
- };
-
- static void help(void)
- {
-- printf( "NETFLOW target\n");
-+ printf("NETFLOW target\n");
- }
-
--//static int parse(int c, char **argv, int invert, unsigned int *flags,
--// const _IPT_ENTRY *entry,
--// struct ipt_entry_target **target)
- static int parse(int c, char **argv, int invert, unsigned int *flags,
- const _IPT_ENTRY *entry,
- struct ipt_entry_target **targetinfo)
-
- {
--
- return 1;
- }
-
-@@ -95,16 +95,9 @@ static void print(const _IPT_IP *ip,
- }
-
- static struct iptables_target netflow = {
--#ifdef MOD140
-- .family = AF_INET,
--#endif
- .next = NULL,
- .name = "NETFLOW",
--#ifdef XTABLES_VERSION
-- .version = XTABLES_VERSION,
--#else
- .version = IPTABLES_VERSION,
--#endif
- .size = IPT_ALIGN(0),
- .userspacesize = IPT_ALIGN(0),
- .help = &help,
-diff --git a/murmur3.h b/murmur3.h
-new file mode 100644
-index 0000000..57a6006
---- /dev/null
-+++ b/murmur3.h
-@@ -0,0 +1,42 @@
-+/* MurmurHash3, based on https://code.google.com/p/smhasher of Austin Appleby. */
-+
-+static __always_inline uint32_t rotl32(const uint32_t x, const int8_t r)
-+{
-+ return (x << r) | (x >> (32 - r));
-+}
-+
-+static __always_inline uint32_t fmix32(register uint32_t h)
-+{
-+ h ^= h >> 16;
-+ h *= 0x85ebca6b;
-+ h ^= h >> 13;
-+ h *= 0xc2b2ae35;
-+ h ^= h >> 16;
-+ return h;
-+}
-+
-+static inline uint32_t murmur3(const void *key, const uint32_t len, const uint32_t seed)
-+{
-+ const uint32_t c1 = 0xcc9e2d51;
-+ const uint32_t c2 = 0x1b873593;
-+ const uint32_t *blocks;
-+ const uint8_t *tail;
-+ register uint32_t h1 = seed;
-+ uint32_t k1 = 0;
-+ uint32_t i;
-+
-+ blocks = (const uint32_t *)key;
-+ for (i = len / 4; i; --i) {
-+ h1 ^= rotl32(*blocks++ * c1, 15) * c2;
-+ h1 = rotl32(h1, 13) * 5 + 0xe6546b64;
-+ }
-+ tail = (const uint8_t*)blocks;
-+ switch (len & 3) {
-+ case 3: k1 ^= tail[2] << 16;
-+ case 2: k1 ^= tail[1] << 8;
-+ case 1: k1 ^= tail[0];
-+ h1 ^= rotl32(k1 * c1, 15) * c2;
-+ }
-+ return fmix32(h1^ len);
-+}
-+
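
murmur3.h carries MurmurHash3 (32-bit), which the module uses to hash the packed flow tuple with a random boot-time seed; the switch fall-through is the standard tail handling of that algorithm. Below is a user-space rendering of the routine, hashing a toy tuple into a bucket index; the tuple layout and seed are illustrative, and unaligned reads are avoided with memcpy.

/* Sketch: user-space rendering of the murmur3() routine above, used to
 * pick a hash bucket for a flow tuple.  The seed stands in for
 * ipt_netflow_hash_rnd. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t rotl32(uint32_t x, int8_t r) { return (x << r) | (x >> (32 - r)); }

static uint32_t fmix32(uint32_t h)
{
	h ^= h >> 16; h *= 0x85ebca6b;
	h ^= h >> 13; h *= 0xc2b2ae35;
	h ^= h >> 16;
	return h;
}

static uint32_t murmur3(const void *key, uint32_t len, uint32_t seed)
{
	const uint8_t *data = key;
	uint32_t h1 = seed, k1 = 0, i;

	for (i = 0; i + 4 <= len; i += 4) {
		uint32_t k;

		memcpy(&k, data + i, 4);	/* avoid unaligned reads */
		h1 ^= rotl32(k * 0xcc9e2d51, 15) * 0x1b873593;
		h1 = rotl32(h1, 13) * 5 + 0xe6546b64;
	}
	switch (len & 3) {			/* intentional fall-through */
	case 3: k1 ^= data[i + 2] << 16;	/* fall through */
	case 2: k1 ^= data[i + 1] << 8;		/* fall through */
	case 1: k1 ^= data[i];
		h1 ^= rotl32(k1 * 0xcc9e2d51, 15) * 0x1b873593;
	}
	return fmix32(h1 ^ len);
}

struct toy_tuple {		/* stand-in for struct ipt_netflow_tuple */
	uint32_t saddr, daddr;
	uint16_t sport, dport;
	uint8_t proto, pad[3];
};

int main(void)
{
	struct toy_tuple t = { 0x0a000001, 0x0a000002, 12345, 80, 6, { 0 } };
	uint32_t seed = 0xdeadbeef;	/* the kernel uses get_random_bytes() */
	unsigned int hashsize = 8192;

	printf("bucket %u\n", murmur3(&t, sizeof(t), seed) % hashsize);
	return 0;
}
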
-diff --git a/raw_promisc_debian_squeeze6.patch b/raw_promisc_debian_squeeze6.patch
-new file mode 100644
-index 0000000..69d0d35
---- /dev/null
-+++ b/raw_promisc_debian_squeeze6.patch
-@@ -0,0 +1,37 @@
-+
-+ Short manual and patch for Debian Squeeze
-+ suggested by Pavel Odintsov:
-+
-+On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote:
-+>
-+> a short how-to for patching the Debian Squeeze kernel with the promisc patch:
-+>
-+> cd /usr/src
-+> apt-get install -y dpkg-dev
-+> apt-get build-dep linux-image-2.6.32-5-amd64
-+> cd linux-2.6-2.6.32/
-+> apt-get source linux-image-2.6.32-5-amd64
-+>
-+> wget .... /root/raw_promisc_debian_squeeze6.patch
-+> patch -p1 < raw_promisc_debian_squeeze6.patch
-+> Apply the Debian patches:
-+> debian/rules source
-+>
-+> Start the build:
-+> debian/rules binary
-+>
-+
-+diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c
-+--- linux-2.6-2.6.32/net/ipv4/ip_input.c 2009-12-03 04:51:21.000000000 +0100
-++++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c 2012-06-25 19:13:49.000000000 +0200
-+@@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n
-+ /* When the interface is in promisc. mode, drop all the crap
-+ * that it receives, do not try to analyse it.
-+ */
-+- if (skb->pkt_type == PACKET_OTHERHOST)
-+- goto drop;
-++ //if (skb->pkt_type == PACKET_OTHERHOST)
-++ // goto drop;
-+
-+
-+ IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);