Diffstat (limited to 'testing/ipt-netflow-grsec')
-rw-r--r--  testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch  |   32
-rw-r--r--  testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch   |   28
-rw-r--r--  testing/ipt-netflow-grsec/APKBUILD                                       |   16
-rw-r--r--  testing/ipt-netflow-grsec/git.patch                                      | 4308
4 files changed, 6 insertions, 4378 deletions
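
The commit itself is a routine package bump: the ipt-netflow source moves from 1.8 to 2.0, the kernel it is built against moves from 3.14.15 to 3.14.16, and the locally carried patches (the two compile fixes and git.patch) are dropped. As a rough illustration of the usual aports workflow behind a change like this (not part of the commit; the paths and abuild invocations are assumptions), the steps look roughly like:

    ~/aports/testing/ipt-netflow-grsec $ vi APKBUILD       # bump _ver/_kver, remove dropped patches from source=
    ~/aports/testing/ipt-netflow-grsec $ abuild checksum   # regenerate md5sums/sha256sums/sha512sums for the new tarball
    ~/aports/testing/ipt-netflow-grsec $ abuild -r         # test-build the package against linux-grsec-dev
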
diff --git a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch b/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch
deleted file mode 100644
index 99ebffe380..0000000000
--- a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.10.y-kernel.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From b1588f736edbeeb2b3de6081e92dde8840cef66d Mon Sep 17 00:00:00 2001
-From: Natanael Copa <ncopa@alpinelinux.org>
-Date: Mon, 5 Aug 2013 12:56:19 +0000
-Subject: [PATCH] Fix compilation with 3.10.y kernel
-
-use proc_create instead of create_proc_entry
----
- ipt_NETFLOW.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
-index 4e1b9f3..758bccc 100644
---- a/ipt_NETFLOW.c
-+++ b/ipt_NETFLOW.c
-@@ -1432,12 +1432,12 @@ static int __init ipt_netflow_init(void)
- }
- 
- #ifdef CONFIG_PROC_FS
-- proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat));
-+ proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat),
-+ &nf_seq_fops);
- if (!proc_stat) {
- printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
- goto err_free_netflow_slab;
- }
-- proc_stat->proc_fops = &nf_seq_fops;
- #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
- proc_stat->owner = THIS_MODULE;
- #endif
---
-1.8.3.4
-
diff --git a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch b/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch
deleted file mode 100644
index f3cbfae429..0000000000
--- a/testing/ipt-netflow-grsec/0001-Fix-compilation-with-3.9.y-kernel.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 2f4b540bb2e60aca47263cf0fd2b3abc0a499d87 Mon Sep 17 00:00:00 2001
-From: Natanael Copa <ncopa@alpinelinux.org>
-Date: Tue, 14 May 2013 07:25:47 +0000
-Subject: [PATCH] Fix compilation with 3.9.y kernel
-
-use new hlist_for_each api
----
- ipt_NETFLOW.c | 4 +---
- 1 file changed, 1 insertion(+), 3 deletions(-)
-
-diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
-index d4c91e1..4e1b9f3 100644
---- a/ipt_NETFLOW.c
-+++ b/ipt_NETFLOW.c
-@@ -854,9 +854,7 @@ static struct ipt_netflow *
- ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
- {
- struct ipt_netflow *nf;
-- struct hlist_node *pos;
--
-- hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
-+ hlist_for_each_entry(nf, &ipt_netflow_hash[hash], hlist) {
- if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
- nf->nr_bytes < FLOW_FULL_WATERMARK) {
- NETFLOW_STAT_INC(found);
---
-1.8.2.2
-
diff --git a/testing/ipt-netflow-grsec/APKBUILD b/testing/ipt-netflow-grsec/APKBUILD
index 03e4f3faea..fc3a3ec825 100644
--- a/testing/ipt-netflow-grsec/APKBUILD
+++ b/testing/ipt-netflow-grsec/APKBUILD
@@ -2,11 +2,11 @@
 
 _flavor=grsec
 _kpkg=linux-$_flavor
-_kver=3.14.15
-_kpkgrel=1
+_kver=3.14.16
+_kpkgrel=0
 
 # when chaning _ver we *must* bump _mypkgrel
-_ver=1.8
+_ver=2.0
 _mypkgrel=0
 
 
@@ -30,7 +30,6 @@ url="http://ipt-netflow.sourceforge.net/"
 arch="all"
 license=GPL3+
 source="ipt-netflow-$_ver.tar.gz::https://github.com/aabc/ipt-netflow/archive/v$_ver.tar.gz
-	git.patch
 	"
 depends="$_kpkg-dev=$_kpkgver"
 makedepends="linux-${_flavor}-dev=$_kpkgver iptables-dev bash"
@@ -64,9 +63,6 @@ dev() {
 	default_dev
 }
 
-md5sums="922a7d9dd17f28a2c3551a1c5e9849eb ipt-netflow-1.8.tar.gz
-77875a5979589bca8c49a8331e7cc22b git.patch"
-sha256sums="eefe766eda6d4d918e4cc919b5d5acd2b681a258246017ab7948b80f0cb95112 ipt-netflow-1.8.tar.gz
-75ea3afe9532699d4fd1bef7f198dfaa97b3a310234a86b48371522ae0704a60 git.patch"
-sha512sums="c413515deeac1d604e36a53b39edcf340d6c3f78c29e53979fede8aa013e324ada4622b571ac5270f7495ab6982096cd2bd9e9283c9cc2cc7360aa5c3954792d ipt-netflow-1.8.tar.gz -3fade2ceb00ddae15dd6b1eacebc36480efe0c5c2d050e050e0aef37382ad72ca454f4d23080b7263ab2f078f72db4572e2514d3faca113d8e437437f7c3afcf git.patch" +md5sums="d1b40ee12b563edc4faae5e3234c3436 ipt-netflow-2.0.tar.gz" +sha256sums="547ca6c2e8d82fc7d3d113d2ab3f602106d1efa7739167f8c7850a43e5bbe46e ipt-netflow-2.0.tar.gz" +sha512sums="7bf330ac665180218ea4cc42fe6ac6d365b45632039ff9601559f7a4675738c409853433b15dd431dba68cfe36269d6992754154411e2761041fec021e19bf8d ipt-netflow-2.0.tar.gz" diff --git a/testing/ipt-netflow-grsec/git.patch b/testing/ipt-netflow-grsec/git.patch deleted file mode 100644 index faa49ffdc7..0000000000 --- a/testing/ipt-netflow-grsec/git.patch +++ /dev/null @@ -1,4308 +0,0 @@ -diff --git a/Makefile.in b/Makefile.in -index 30ebbfe..0a82c67 100644 ---- a/Makefile.in -+++ b/Makefile.in -@@ -1,43 +1,73 @@ --# -+# Edit Makefile.in and run ./configure - - KVERSION = @KVERSION@ - KDIR = @KDIR@ -+KINSTDIR = $(shell dirname @KDIR@) - IPTABLES_CFLAGS = @IPTABLES_CFLAGS@ - IPTABLES_MODULES = @IPTABLES_MODULES@ -+DEPMOD = depmod -a - -+# https://www.kernel.org/doc/Documentation/kbuild/modules.txt -+# https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt - obj-m = ipt_NETFLOW.o - --ipt_NETFLOW.ko: ipt_NETFLOW.c ipt_NETFLOW.h -+all: ipt_NETFLOW.ko libipt_NETFLOW.so libip6t_NETFLOW.so -+ipt_NETFLOW.ko: version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile - @echo Compiling for kernel $(KVERSION) - make -C $(KDIR) M=$(CURDIR) modules --all: ipt_NETFLOW.ko libipt_NETFLOW.so --minstall: -- make -C $(KDIR) M=$(CURDIR) modules_install -+ @touch $@ -+sparse: | version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile -+ @rm -f ipt_NETFLOW.ko ipt_NETFLOW.o -+ @echo Compiling for kernel $(KVERSION) -+ make -C $(KDIR) M=$(CURDIR) modules C=1 -+ @touch ipt_NETFLOW.ko -+minstall: | ipt_NETFLOW.ko -+ make -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR) -+ $(DEPMOD) - mclean: - make -C $(KDIR) M=$(CURDIR) clean - lclean: - -rm -f *.so *_sh.o - clean: mclean lclean -- -rm -f *.so *.o modules.order -+ -rm -f *.so *.o modules.order version.h -+ -+%_sh.o: libipt_NETFLOW.c -+ gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c -+ -+%.so: %_sh.o -+ gcc -shared -o $@ $< - --libipt_NETFLOW.so: libipt_NETFLOW.c -- gcc -O2 -Wall -Wunused -I$(KDIR)/include $(IPTABLES_CFLAGS) -fPIC -o libipt_NETFLOW_sh.o -c libipt_NETFLOW.c -- gcc -shared -o libipt_NETFLOW.so libipt_NETFLOW_sh.o -+version.h: ipt_NETFLOW.c ipt_NETFLOW.h Makefile -+ @if [ -d .git ] && type git >/dev/null 2>&1; then \ -+ echo "#define GITVERSION \"`git describe --dirty`\""; \ -+ fi > version.h - --linstall: ipt_NETFLOW.ko libipt_NETFLOW.so -- cp -a libipt_NETFLOW.so $(IPTABLES_MODULES) -+linstall: | libipt_NETFLOW.so libip6t_NETFLOW.so -+ install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so -+ install -D libip6t_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so - - install: minstall linstall - -+uninstall: -+ -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so -+ -rm -f $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so -+ -rm -f $(DESTDIR)$(KINSTDIR)/extra/ipt_NETFLOW.ko -+ - Makefile: Makefile.in configure - ./configure --make - - load: all -- insmod ipt_NETFLOW.ko active_timeout=5 -- iptables -A OUTPUT -d 0/0 -j NETFLOW -- iptables -A INPUT -d 0/0 -j NETFLOW -+ -insmod ipt_NETFLOW.ko active_timeout=5 protocol=9 -+ -iptables -I 
OUTPUT -j NETFLOW -+ -iptables -I INPUT -j NETFLOW -+ -ip6tables -I OUTPUT -j NETFLOW -+ -ip6tables -I INPUT -j NETFLOW - - unload: -- iptables -D OUTPUT -d 0/0 -j NETFLOW -- iptables -D INPUT -d 0/0 -j NETFLOW -- rmmod ipt_NETFLOW.ko -+ -iptables -D OUTPUT -j NETFLOW -+ -iptables -D INPUT -j NETFLOW -+ -ip6tables -D OUTPUT -j NETFLOW -+ -ip6tables -D INPUT -j NETFLOW -+ -rmmod ipt_NETFLOW.ko -+ -+reload: unload load -diff --git a/README b/README -index 213f02c..56a4fde 100644 ---- a/README -+++ b/README -@@ -1,10 +1,17 @@ --ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008 -+ipt_NETFLOW linux 2.6.x-3.x kernel module by <abc@telekom.ru> -- 2008-2013. -+ -+ High performance NetFlow v5, v9, IPFIX flow data export module for Linux -+ kernel. Supporting IPv4 and IPv6. Created to be useful for highly loaded -+ linux router. It should be used as iptables target. Also can export NAT -+ translation events using NetFlow Event Logging (NEL) for v9, IPFIX, or -+ specially crafted v5 flows. -+ - - ============================ - = OBTAINING LATEST VERSION = - ============================ - -- $ git clone git://ipt-netflow.git.sourceforge.net/gitroot/ipt-netflow/ipt-netflow -+ $ git clone git://git.code.sf.net/p/ipt-netflow/code ipt-netflow - $ cd ipt-netflow - - -@@ -12,94 +19,220 @@ ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008 - = INSTALLATION = - ================ - --1. Besides kernel you will need iptables/netfilter source matching your -- installation or just fresh install from there: ftp://ftp.netfilter.org/pub/iptables/snapshot/ -- I have this: ftp://ftp.netfilter.org/pub/iptables/snapshot/iptables-1.3.7-20070329.tar.bz2 -- Unpack it somewhere and build with make. -+ Four easy steps. -+ -+** 1. Prepare Kernel source -+ -+ If you have package system install kernel-devel package, otherwise install -+ raw kernel source from http://kernel.org matching _exactly_ version of your -+ installed kernel. -+ -+ a) What to do for Centos: -+ -+ ~# yum install kernel-devel -+ -+ b) What to do for Debian: -+ -+ ~# apt-get install module-assistant -+ ~# m-a prepare -+ -+ c) Otherwise, if you downloaded raw kernel sources don't forget to create -+ .config by copying it from your distribution's kernel. Its copy could reside -+ in /boot or sometimes in /proc, examples: -+ -+ kernel-src-dir/# cp /boot/config-`uname -r` .config -+ or -+ kernel-src-dir/# zcat /proc/config.gz > .config -+ -+ Assuming you unpacked kernel source into `kernel-src-dir/' directory. -+ Then run: -+ -+ kernel-src-dir/# make oldconfig -+ -+ After that you'll need to prepare kernel for modules build: -+ -+ kernel-src-dir/# make prepare modules_prepare -+ -+ Note: Don't try to `make prepare' in Centos kernel-devel package directory -+ (which is usually something like /usr/src/kernels/2.6.32-431.el6.x86_64) -+ as this is wrong and meaningless. -+ -+** 2. Prepare Iptables -+ -+ Before this step it also would be useful to install pkg-config if don't -+ already have. -+ -+ If you have package system just install iptables-devel (or iptables-dev) -+ package, otherwise install iptables source matching version of your -+ installation from ftp://ftp.netfilter.org/pub/iptables/ -+ -+ a) What to do for Centos: -+ -+ # yum install iptables-devel -+ -+ b) What to do for Debian: -+ -+ # apt-get install iptables-dev pkg-config - --2. Run ./configure script and it will create Makefile -+ c) Otherwise, for raw iptables source build it and make install. - --3. 
make all install; depmod -- This will install kernel module and iptable specific library. -+** 3. Now, to actually build the module run: - --Troubleshooting: -- 1) Sometimes you will want to add CC=gcc-3 to make command. -- Example: make CC=gcc-3.3 -+ ~/ipt-netflow# ./configure -+ ~/ipt-netflow# make all install -+ ~/ipt-netflow# depmod - -- 2) Compile module with actual kernel source compiled. -- I.e. first compile kernel and boot into it, and then compile module. -+ This will install kernel module and iptables specific library. - -- 3) For autoloading module after reboot: set net.netflow.destination (or load -- module, if idestination set on load) after interfaces are up. Becasue module -- needs exporting interface (usually lo) to establish export connection. -+ Troubleshooting: - --4. After this point you should be able to load module -- and use -j NETFLOW target in your iptables. See next section. -+ a) Sometimes you will want to add CC=gcc-3 to make command. -+ Example: make CC=gcc-3.3 -+ -+ b) Compile module with actual kernel source compiled. -+ I.e. first compile kernel and boot into it, and then compile module. -+ If you are using kernel-devel package check that its version matches -+ your kernel package. -+ -+ c) If you have sources in non-standard places or configure isn't able to -+ find something run ./configure --help to see how to specify paths manually. -+ -+** 4. After this point you should be able to load module and -+ use -j NETFLOW target in your iptables. See next section. - - - =========== - = RUNNING = - =========== - --1. You can load module by insmod like this: -- # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1 -+1. You can load module directly by insmod like this: -+ -+ # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1 - - Or if properly installed (make install; depmod) by this: -- # modprobe ipt_NETFLOW destination=127.0.0.1:2055 -+ -+ # modprobe ipt_NETFLOW destination=127.0.0.1:2055 - - See, you may add options in insmod/modprobe command line, or add -- them in /etc/ to modules.conf or modprobe.conf like thus: -- options ipt_NETFLOW destination=127.0.0.1:2055 -+ them in /etc/modprobe.conf or /etc/modprobe.d/ipt_NETFLOW.conf -+ like thus: -+ -+ options ipt_NETFLOW destination=127.0.0.1:2055 protocol=9 natevents=1 - - 2. Statistics is in /proc/net/stat/ipt_netflow -- To view slab statistics: grep ipt_netflow /proc/slabinfo -+ To view boring slab statistics: grep ipt_netflow /proc/slabinfo - - 3. You can view parameters and control them via sysctl, example: -- # sysctl -w net.netflow.hashsize=32768 - --4. Example of directing all traffic into module: -- # iptables -A FORWARD -j NETFLOW -- # iptables -A INPUT -j NETFLOW -- # iptables -A OUTPUT -j NETFLOW -+ # sysctl net.netflow -+ # sysctl net.netflow.hashsize=32768 -+ -+ Note: For after-reboot configuration I recommend to store module parameters -+ in modprobe configs instead of storing them in /etc/sysctl.conf, as it's -+ less clear when init process will apply sysctl.conf, before of after -+ module's load. -+ -+4. Example of directing all IPv4 traffic into the module: -+ -+ # iptables -I FORWARD -j NETFLOW -+ # iptables -I INPUT -j NETFLOW -+ # iptables -I OUTPUT -j NETFLOW -+ -+ Note: It is preferable (because easier to understand) to _insert_ -+ NETFLOW target at the top of the chain, otherwise not all traffic may -+ reach NETFLOW if your iptables configuration is complicated and some -+ other rule inadvertently consume the traffic (dropping or acepting before -+ NETFLOW is reached). 
It's always good to test your configuration. -+ Use iptables -L -nvx to check pkts/bytes counters on the rules. -+ -+5. If you want to account IPv6 traffic you should use protocol 9 or 10. -+ Example of directing all IPv6 traffic into the module: - -+ # sysctl net.netflow.protocol=10 -+ # ip6tables -I FORWARD -j NETFLOW -+ # ip6tables -I INPUT -j NETFLOW -+ # ip6tables -I OUTPUT -j NETFLOW -+ -+ Note: First enable right version of protocol and after that add ip6tables -+ rules, otherwise you will get errors in dmesg. -+ -+6. If you want to account NAT events (NEL): -+ -+ # sysctl net.netflow.natevents=1 -+ -+ Note that natevents feature is completely independent from traffic accounting -+ (it's using so called conntrack events), thus you don't need to set or change -+ any iptables rules to use that. You may need to enable kernel config option -+ CONFIG_NF_CONNTRACK_EVENTS though (if it isn't already enabled). -+ For details on how they are exported for different protocol versions see -+ below. - - =========== - = OPTIONS = - =========== - -+ protocol=5 -+ - what version of NetFlow protocol to use. Default is 5. -+ You can choose from 5, 9, or 10 (where 10 is IPFIX). If you plan -+ to account IPv6 traffic you should use protocol 9 or 10 (IPFIX), -+ because NetFlow v5 isn't compatible with IPv6. -+ - destination=127.0.0.1:2055 - - where to export netflow, to this ip address - You will see this connection in netstat like this: - udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED - - destination=127.0.0.1:2055,192.0.0.1:2055 -- - mirror flows to two (can be more) addresses, -- separate addresses with comma. -+ - mirror flows to two (can be more) addresses, separate addresses -+ with comma. -+ -+ natevents=1 -+ - Collect and send NAT translation events as NetFlow Event Logging (NEL) -+ for NetFlow v9/IPFIX, or as dummy flows compatible with NetFlow v5. -+ Default is 0 (don't send). -+ -+ For NetFlow v5 protocol meaning of fields in dummy flows are such: -+ Src IP, Src Port is Pre-nat source address. -+ Dst IP, Dst Port is Post-nat destination address. -+ - These two fields made equal to data flows catched in FORWARD chain. -+ Nexthop, Src AS is Post-nat source address for SNAT. Or, -+ Nexthop, Dst AS is Pre-nat destination address for DNAT. -+ TCP Flags is SYN+SCK for start event, RST+FIN for stop event. -+ Pkt/Traffic size is 0 (zero), so it won't interfere with accounting. - - inactive_timeout=15 - - export flow after it's inactive 15 seconds. Default value is 15. - - active_timeout=1800 -- - export flow after it's active 1800 seconds (30 minutes). Default value is 1800. -+ - export flow after it's active 1800 seconds (30 minutes). Default valuae -+ is 1800. -+ -+ refresh-rate=20 -+ - for NetFlow v9 and IPFIX it's rate how frequently to re-send templates -+ (per packets). You probably don't need to change default (which is 20). -+ -+ timeout-rate=30 -+ - for NetFlow v9 and IPFIX it's rate when to re-send old templates (in -+ minutes). No need to change it. - - debug=0 - - debug level (none). - - sndbuf=number -- - size of output socket buffer in bytes. Recommend you to put -+ - size of output socket buffer in bytes. I recommend you to put - higher value if you experience netflow packet drops (can be - seen in statistics as 'sock: fail' number.) - Default value is system default. - - hashsize=number - - Hash table bucket size. Used for performance tuning. 
-- Abstractly speaking, it should be two times bigger than flows -+ Abstractly speaking, it should be minimum two times bigger than flows - you usually have, but not need to. - Default is system memory dependent small enough value. - - maxflows=2000000 -- - Maximum number of flows to account. It's here to prevent DOS attacks. After -- this limit reached new flows will not be accounted. Default is -+ - Maximum number of flows to account. It's here to prevent DOS attacks. -+ After this limit reached new flows will not be accounted. Default is - 2000000, zero is unlimited. - - aggregation=string.. -@@ -130,14 +263,15 @@ Troubleshooting: - = HOW TO READ STAT = - ==================== - -- Statistics is your friend to fine tune and understand netflow module performance. -+ Statistics is your friend to fine tune and understand netflow module -+ performance. - - To see stat: - # cat /proc/net/stat/ipt_netflow - - How to interpret the data: - --> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K -+> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K, worker delay 100/1000. - - active X: currently active flows in memory cache. - - for optimum CPU performance it is recommended to set hash table size to -@@ -146,8 +280,9 @@ Troubleshooting: - mem XK: how much kilobytes of memory currently taken by active flows. - - one active flow taking 56 bytes of memory. - - there is system limit on cache size too. -+ worker delay X/HZ: how frequently exporter scan flows table per second. - --> Hash: size 8192 (mem 32K), metric 1.0, 1.0, 1.0, 1.0. MemTraf: 1420 pkt, 364 K (pdu 0, 0). -+> Hash: size 8192 (mem 32K), metric 1.00, [1.00, 1.00, 1.00]. MemTraf: 1420 pkt, 364 K (pdu 0, 0). - - Hash: size X: current hash size/limit. - - you can control this by sysctl net.netflow.hashsize variable. -@@ -156,18 +291,22 @@ Troubleshooting: - - optimal value is twice of average of active flows. - mem XK: how much memory occupied by hash table. - - hash table is fixed size by nature, taking 4 bytes per entry. -- metric X, X, X, X: how optimal is your hash table being used. -+ metric X, [X, X, X]: how optimal is your hash table being used. - - lesser value mean more optimal hash table use, min is 1.0. -- - this is moving average (EWMA) of hash table access divided -- by match rate (searches / matches) for 4sec, and 1, 5, 15 minutes. -- Sort of hash table load average. -+ - last three numbers in squares is moving average (EWMA) of hash table -+ access divided by match rate (searches / matches) for 4sec, and 1, 5, and -+ 15 minutes. Sort of hash table load average. First value is instantaneous. -+ You can try to increase hashsize if averages more than 1 (increase -+ certainly if >= 2). - MemTraf: X pkt, X K: how much traffic accounted for flows that are in memory. - - these flows that are residing in internal hash table. - pdu X, X: how much traffic in flows preparing to be exported. - - it is included already in aforementioned MemTraf total. - --> Timeout: active 1800, inactive 15. Maxflows 2000000 -+> Protocol version 10 (ipfix), refresh-rate 20, timeout-rate 30, (templates 2, active 2). Timeouts: active 5, inactive 15. Maxflows 2000000 - -+ Protocol version currently in use. Refresh-rate and timeout-rate -+ for v9 and IPFIX. Total templates generated and currently active. - Timeout: active X: how much seconds to wait before exporting active flow. - - same as sysctl net.netflow.active_timeout variable. - inactive X: how much seconds to wait before exporting inactive flow. 
-@@ -180,20 +319,22 @@ Troubleshooting: - - - Module throughput values for 1 second, 1 minute, and 5 minutes. - --> cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes> --> cpu0 stat: 980540 10473 180600, 0 0 0 0, sock: 4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K -+> cpu# stat: <search found new [metric], trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes> -+> cpu0 stat: 980540 10473 180600 [1.03], 0 0 0 0, sock: 4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K - - cpu#: this is Total and per CPU statistics for: - stat: <search found new, trunc frag alloc maxflows>: internal stat for: - search found new: hash table searched, found, and not found counters. -- trunc: how much truncated packets is ignored -+ [metric]: average hash metric since module load. -+ trunc: how much truncated packets are ignored - - these are that possible don't have valid IP header. - - accounted in drop packets counter but not in drop bytes. - frag: how much fragmented packets have seen. - - kernel always defragments INPUT/OUTPUT chains for us. - - these packets are not ignored but not reassembled either, so: -- - if there is no enough data in fragment (ex. tcp ports) it is considered zero. -- alloc: how much cache memory allocations is failed. -+ - if there is no enough data in fragment (ex. tcp ports) it is considered -+ zero. -+ alloc: how much cache memory allocations are failed. - - packets ignored and accounted in drop stat. - - probably increase system memory if this ever happen. - maxflows: how much packets ignored on maxflows (maximum active flows reached). -@@ -203,7 +344,8 @@ Troubleshooting: - sock: <ok fail cberr, bytes>: table of exporting stats for: - ok: how much Netflow PDUs are exported (i.e. UDP packets sent by module). - fail: how much socket errors (i.e. packets failed to be sent). -- - packets dropped and their internal statistics cumulatively accounted in drop stat. -+ - packets dropped and their internal statistics cumulatively accounted in -+ drop stat. - cberr: how much connection refused ICMP errors we got from export target. - - probably you not launched collector software on destination, - - or specified wrong destination address. -@@ -225,20 +367,34 @@ Troubleshooting: - packet is for new flow but maxflows is already reached, - all flows in export packets that got socket error. - --> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, other 0 -+> Natevents disabled, count start 0, stop 0. -+ -+ - Natevents mode disabled or enabled, and how much start or stop events -+ are reported. -+ -+> sock0: 10.0.0.2:2055 unconnected (1 attempts). -+ -+ If socket is unconnected (for example if module loaded before interfaces is -+ up) it shows now much connection attempts was failed. It will try to connect -+ until success. -+ -+> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, connect 0, other 0 - - sockX: per destination stats for: - X.X.X.X:Y: destination ip address and port. - - controlled by sysctl net.netflow.destination variable. - sndbuf X: how much data socket can hold in buffers. - - controlled by sysctl net.netflow.sndbuf variable. -- - if you have packet drops due to sndbuf reached (error -11) increase this value. -+ - if you have packet drops due to sndbuf reached (error -11) increase this -+ value. - filled X: how much data in socket buffers right now. 
- peak X: peak value of how much data in socket buffers was. - - you will be interested to keep it below sndbuf value. - err: how much packets are dropped due to errors. - - all flows from them will be accounted in drop stat. -- sndbuf reached X: how much packets dropped due to sndbuf being too small (error -11). -+ sndbuf reached X: how much packets dropped due to sndbuf being too small -+ (error -11). -+ connect X: how much connection attempts was failed. - other X: dropped due to other possible errors. - - > aggr0: ... -diff --git a/README.promisc b/README.promisc -index 60ca922..31d774f 100644 ---- a/README.promisc -+++ b/README.promisc -@@ -2,9 +2,14 @@ Hello, - - If you wish to account with netflow module traffic mirrored on switch you may follow this example: - --************** --* Solution 1 * --************** -+ -+ Solution 1: General kernel patch. -+ Solution 2: Alternative w/o kernel patch. -+ -+ -+ ************** -+ * Solution 1 * -+ ************** - - 1. Patch your kernel with `raw_promisc.patch' to enable raw table to see promisc traffic. - -@@ -33,17 +38,7 @@ If you wish to account with netflow module traffic mirrored on switch you may fo - # /sbin/vconfig add eth1 47 - # /sbin/ifconfig eth1.47 up - --5. Recompile ipt_netflow module with #define RAW_PROMISC_HACK uncommented: -- -- Find this line in ipt_NETFLOW.c (should be line 7): -- --//#define RAW_PROMISC_HACK -- -- And remove two slashes at beginning of the line, so it become like this: -- --#define RAW_PROMISC_HACK -- -- Re-compile module: -+5. Compile module: - - # make clean all install - -@@ -55,13 +50,14 @@ If you wish to account with netflow module traffic mirrored on switch you may fo - - # /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW - -- - Voila. - -+ps. For Debian Squeeze instructions look at raw_promisc_debian_squeeze6.patch - --************** --* Solution 2 * --************** -+ -+ ************** -+ * Solution 2 * -+ ************** - - By Anonymous. - -@@ -81,4 +77,3 @@ Sometimes you may need to run: - - for this scheme to work. - -- -diff --git a/configure b/configure -index 677dd7f..3f10e2a 100755 ---- a/configure -+++ b/configure -@@ -3,7 +3,7 @@ - PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin - - error() { -- echo "! Error: $@" -+ echo -e "! Error: $@" - exit 1 - } - -@@ -56,19 +56,20 @@ get_lib_from_lib() { - } - - iptables_inc() { -- echo -n "Iptables include path: " -+ echo -n "Iptables include flags: " - if [ "$IPTINC" ]; then -- echo "$IPTINC (user specified)" - IPTINC="-I$IPTINC" -+ echo "$IPTINC (user specified)" -+ elif [ "$PKGVER" ]; then -+ IPTINC="$PKGINC" -+ echo "$IPTINC (pkg-config)" -+ elif [ "$NOIPTSRC" ]; then -+ IPTINC= -+ echo "none (default)" - else -- if [ "$PKGINC" ]; then -- IPTINC="$PKGINC" -- echo "$IPTINC (pkg-config)" -- else -- IPTINC="$IPTSRC/include" -- echo "$IPTINC (from source)" -- IPTINC="-I$IPTINC" -- fi -+ IPTINC="$IPTSRC/include" -+ IPTINC="-I$IPTINC" -+ echo "$IPTINC (from source)" - fi - } - -@@ -109,7 +110,16 @@ try_dir2() { - test -d "$1" && try_dir `dirname $1` && return 0 - } - --iptables_ver() { -+check_pkg_config() { -+ test "$PKGWARN" && return 1 -+ if ! which pkg-config >/dev/null 2>&1; then -+ echo "! You don't have pkg-config, it may be useful to install it." 
-+ PKGWARN=1 -+ return 1 -+ fi -+ return 0 -+} -+iptables_find_version() { - echo -n "Iptables binary version: " - if [ "$IPTVER" ]; then - echo "$IPTVER (user specified)" -@@ -121,6 +131,7 @@ iptables_ver() { - else - echo "no iptables binary found" - fi -+ check_pkg_config - PKGVER=`pkg-config --modversion xtables 2>/dev/null` - if [ "$PKGVER" ]; then - IPTVER="$PKGVER" -@@ -131,44 +142,90 @@ iptables_ver() { - fi - } - --iptables_dir() { -- test "$IPTINC" && return 1 -- test "$PKGINC" && return 1 -- -- VER="iptables-$IPTVER" -- if [ "$IPTSRC" ]; then -- echo "User specified source directory: $IPTSRC" -- try_dir $IPTSRC || error "Specified directory is not iptables source.." -+compile_libitp_test() { -+ echo -n "Checking for presence of $@... " -+ echo " -+#define __EXPORTED_HEADERS__ -+#include <$*>" > test.c -+ gcc -c test.c >/dev/null 2>&1 -+ RET=$? -+ if [ $RET = 0 ]; then -+ echo Yes; - else -- echo "Searching for $VER sources.." -- try_dir "./$VER" && return 0 -- try_dir "../$VER" && return 0 -- try_dir "/usr/src/$VER" && return 0 -- try_dirg "iptables" && return 0 -- try_dirg "../iptables" && return 0 -- try_dirg "/usr/src/iptables" && return 0 -- try_dir2 `locate $VER/extensions | head -1` && return 0 -- error "Can not find iptables source directory, try setting it with --ipt-src=" -+ echo No; - fi -+ rm -f test.c test.o -+ return $RET - } - --iptables_pkg_config() { -+iptables_try_pkgconfig() { - if [ ! "$PKGVER" ]; then -+ check_pkg_config -+ PKGVER=`pkg-config --modversion xtables 2>/dev/null` -+ TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null` - echo -n "pkg-config for version $IPTVER exists: " -- PKGVER=`pkg-config --exact-version=$IPTVER --modversion xtables 2>/dev/null` -+ pkg-config --exact-version=$IPTVER xtables 2>/dev/null - if [ $? = 0 ]; then - echo "Yes" -+ PKGVER=$TRYPKGVER - else -- echo "No (reported: $PKGVER)" -- unset PKGVER -+ if [ "$TRYPKGVER" ]; then -+ echo "No (reported: $TRYPKGVER)" -+ else -+ echo "No" -+ fi - fi - fi - if [ "$PKGVER" ]; then -+ check_pkg_config -+ PKGVER=`pkg-config --modversion xtables 2>/dev/null` - PKGINC=`pkg-config --cflags xtables` - PKGLIB=`pkg-config --variable=xtlibdir xtables` -- IPTCFLAGS="-DXTABLES" - else -- IPTCFLAGS="-DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\"" -+ # Newer versions of iptables should not have -I/kernel/include! -+ # So I assume that newer version will have correct pkg-config set up -+ # and if not, then it's older who need it. -+ IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\"" -+ fi -+ if compile_libitp_test xtables.h; then -+ IPTCFLAGS="-DXTABLES $IPTCFLAGS" -+ elif ! compile_libitp_test iptables.h; then -+ echo "! Iptables headers not found. You may need to specify --ipt-inc=..." -+ if [ -s /etc/debian_version ]; then -+ echo "! " -+ echo "! Under Debian simply run this:" -+ echo "! root# apt-get install iptables-dev pkg-config" -+ elif [ -s /etc/redhat-release ]; then -+ echo "! " -+ arch=.`uname -m` -+ echo "! Under Centos simply run this:" -+ echo "! root# yum install iptables-devel$arch pkgconfig" -+ fi -+ exit 1 -+ fi -+ -+} -+ -+iptables_find_src() { -+ test "$IPTINC" && return 1 -+ test "$PKGVER" && return 1 -+ -+ VER="iptables-$IPTVER" -+ if [ "$IPTSRC" ]; then -+ echo "User specified source directory: $IPTSRC" -+ try_dir $IPTSRC || error "Specified directory is not iptables source.." -+ else -+ echo "Searching for $VER sources.." 
-+ try_dir "./$VER" && return 0 -+ try_dir "../$VER" && return 0 -+ try_dir "/usr/src/$VER" && return 0 -+ try_dirg "iptables" && return 0 -+ try_dirg "../iptables" && return 0 -+ try_dirg "/usr/src/iptables" && return 0 -+ try_dir2 `locate $VER/extensions 2>/dev/null | head -1` && return 0 -+ echo "! Can not find iptables source directory, you may try setting it with --ipt-src=" -+ echo "! This is not fatal error, yet. Will be just using default include dir." -+ NOIPTSRC=1 - fi - } - -@@ -206,18 +263,110 @@ do - esac - done - --test "$KVERSION" || KVERSION=`uname -r` --echo Kernel version: $KVERSION -+kernel_find_version() { -+ KHOW=requested -+ test "$KVERSION" && return 0 -+ -+ if grep -q '#.*Debian' /proc/version; then -+ KHOW=proc -+ KVERSION=`sed -n 's/.*#.*Debian \([0-9\.]\+\)-.*/\1/p' /proc/version` -+ KLIBMOD=`uname -r` -+ else -+ KHOW=uname -+ KVERSION=`uname -r` -+ fi -+ test "$KDIR" || return 0 -+ -+ test -s $KDIR/Makefile || return 1 -+ test -s $KDIR/include/config/kernel.release || return 1 -+ KVERSION=`cat $KDIR/include/config/kernel.release` -+ KHOW=sources -+} -+ -+kernel_check_src() { -+ if [ -s "$1/Makefile" ]; then -+ KDIR="$1" -+ return 0 -+ fi -+ return 1 -+} -+ -+kernel_find_source() { -+ KSHOW=requested -+ test "$KDIR" && return 0 -+ KSHOW=found -+ kernel_check_src /lib/modules/$KLIBMOD/build && return 0 -+ kernel_check_src /lib/modules/$KVERSION/build && return 0 -+ kernel_check_src /usr/src/kernels/$KVERSION && return 0 -+ kernel_check_src /usr/src/linux-$KVERSION && return 0 -+ echo "! Linux source not found. Don't panic. You may specify kernel source" -+ echo "! directory with --kdir=..., or try to install kernel-devel package," -+ echo "! or just raw sources for linux-$KVERSION from kernel.org." -+ if grep -q -i centos /proc/version 2>/dev/null; then -+ echo "! " -+ arch=.`uname -m` -+ echo "! Under Centos simply run this:" -+ echo "! root# yum install kernel-devel iptables-devel$arch pkgconfig" -+ fi -+ if grep -q -i debian /proc/version 2>/dev/null; then -+ echo "! " -+ echo "! Under Debian simply run this:" -+ echo "! root# apt-get install module-assistant iptables-dev pkg-config" -+ echo "! root# m-a prepare" -+ fi -+ exit 1 -+} -+ -+kernel_check_consistency() { -+ if test -s $KDIR/include/config/kernel.release; then -+ SRCVER=`cat $KDIR/include/config/kernel.release` -+ test "$KVERSION" != "$SRCVER" && error "$KHOW kernel version ($KVERSION) and $KSHOW version of kernel source ($SRCVER) doesn't match!\n!" \ -+ "You may try to specify only kernel source tree with --kdir=$KDIR\n!" \ -+ "and configure will pick up version properly." -+ else -+ test -e "$KDIR/.config" || error ".config in kernel source not found, run make menuconfig in $KDIR" -+ test -d "$KDIR/include/config" || error "kernel is not prepared, run make prepare modules_prepare in $KDIR" -+ fi -+} -+ -+kconfig() { -+ KCONFIG=$KDIR/.config -+ if ! grep -q "^$1=" $KCONFIG 2>/dev/null; then -+ if [ "$KCONFIGREPORTED" != true ]; then -+ KCONFIGREPORTED=true -+ echo Kernel config file checked: $KCONFIG -+ echo -+ fi -+ echo "! Attention: $1 is undefined in your kernel configuration" -+ echo "! Without this option enabled $2 will not work." 
-+ echo -+ fi -+} -+ -+kernel_check_config() { -+ kconfig CONFIG_SYSCTL "sysctl interface" -+ kconfig CONFIG_PROC_FS "proc interface" -+ kconfig CONFIG_NF_NAT_NEEDED "natevents" -+ kconfig CONFIG_NF_CONNTRACK_EVENTS "natevents" -+ kconfig CONFIG_NF_CONNTRACK_MARK "connmark tracking" -+ kconfig CONFIG_IPV6 "IPv6" -+ kconfig CONFIG_IP6_NF_IPTABLES "ip6tables target" -+} - --test "$KDIR" || KDIR=/lib/modules/$KVERSION/build --echo Kernel sources: $KDIR -+kernel_find_version #KVERSION -+test "$KLIBMOD" || KLIBMOD=$KVERSION -+echo "Kernel version: $KVERSION ($KHOW)" -+kernel_find_source #KDIR -+echo "Kernel sources: $KDIR ($KSHOW)" -+kernel_check_consistency -+kernel_check_config - - test "$IPTBIN" || IPTBIN=`which iptables` - --iptables_ver #IPTVER --iptables_pkg_config --iptables_dir #IPTSRC --iptables_src_version #check IPTSRC match to IPTVER -+iptables_find_version #IPTVER -+iptables_try_pkgconfig #try to configure from pkg-config -+iptables_find_src #IPTSRC -+iptables_src_version #check that IPTSRC match to IPTVER - iptables_inc #IPTINC - iptables_modules #IPTLIB - -@@ -225,7 +374,6 @@ REPLACE="\ - s!@KVERSION@!$KVERSION!;\ - s!@KDIR@!$KDIR!;\ - s!@IPTABLES_VERSION@!$IPTVER!;\ --s!@IPTABLES_INCLUDES@!$IPTINC!;\ - s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\ - s!@IPTABLES_MODULES@!$IPTLIB!" - -diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c -index d4c91e1..ad974c5 100644 ---- a/ipt_NETFLOW.c -+++ b/ipt_NETFLOW.c -@@ -1,6 +1,6 @@ - /* - * This is NetFlow exporting module (NETFLOW target) for linux -- * (c) 2008-2012 <abc@telekom.ru> -+ * (c) 2008-2013 <abc@telekom.ru> - * - * - * This program is free software: you can redistribute it and/or modify -@@ -18,8 +18,6 @@ - * - */ - --//#define RAW_PROMISC_HACK -- - #include <linux/module.h> - #include <linux/skbuff.h> - #include <linux/proc_fs.h> -@@ -31,16 +29,26 @@ - #include <linux/icmp.h> - #include <linux/igmp.h> - #include <linux/inetdevice.h> --#include <linux/jhash.h> -+#include <linux/hash.h> -+#include <linux/delay.h> -+#include <linux/spinlock_types.h> - #include <net/icmp.h> - #include <net/ip.h> -+#include <net/ipv6.h> - #include <net/tcp.h> - #include <net/route.h> -+#include <net/ip6_fib.h> - #include <net/dst.h> - #include <linux/netfilter_ipv4/ip_tables.h> -+#if defined(CONFIG_NF_NAT_NEEDED) || defined(CONFIG_NF_CONNTRACK_MARK) -+#include <linux/notifier.h> -+#include <net/netfilter/nf_conntrack.h> -+#include <net/netfilter/nf_conntrack_core.h> -+#endif - #include <linux/version.h> - #include <asm/unaligned.h> - #include "ipt_NETFLOW.h" -+#include "murmur3.h" - #ifdef CONFIG_BRIDGE_NETFILTER - #include <linux/netfilter_bridge.h> - #endif -@@ -74,41 +82,66 @@ - #define ipt_target xt_target - #endif - --#define IPT_NETFLOW_VERSION "1.8" -+#define IPT_NETFLOW_VERSION "1.8.2" /* Note that if you are using git, you -+ will see version in other format. 
*/ -+#include "version.h" -+#ifdef GITVERSION -+#undef IPT_NETFLOW_VERSION -+#define IPT_NETFLOW_VERSION GITVERSION -+#endif - - MODULE_LICENSE("GPL"); - MODULE_AUTHOR("<abc@telekom.ru>"); - MODULE_DESCRIPTION("iptables NETFLOW target module"); - MODULE_VERSION(IPT_NETFLOW_VERSION); -+MODULE_ALIAS("ip6t_NETFLOW"); - - #define DST_SIZE 256 - static char destination_buf[DST_SIZE] = "127.0.0.1:2055"; - static char *destination = destination_buf; --module_param(destination, charp, 0400); -+module_param(destination, charp, 0444); - MODULE_PARM_DESC(destination, "export destination ipaddress:port"); - - static int inactive_timeout = 15; --module_param(inactive_timeout, int, 0600); -+module_param(inactive_timeout, int, 0644); - MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds"); - - static int active_timeout = 30 * 60; --module_param(active_timeout, int, 0600); -+module_param(active_timeout, int, 0644); - MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds"); - - static int debug = 0; --module_param(debug, int, 0600); -+module_param(debug, int, 0644); - MODULE_PARM_DESC(debug, "debug verbosity level"); - - static int sndbuf; --module_param(sndbuf, int, 0400); -+module_param(sndbuf, int, 0444); - MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size"); - -+static int protocol = 5; -+module_param(protocol, int, 0444); -+MODULE_PARM_DESC(protocol, "netflow protocol version (5, 9, 10)"); -+ -+static unsigned int refresh_rate = 20; -+module_param(refresh_rate, uint, 0644); -+MODULE_PARM_DESC(refresh_rate, "NetFlow v9/IPFIX refresh rate (packets)"); -+ -+static unsigned int timeout_rate = 30; -+module_param(timeout_rate, uint, 0644); -+MODULE_PARM_DESC(timeout_rate, "NetFlow v9/IPFIX timeout rate (minutes)"); -+ -+#ifdef CONFIG_NF_NAT_NEEDED -+static int natevents = 0; -+module_param(natevents, int, 0444); -+MODULE_PARM_DESC(natevents, "send NAT Events"); -+#endif -+ - static int hashsize; --module_param(hashsize, int, 0400); -+module_param(hashsize, int, 0444); - MODULE_PARM_DESC(hashsize, "hash table size"); - - static int maxflows = 2000000; --module_param(maxflows, int, 0600); -+module_param(maxflows, int, 0644); - MODULE_PARM_DESC(maxflows, "maximum number of flows"); - static int peakflows = 0; - static unsigned long peakflows_at; -@@ -121,22 +154,52 @@ MODULE_PARM_DESC(aggregation, "aggregation ruleset"); - - static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat); - static LIST_HEAD(usock_list); --static DEFINE_RWLOCK(sock_lock); -+static DEFINE_MUTEX(sock_lock); - -+#define LOCK_COUNT (1<<8) -+#define LOCK_COUNT_MASK (LOCK_COUNT-1) -+static spinlock_t htable_locks[LOCK_COUNT] = { -+ [0 ... 
LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(htable_locks) -+}; -+static DEFINE_RWLOCK(htable_rwlock); /* global lock to protect htable_locks change */ - static unsigned int ipt_netflow_hash_rnd; --struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */ -+static struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */ - static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */ - static LIST_HEAD(ipt_netflow_list); /* all flows */ -+static DEFINE_SPINLOCK(hlist_lock); /* should almost always be locked w/o _bh */ - static LIST_HEAD(aggr_n_list); - static LIST_HEAD(aggr_p_list); - static DEFINE_RWLOCK(aggr_lock); -+#ifdef CONFIG_NF_NAT_NEEDED -+static LIST_HEAD(nat_list); /* nat events */ -+static DEFINE_SPINLOCK(nat_lock); -+static unsigned long nat_events_start = 0; -+static unsigned long nat_events_stop = 0; -+#endif - static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */ - static atomic_t ipt_netflow_count = ATOMIC_INIT(0); --static DEFINE_SPINLOCK(ipt_netflow_lock); /* hash table lock */ - --static long long pdu_packets = 0, pdu_traf = 0; --static struct netflow5_pdu pdu; --static unsigned long pdu_ts_mod; -+static long long pdu_packets = 0, pdu_traf = 0; /* how much accounted traffic in pdu */ -+static unsigned int pdu_count = 0; -+static unsigned int pdu_seq = 0; -+static unsigned int pdu_data_records = 0; -+static unsigned int pdu_tpl_records = 0; -+static unsigned long pdu_ts_mod; /* ts of last flow */ -+static union { -+ struct netflow5_pdu v5; -+ struct netflow9_pdu v9; -+ struct ipfix_pdu ipfix; -+} pdu; -+static int engine_id = 0; /* Observation Domain */ -+static __u8 *pdu_data_used; -+static __u8 *pdu_high_wm; /* high watermark */ -+static unsigned int pdu_max_size; /* sizeof pdu */ -+static struct flowset_data *pdu_flowset = NULL; /* current data flowset */ -+ -+static void (*netflow_export_flow)(struct ipt_netflow *nf); -+static void (*netflow_export_pdu)(void); /* called on timeout */ -+static void netflow_switch_version(int ver); -+ - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) - static void netflow_work_fn(void *work); - static DECLARE_WORK(netflow_work, netflow_work_fn, NULL); -@@ -146,19 +209,26 @@ static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn); - #endif - static struct timer_list rate_timer; - -+#define TCP_SYN_ACK 0x12 - #define TCP_FIN_RST 0x05 - - static long long sec_prate = 0, sec_brate = 0; - static long long min_prate = 0, min_brate = 0; - static long long min5_prate = 0, min5_brate = 0; --static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */ -+static unsigned int metric = 100, min15_metric = 100, min5_metric = 100, min_metric = 100; /* hash metrics */ - - static int set_hashsize(int new_size); - static void destination_removeall(void); - static int add_destinations(char *ptr); - static void aggregation_remove(struct list_head *list); - static int add_aggregation(char *ptr); --static void netflow_scan_and_export(int flush); -+static int netflow_scan_and_export(int flush); -+enum { -+ DONT_FLUSH, AND_FLUSH -+}; -+static int template_ids = FLOWSET_DATA_FIRST; -+static int tpl_count = 0; /* how much active templates */ -+ - - static inline __be32 bits2mask(int bits) { - return (bits? 
0xffffffff << (32 - bits) : 0); -@@ -175,28 +245,46 @@ static inline int mask2bits(__be32 mask) { - /* under that lock worker is always stopped and not rescheduled, - * and we can call worker sub-functions manually */ - static DEFINE_MUTEX(worker_lock); --static inline void __start_scan_worker(void) -+#define MIN_DELAY 1 -+#define MAX_DELAY (HZ / 10) -+static int worker_delay = HZ / 10; -+static inline void _schedule_scan_worker(const int status) - { -- schedule_delayed_work(&netflow_work, HZ / 10); -+ /* rudimentary congestion avoidance */ -+ if (status > 0) -+ worker_delay -= status; -+ else if (status < 0) -+ worker_delay /= 2; -+ else -+ worker_delay++; -+ if (worker_delay < MIN_DELAY) -+ worker_delay = MIN_DELAY; -+ else if (worker_delay > MAX_DELAY) -+ worker_delay = MAX_DELAY; -+ schedule_delayed_work(&netflow_work, worker_delay); - } - --static inline void start_scan_worker(void) -+/* This is only called soon after pause_scan_worker. */ -+static inline void cont_scan_worker(void) - { -- __start_scan_worker(); -+ _schedule_scan_worker(0); - mutex_unlock(&worker_lock); - } - --/* we always stop scanner before write_lock(&sock_lock) -- * to let it never hold that spin lock */ --static inline void __stop_scan_worker(void) -+static inline void _unschedule_scan_worker(void) - { -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) -+ cancel_rearming_delayed_work(&netflow_work); -+#else - cancel_delayed_work_sync(&netflow_work); -+#endif - } - --static inline void stop_scan_worker(void) -+/* This is only used for quick pause (in procctl). */ -+static inline void pause_scan_worker(void) - { - mutex_lock(&worker_lock); -- __stop_scan_worker(); -+ _unschedule_scan_worker(); - } - - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) -@@ -223,11 +311,14 @@ static int nf_seq_show(struct seq_file *seq, void *v) - int snum = 0; - int peak = (jiffies - peakflows_at) / HZ; - -- seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n", -+ seq_printf(seq, "ipt_NETFLOW version " IPT_NETFLOW_VERSION ", srcversion %s\n", -+ THIS_MODULE->srcversion); -+ seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK, worker delay %d/%d.\n", - nr_flows, - peakflows, - peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60, -- (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10)); -+ (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10), -+ worker_delay, HZ); - - for_each_present_cpu(cpu) { - struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu); -@@ -252,93 +343,123 @@ static int nf_seq_show(struct seq_file *seq, void *v) - } - - #define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec -- seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n", -- ipt_netflow_hash_size, -- (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10), -- FFLOAT(metric, 10), -- FFLOAT(min_metric, 10), -- FFLOAT(min5_metric, 10), -- FFLOAT(min15_metric, 10), -- pkt_total - pkt_out + pdu_packets, -- (traf_total - traf_out + pdu_traf) >> 10, -- pdu_packets, -- pdu_traf); -- -- seq_printf(seq, "Timeout: active %d, inactive %d. 
Maxflows %u\n", -- active_timeout, -- inactive_timeout, -- maxflows); -- -- seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n", -- sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate); -- -- seq_printf(seq, "cpu# stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n"); -- -- seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n", -- (unsigned long long)searched, -- (unsigned long long)found, -- (unsigned long long)notfound, -- truncated, frags, alloc_err, maxflows_err, -- send_success, send_failed, sock_errors, -- (unsigned long long)exported_size >> 10, -- (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20, -- (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10); -+ seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%02d [%d.%02d, %d.%02d, %d.%02d]." -+ " MemTraf: %llu pkt, %llu K (pdu %llu, %llu), Out %llu pkt, %llu K.\n", -+ ipt_netflow_hash_size, -+ (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10), -+ FFLOAT(metric, 100), -+ FFLOAT(min_metric, 100), -+ FFLOAT(min5_metric, 100), -+ FFLOAT(min15_metric, 100), -+ pkt_total - pkt_out + pdu_packets, -+ (traf_total - traf_out + pdu_traf) >> 10, -+ pdu_packets, -+ pdu_traf, -+ pkt_out, -+ traf_out >> 10); -+ -+ seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec;" -+ " Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n", -+ sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate); -+ -+ seq_printf(seq, "cpu# stat: <search found new [metric], trunc frag alloc maxflows>," -+ " sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n"); -+ -+#define SAFEDIV(x,y) ((y)? 
({ u64 __tmp = x; do_div(__tmp, y); (int)__tmp; }) : 0) -+ seq_printf(seq, "Total stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u," -+ " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n", -+ searched, -+ (unsigned long long)found, -+ (unsigned long long)notfound, -+ FFLOAT(SAFEDIV(100LL * (searched + found + notfound), (found + notfound)), 100), -+ truncated, frags, alloc_err, maxflows_err, -+ send_success, send_failed, sock_errors, -+ (unsigned long long)exported_size >> 10, -+ (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20, -+ (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10); - - if (num_present_cpus() > 1) { - for_each_present_cpu(cpu) { - struct ipt_netflow_stat *st; - - st = &per_cpu(ipt_netflow_stat, cpu); -- seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n", -- cpu, -- (unsigned long long)st->searched, -- (unsigned long long)st->found, -- (unsigned long long)st->notfound, -- st->truncated, st->frags, st->alloc_err, st->maxflows_err, -- st->send_success, st->send_failed, st->sock_errors, -- (unsigned long long)st->exported_size >> 10, -- (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20, -- (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10); -+ seq_printf(seq, "cpu%u stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u," -+ " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n", -+ cpu, -+ (unsigned long long)st->searched, -+ (unsigned long long)st->found, -+ (unsigned long long)st->notfound, -+ FFLOAT(SAFEDIV(100LL * (st->searched + st->found + st->notfound), (st->found + st->notfound)), 100), -+ st->truncated, st->frags, st->alloc_err, st->maxflows_err, -+ st->send_success, st->send_failed, st->sock_errors, -+ (unsigned long long)st->exported_size >> 10, -+ (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20, -+ (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10); - } - } - -- read_lock(&sock_lock); -+ seq_printf(seq, "Protocol version %d", protocol); -+ if (protocol == 10) -+ seq_printf(seq, " (ipfix)"); -+ else -+ seq_printf(seq, " (netflow)"); -+ if (protocol >= 9) -+ seq_printf(seq, ", refresh-rate %u, timeout-rate %u, (templates %d, active %d)", -+ refresh_rate, timeout_rate, template_ids - FLOWSET_DATA_FIRST, tpl_count); -+ -+ seq_printf(seq, ". Timeouts: active %d, inactive %d. Maxflows %u\n", -+ active_timeout, -+ inactive_timeout, -+ maxflows); -+ -+#ifdef CONFIG_NF_NAT_NEEDED -+ seq_printf(seq, "Natevents %s, count start %lu, stop %lu.\n", natevents? 
"enabled" : "disabled", -+ nat_events_start, nat_events_stop); -+#endif -+ -+ mutex_lock(&sock_lock); - list_for_each_entry(usock, &usock_list, list) { -- struct sock *sk = usock->sock->sk; -- -- seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n", -- snum, -- usock->ipaddr >> 24, -- (usock->ipaddr >> 16) & 255, -- (usock->ipaddr >> 8) & 255, -- usock->ipaddr & 255, -- usock->port, -- sk->sk_sndbuf, -- atomic_read(&sk->sk_wmem_alloc), -- atomic_read(&usock->wmem_peak), -- atomic_read(&usock->err_full), -- atomic_read(&usock->err_other)); -+ seq_printf(seq, "sock%d: %u.%u.%u.%u:%u", -+ snum, -+ HIPQUAD(usock->ipaddr), -+ usock->port); -+ if (usock->sock) { -+ struct sock *sk = usock->sock->sk; -+ -+ seq_printf(seq, ", sndbuf %u, filled %u, peak %u;" -+ " err: sndbuf reached %u, connect %u, other %u\n", -+ sk->sk_sndbuf, -+ atomic_read(&sk->sk_wmem_alloc), -+ atomic_read(&usock->wmem_peak), -+ atomic_read(&usock->err_full), -+ atomic_read(&usock->err_connect), -+ atomic_read(&usock->err_other)); -+ } else -+ seq_printf(seq, " unconnected (%u attempts).\n", -+ atomic_read(&usock->err_connect)); - snum++; - } -- read_unlock(&sock_lock); -+ mutex_unlock(&sock_lock); - - read_lock_bh(&aggr_lock); - snum = 0; - list_for_each_entry(aggr_n, &aggr_n_list, list) { -- seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n", -- snum, -- HIPQUAD(aggr_n->addr), -- mask2bits(aggr_n->mask), -- mask2bits(aggr_n->aggr_mask)); -+ seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d (usage %u)\n", -+ snum, -+ HIPQUAD(aggr_n->addr), -+ mask2bits(aggr_n->mask), -+ mask2bits(aggr_n->aggr_mask), -+ atomic_read(&aggr_n->usage)); - snum++; - } - snum = 0; - list_for_each_entry(aggr_p, &aggr_p_list, list) { -- seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n", -- snum, -- aggr_p->port1, -- aggr_p->port2, -- aggr_p->aggr_port); -+ seq_printf(seq, "aggr#%d port: ports %u-%u replace %u (usage %u)\n", -+ snum, -+ aggr_p->port1, -+ aggr_p->port2, -+ aggr_p->aggr_port, -+ atomic_read(&aggr_p->usage)); - snum++; - } - read_unlock_bh(&aggr_lock); -@@ -367,8 +488,13 @@ static struct file_operations nf_seq_fops = { - #define BEFORE2632(x,y) - #endif - -+/* PAX need to know that we are allowed to write */ -+#ifndef CONSTIFY_PLUGIN -+#define ctl_table_no_const ctl_table -+#endif -+ - /* sysctl /proc/sys/net/netflow */ --static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) -+static int hsize_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,) - void __user *buffer, size_t *lenp, loff_t *fpos) - { - void *orig = ctl->data; -@@ -386,20 +512,21 @@ static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp - return ret; - } - --static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) -+static int sndbuf_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,) - void __user *buffer, size_t *lenp, loff_t *fpos) - { - int ret; - struct ipt_netflow_sock *usock; -- -- read_lock(&sock_lock); -+ -+ mutex_lock(&sock_lock); - if (list_empty(&usock_list)) { -- read_unlock(&sock_lock); -+ mutex_unlock(&sock_lock); - return -ENOENT; - } - usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list); -- sndbuf = usock->sock->sk->sk_sndbuf; -- read_unlock(&sock_lock); -+ if (usock->sock) -+ sndbuf = usock->sock->sk->sk_sndbuf; -+ mutex_unlock(&sock_lock); - - ctl->data = &sndbuf; - ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, 
fpos); -@@ -407,13 +534,14 @@ static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *fil - return ret; - if (sndbuf < SOCK_MIN_SNDBUF) - sndbuf = SOCK_MIN_SNDBUF; -- stop_scan_worker(); -- write_lock(&sock_lock); -+ pause_scan_worker(); -+ mutex_lock(&sock_lock); - list_for_each_entry(usock, &usock_list, list) { -- usock->sock->sk->sk_sndbuf = sndbuf; -+ if (usock->sock) -+ usock->sock->sk->sk_sndbuf = sndbuf; - } -- write_unlock(&sock_lock); -- start_scan_worker(); -+ mutex_unlock(&sock_lock); -+ cont_scan_worker(); - return ret; - } - -@@ -424,10 +552,10 @@ static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file - - ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); - if (ret >= 0 && write) { -- stop_scan_worker(); -+ pause_scan_worker(); - destination_removeall(); - add_destinations(destination_buf); -- start_scan_worker(); -+ cont_scan_worker(); - } - return ret; - } -@@ -446,13 +574,12 @@ static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file - return ret; - } - --static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) -+static int flush_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,) - void __user *buffer, size_t *lenp, loff_t *fpos) - { - int ret; -- int val; -+ int val = 0; - -- val = 0; - ctl->data = &val; - ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); - -@@ -461,14 +588,67 @@ static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp - - if (val > 0) { - printk(KERN_INFO "ipt_NETFLOW: forced flush\n"); -- stop_scan_worker(); -- netflow_scan_and_export(1); -- start_scan_worker(); -+ pause_scan_worker(); -+ netflow_scan_and_export(AND_FLUSH); -+ cont_scan_worker(); -+ } -+ -+ return ret; -+} -+ -+static int protocol_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) -+ void __user *buffer, size_t *lenp, loff_t *fpos) -+{ -+ int ret; -+ int ver = protocol; -+ -+ ctl->data = &ver; -+ ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); -+ -+ if (!write) -+ return ret; -+ -+ switch (ver) { -+ case 5: -+ case 9: -+ case 10: -+ printk(KERN_INFO "ipt_NETFLOW: forced flush (protocol version change)\n"); -+ pause_scan_worker(); -+ netflow_scan_and_export(AND_FLUSH); -+ netflow_switch_version(ver); -+ cont_scan_worker(); -+ break; -+ default: -+ return -EPERM; - } - - return ret; - } - -+#ifdef CONFIG_NF_NAT_NEEDED -+static void register_ct_events(void); -+static void unregister_ct_events(void); -+static int natevents_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,) -+ void __user *buffer, size_t *lenp, loff_t *fpos) -+{ -+ int ret; -+ int val = natevents; -+ -+ ctl->data = &val; -+ ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos); -+ -+ if (!write) -+ return ret; -+ -+ if (natevents && !val) -+ unregister_ct_events(); -+ else if (!natevents && val) -+ register_ct_events(); -+ -+ return ret; -+} -+#endif -+ - static struct ctl_table_header *netflow_sysctl_header; - - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) -@@ -547,6 +727,38 @@ static struct ctl_table netflow_sysctl_table[] = { - .maxlen = sizeof(int), - .proc_handler = &flush_procctl, - }, -+ { -+ _CTL_NAME(10) -+ .procname = "protocol", -+ .mode = 0644, -+ .maxlen = sizeof(int), -+ .proc_handler = &protocol_procctl, -+ }, -+ { -+ _CTL_NAME(11) -+ .procname = "refresh-rate", -+ .mode = 0644, -+ .data = &refresh_rate, -+ .maxlen = sizeof(int), -+ .proc_handler = 
&proc_dointvec, -+ }, -+ { -+ _CTL_NAME(12) -+ .procname = "timeout-rate", -+ .mode = 0644, -+ .data = &timeout_rate, -+ .maxlen = sizeof(int), -+ .proc_handler = &proc_dointvec, -+ }, -+#ifdef CONFIG_NF_NAT_NEEDED -+ { -+ _CTL_NAME(13) -+ .procname = "natevents", -+ .mode = 0644, -+ .maxlen = sizeof(int), -+ .proc_handler = &natevents_procctl, -+ }, -+#endif - { } - }; - -@@ -588,18 +800,69 @@ static struct ctl_path netflow_sysctl_path[] = { - static void sk_error_report(struct sock *sk) - { - /* clear connection refused errors if any */ -- write_lock_bh(&sk->sk_callback_lock); - if (debug > 1) -- printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err); -+ printk(KERN_INFO "ipt_NETFLOW: socket error <%d>\n", sk->sk_err); - sk->sk_err = 0; - NETFLOW_STAT_INC(sock_errors); -- write_unlock_bh(&sk->sk_callback_lock); - return; - } - -+static struct socket *_usock_alloc(const __be32 ipaddr, const unsigned short port) -+{ -+ struct sockaddr_in sin; -+ struct socket *sock; -+ int error; -+ -+ if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { -+ printk(KERN_ERR "ipt_NETFLOW: sock_create_kern error %d\n", -error); -+ return NULL; -+ } -+ sock->sk->sk_allocation = GFP_ATOMIC; -+ sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */ -+ sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */ -+ if (sndbuf) -+ sock->sk->sk_sndbuf = sndbuf; -+ else -+ sndbuf = sock->sk->sk_sndbuf; -+ memset(&sin, 0, sizeof(sin)); -+ sin.sin_family = AF_INET; -+ sin.sin_addr.s_addr = htonl(ipaddr); -+ sin.sin_port = htons(port); -+ if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin, -+ sizeof(sin), 0)) < 0) { -+ printk(KERN_ERR "ipt_NETFLOW: error connecting UDP socket %d," -+ " don't worry, will try reconnect later.\n", -error); -+ /* ENETUNREACH when no interfaces */ -+ sock_release(sock); -+ return NULL; -+ } -+ return sock; -+} -+ -+static void usock_connect(struct ipt_netflow_sock *usock, const int sendmsg) -+{ -+ usock->sock = _usock_alloc(usock->ipaddr, usock->port); -+ if (usock->sock) { -+ if (sendmsg || debug) -+ printk(KERN_INFO "ipt_NETFLOW: connected %u.%u.%u.%u:%u\n", -+ HIPQUAD(usock->ipaddr), -+ usock->port); -+ } else { -+ atomic_inc(&usock->err_connect); -+ if (debug) -+ printk(KERN_INFO "ipt_NETFLOW: connect to %u.%u.%u.%u:%u failed%s.\n", -+ HIPQUAD(usock->ipaddr), -+ usock->port, -+ (sendmsg)? 
" (pdu lost)" : ""); -+ } -+ atomic_set(&usock->wmem_peak, 0); -+ atomic_set(&usock->err_full, 0); -+ atomic_set(&usock->err_other, 0); -+} -+ - // return numbers of sends succeded, 0 if none - /* only called in scan worker path */ --static int netflow_send_pdu(void *buffer, int len) -+static void netflow_sendmsg(void *buffer, const int len) - { - struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL }; - struct kvec iov = { buffer, len }; -@@ -607,9 +870,16 @@ static int netflow_send_pdu(void *buffer, int len) - int snum = 0; - struct ipt_netflow_sock *usock; - -+ mutex_lock(&sock_lock); - list_for_each_entry(usock, &usock_list, list) { -+ if (!usock->sock) -+ usock_connect(usock, 1); -+ if (!usock->sock) { -+ NETFLOW_STAT_INC_ATOMIC(send_failed); -+ continue; -+ } - if (debug) -- printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n", -+ printk(KERN_INFO "netflow_sendmsg: sendmsg(%d, %d) [%u %u]\n", - snum, - len, - atomic_read(&usock->sock->sk->sk_wmem_alloc), -@@ -624,7 +894,7 @@ static int netflow_send_pdu(void *buffer, int len) - suggestion = ": increase sndbuf!"; - } else - atomic_inc(&usock->err_other); -- printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n", -+ printk(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: data loss %llu pkt, %llu bytes%s\n", - snum, ret, pdu_packets, pdu_traf, suggestion); - } else { - unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc); -@@ -636,98 +906,67 @@ static int netflow_send_pdu(void *buffer, int len) - } - snum++; - } -- return retok; -+ mutex_unlock(&sock_lock); -+ if (retok == 0) { -+ /* not least one send succeded, account stat for dropped packets */ -+ NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets); -+ NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf); -+ } - } - --static void usock_free(struct ipt_netflow_sock *usock) -+static void usock_close_free(struct ipt_netflow_sock *usock) - { -- printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n", -+ printk(KERN_INFO "ipt_NETFLOW: removed destination %u.%u.%u.%u:%u\n", - HIPQUAD(usock->ipaddr), -- usock->port, -- usock->sock); -+ usock->port); - if (usock->sock) - sock_release(usock->sock); - usock->sock = NULL; -- vfree(usock); -+ vfree(usock); - } - - static void destination_removeall(void) - { -- write_lock(&sock_lock); -+ mutex_lock(&sock_lock); - while (!list_empty(&usock_list)) { - struct ipt_netflow_sock *usock; - - usock = list_entry(usock_list.next, struct ipt_netflow_sock, list); - list_del(&usock->list); -- write_unlock(&sock_lock); -- usock_free(usock); -- write_lock(&sock_lock); -+ mutex_unlock(&sock_lock); -+ usock_close_free(usock); -+ mutex_lock(&sock_lock); - } -- write_unlock(&sock_lock); -+ mutex_unlock(&sock_lock); - } - - static void add_usock(struct ipt_netflow_sock *usock) - { - struct ipt_netflow_sock *sk; - -- /* don't need empty sockets */ -- if (!usock->sock) { -- usock_free(usock); -- return; -- } -- -- write_lock(&sock_lock); -+ mutex_lock(&sock_lock); - /* don't need duplicated sockets */ - list_for_each_entry(sk, &usock_list, list) { - if (sk->ipaddr == usock->ipaddr && - sk->port == usock->port) { -- write_unlock(&sock_lock); -- usock_free(usock); -+ mutex_unlock(&sock_lock); -+ usock_close_free(usock); - return; - } - } - list_add_tail(&usock->list, &usock_list); -- printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n", -+ printk(KERN_INFO "ipt_NETFLOW: added destination %u.%u.%u.%u:%u%s\n", - HIPQUAD(usock->ipaddr), -- usock->port); -- write_unlock(&sock_lock); --} -- --static 
struct socket *usock_alloc(__be32 ipaddr, unsigned short port) --{ -- struct sockaddr_in sin; -- struct socket *sock; -- int error; -- -- if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) { -- printk(KERN_ERR "netflow: sock_create_kern error %d\n", error); -- return NULL; -- } -- sock->sk->sk_allocation = GFP_ATOMIC; -- sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */ -- sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */ -- if (sndbuf) -- sock->sk->sk_sndbuf = sndbuf; -- else -- sndbuf = sock->sk->sk_sndbuf; -- memset(&sin, 0, sizeof(sin)); -- sin.sin_family = AF_INET; -- sin.sin_addr.s_addr = htonl(ipaddr); -- sin.sin_port = htons(port); -- if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin, -- sizeof(sin), 0)) < 0) { -- printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error); -- sock_release(sock); -- return NULL; -- } -- return sock; -+ usock->port, -+ (!usock->sock)? " (unconnected)" : ""); -+ mutex_unlock(&sock_lock); - } - - #define SEPARATORS " ,;\t\n" - static int add_destinations(char *ptr) - { - while (ptr) { -- unsigned char ip[4]; -+ unsigned char ip[4]; - unsigned short port; - - ptr += strspn(ptr, SEPARATORS); -@@ -737,17 +976,15 @@ static int add_destinations(char *ptr) - struct ipt_netflow_sock *usock; - - if (!(usock = vmalloc(sizeof(*usock)))) { -- printk(KERN_ERR "netflow: can't vmalloc socket\n"); -+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc socket\n"); - return -ENOMEM; - } - - memset(usock, 0, sizeof(*usock)); -+ atomic_set(&usock->err_connect, 0); - usock->ipaddr = ntohl(*(__be32 *)ip); - usock->port = port; -- usock->sock = usock_alloc(usock->ipaddr, port); -- atomic_set(&usock->wmem_peak, 0); -- atomic_set(&usock->err_full, 0); -- atomic_set(&usock->err_other, 0); -+ usock_connect(usock, 0); - add_usock(usock); - } else - break; -@@ -781,7 +1018,7 @@ static int add_aggregation(char *ptr) - LIST_HEAD(old_aggr_list); - - while (ptr && *ptr) { -- unsigned char ip[4]; -+ unsigned char ip[4]; - unsigned int mask; - unsigned int port1, port2; - unsigned int aggr_to; -@@ -792,16 +1029,16 @@ static int add_aggregation(char *ptr) - ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) { - - if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) { -- printk(KERN_ERR "netflow: can't vmalloc aggr\n"); -+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n"); - return -ENOMEM; - } - memset(aggr_n, 0, sizeof(*aggr_n)); - -- aggr_n->addr = ntohl(*(__be32 *)ip); - aggr_n->mask = bits2mask(mask); -+ aggr_n->addr = ntohl(*(__be32 *)ip) & aggr_n->mask; - aggr_n->aggr_mask = bits2mask(aggr_to); - aggr_n->prefix = mask; -- printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n", -+ printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u.%u.%u.%u/%u=%u]\n", - HIPQUAD(aggr_n->addr), mask, aggr_to); - list_add_tail(&aggr_n->list, &new_aggr_n_list); - -@@ -809,7 +1046,7 @@ static int add_aggregation(char *ptr) - sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) { - - if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) { -- printk(KERN_ERR "netflow: can't vmalloc aggr\n"); -+ printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n"); - return -ENOMEM; - } - memset(aggr_p, 0, sizeof(*aggr_p)); -@@ -817,11 +1054,11 @@ static int add_aggregation(char *ptr) - aggr_p->port1 = port1; - aggr_p->port2 = port2; - aggr_p->aggr_port = aggr_to; -- printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n", -+ printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u-%u=%u]\n", - port1, port2, aggr_to); - list_add_tail(&aggr_p->list, &new_aggr_p_list); - } else 
{ -- printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr); -+ printk(KERN_ERR "ipt_NETFLOW: bad aggregation rule: %s (ignoring)\n", ptr); - break; - } - -@@ -846,17 +1083,23 @@ static int add_aggregation(char *ptr) - - static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple) - { -- /* tuple is rounded to u32s */ -- return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size; -+ return murmur3(tuple, sizeof(struct ipt_netflow_tuple), ipt_netflow_hash_rnd) % ipt_netflow_hash_size; - } - - static struct ipt_netflow * --ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash) -+ipt_netflow_find(const struct ipt_netflow_tuple *tuple, const unsigned int hash) - { - struct ipt_netflow *nf; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) -+#define compat_hlist_for_each_entry hlist_for_each_entry -+#define compat_hlist_for_each_entry_safe hlist_for_each_entry_safe - struct hlist_node *pos; -+#else /* since 3.9.0 */ -+#define compat_hlist_for_each_entry(a,pos,c,d) hlist_for_each_entry(a,c,d) -+#define compat_hlist_for_each_entry_safe(a,pos,c,d,e) hlist_for_each_entry_safe(a,c,d,e) -+#endif - -- hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) { -+ compat_hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) { - if (ipt_netflow_tuple_equal(tuple, &nf->tuple) && - nf->nr_bytes < FLOW_FULL_WATERMARK) { - NETFLOW_STAT_INC(found); -@@ -868,7 +1111,7 @@ ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash) - return NULL; - } - --static struct hlist_head *alloc_hashtable(int size) -+static struct hlist_head *alloc_hashtable(const int size) - { - struct hlist_head *hash; - -@@ -879,19 +1122,18 @@ static struct hlist_head *alloc_hashtable(int size) - for (i = 0; i < size; i++) - INIT_HLIST_HEAD(&hash[i]); - } else -- printk(KERN_ERR "netflow: unable to vmalloc hash table.\n"); -+ printk(KERN_ERR "ipt_NETFLOW: unable to vmalloc hash table.\n"); - - return hash; - } - --static int set_hashsize(int new_size) -+static int set_hashsize(const int new_size) - { - struct hlist_head *new_hash, *old_hash; -- unsigned int hash; - struct ipt_netflow *nf; - int rnd; - -- printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n", -+ printk(KERN_INFO "ipt_NETFLOW: allocating new hash table %u -> %u buckets\n", - ipt_netflow_hash_size, new_size); - new_hash = alloc_hashtable(new_size); - if (!new_hash) -@@ -900,19 +1142,24 @@ static int set_hashsize(int new_size) - get_random_bytes(&rnd, 4); - - /* rehash */ -- spin_lock_bh(&ipt_netflow_lock); -+ write_lock_bh(&htable_rwlock); - old_hash = ipt_netflow_hash; - ipt_netflow_hash = new_hash; - ipt_netflow_hash_size = new_size; - ipt_netflow_hash_rnd = rnd; - /* hash_netflow() is dependent on ipt_netflow_hash_* values */ -+ spin_lock(&hlist_lock); - list_for_each_entry(nf, &ipt_netflow_list, list) { -+ unsigned int hash; -+ - hash = hash_netflow(&nf->tuple); - /* hlist_add_head overwrites hlist pointers for this node - * so it's good */ - hlist_add_head(&nf->hlist, &new_hash[hash]); -+ nf->lock = &htable_locks[hash & LOCK_COUNT_MASK]; - } -- spin_unlock_bh(&ipt_netflow_lock); -+ spin_unlock(&hlist_lock); -+ write_unlock_bh(&htable_rwlock); - - vfree(old_hash); - -@@ -920,14 +1167,14 @@ static int set_hashsize(int new_size) - } - - static struct ipt_netflow * --ipt_netflow_alloc(struct ipt_netflow_tuple *tuple) -+ipt_netflow_alloc(const struct ipt_netflow_tuple *tuple) - { - struct ipt_netflow *nf; - long count; - - nf = 
kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC); - if (!nf) { -- printk(KERN_ERR "Can't allocate netflow.\n"); -+ printk(KERN_ERR "ipt_NETFLOW: Can't allocate flow.\n"); - return NULL; - } - -@@ -945,13 +1192,15 @@ ipt_netflow_alloc(struct ipt_netflow_tuple *tuple) - - static void ipt_netflow_free(struct ipt_netflow *nf) - { -+ if (IS_DUMMY_FLOW(nf)) -+ return; - atomic_dec(&ipt_netflow_count); - kmem_cache_free(ipt_netflow_cachep, nf); - } - - static struct ipt_netflow * --init_netflow(struct ipt_netflow_tuple *tuple, -- struct sk_buff *skb, unsigned int hash) -+init_netflow(const struct ipt_netflow_tuple *tuple, -+ const struct sk_buff *skb, const unsigned int hash) - { - struct ipt_netflow *nf; - -@@ -959,93 +1208,774 @@ init_netflow(struct ipt_netflow_tuple *tuple, - if (!nf) - return NULL; - -+ nf->lock = &htable_locks[hash & LOCK_COUNT_MASK]; - hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]); -+ spin_lock(&hlist_lock); - list_add(&nf->list, &ipt_netflow_list); -+ spin_unlock(&hlist_lock); - - return nf; - } - - /* cook pdu, send, and clean */ - /* only called in scan worker path */ --static void netflow_export_pdu(void) -+static void netflow_export_pdu_v5(void) - { - struct timeval tv; - int pdusize; - -- if (!pdu.nr_records) -+ if (!pdu_data_records) - return; - - if (debug > 1) -- printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records); -- do_gettimeofday(&tv); -- -- pdu.version = htons(5); -- pdu.ts_uptime = htonl(jiffies_to_msecs(jiffies)); -- pdu.ts_usecs = htonl(tv.tv_sec); -- pdu.ts_unsecs = htonl(tv.tv_usec); -- //pdu.eng_type = 0; -- //pdu.eng_id = 0; -- //pdu.padding = 0; -+ printk(KERN_INFO "netflow_export_pdu_v5 with %d records\n", pdu_data_records); - -- pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records; -- -- /* especially fix nr_records before export */ -- pdu.nr_records = htons(pdu.nr_records); -+ pdu.v5.version = htons(5); -+ pdu.v5.nr_records = htons(pdu_data_records); -+ pdu.v5.ts_uptime = htonl(jiffies_to_msecs(jiffies)); -+ do_gettimeofday(&tv); -+ pdu.v5.ts_usecs = htonl(tv.tv_sec); -+ pdu.v5.ts_unsecs = htonl(tv.tv_usec); -+ pdu.v5.seq = htonl(pdu_seq); -+ //pdu.v5.eng_type = 0; -+ pdu.v5.eng_id = engine_id; -+ //pdu.v5.padding = 0; - -- if (netflow_send_pdu(&pdu, pdusize) == 0) { -- /* not least one send succeded, account stat for dropped packets */ -- NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets); -- NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf); -- } -+ pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu_data_records; - -- pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records)); -+ netflow_sendmsg(&pdu.v5, pdusize); - -- pdu.nr_records = 0; - pdu_packets = 0; -- pdu_traf = 0; -+ pdu_traf = 0; -+ -+ pdu_seq += pdu_data_records; -+ pdu_count++; -+ pdu_data_records = 0; - } - - /* only called in scan worker path */ --static void netflow_export_flow(struct ipt_netflow *nf) -+static void netflow_export_flow_v5(struct ipt_netflow *nf) - { - struct netflow5_record *rec; - -- if (debug > 2) -- printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records); -+ if (unlikely(debug > 2)) -+ printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records); - - pdu_packets += nf->nr_packets; - pdu_traf += nf->nr_bytes; - pdu_ts_mod = jiffies; -- rec = &pdu.flow[pdu.nr_records++]; -+ rec = &pdu.v5.flow[pdu_data_records++]; - - /* make V5 flow record */ -- rec->s_addr = nf->tuple.s_addr; -- rec->d_addr = nf->tuple.d_addr; -- //rec->nexthop = 0; -+ rec->s_addr = nf->tuple.src.ip; -+ rec->d_addr = 
nf->tuple.dst.ip; -+ rec->nexthop = nf->nh.ip; - rec->i_ifc = htons(nf->tuple.i_ifc); - rec->o_ifc = htons(nf->o_ifc); - rec->nr_packets = htonl(nf->nr_packets); - rec->nr_octets = htonl(nf->nr_bytes); -- rec->ts_first = htonl(jiffies_to_msecs(nf->ts_first)); -- rec->ts_last = htonl(jiffies_to_msecs(nf->ts_last)); -+ rec->first_ms = htonl(jiffies_to_msecs(nf->ts_first)); -+ rec->last_ms = htonl(jiffies_to_msecs(nf->ts_last)); - rec->s_port = nf->tuple.s_port; - rec->d_port = nf->tuple.d_port; -- //rec->reserved = 0; -+ //rec->reserved = 0; /* pdu is always zeroized for v5 in netflow_switch_version */ - rec->tcp_flags = nf->tcp_flags; - rec->protocol = nf->tuple.protocol; - rec->tos = nf->tuple.tos; -- //rec->s_as = 0; -- //rec->d_as = 0; -+#ifdef CONFIG_NF_NAT_NEEDED -+ rec->s_as = nf->s_as; -+ rec->d_as = nf->d_as; -+#endif - rec->s_mask = nf->s_mask; - rec->d_mask = nf->d_mask; - //rec->padding = 0; - ipt_netflow_free(nf); - -- if (pdu.nr_records == NETFLOW5_RECORDS_MAX) -+ if (pdu_data_records == NETFLOW5_RECORDS_MAX) -+ netflow_export_pdu_v5(); -+} -+ -+/* pdu is initially blank, export current pdu, and prepare next for filling. */ -+static void netflow_export_pdu_v9(void) -+{ -+ struct timeval tv; -+ int pdusize; -+ -+ if (pdu_data_used <= pdu.v9.data) -+ return; -+ -+ if (debug > 1) -+ printk(KERN_INFO "netflow_export_pdu_v9 with %d records\n", -+ pdu_data_records + pdu_tpl_records); -+ -+ pdu.v9.version = htons(9); -+ pdu.v9.nr_records = htons(pdu_data_records + pdu_tpl_records); -+ pdu.v9.sys_uptime_ms = htonl(jiffies_to_msecs(jiffies)); -+ do_gettimeofday(&tv); -+ pdu.v9.export_time_s = htonl(tv.tv_sec); -+ pdu.v9.seq = htonl(pdu_seq); -+ pdu.v9.source_id = engine_id; -+ -+ pdusize = pdu_data_used - (unsigned char *)&pdu.v9; -+ -+ netflow_sendmsg(&pdu.v9, pdusize); -+ -+ pdu_packets = 0; -+ pdu_traf = 0; -+ -+ pdu_seq++; -+ pdu_count++; -+ pdu_data_records = pdu_tpl_records = 0; -+ pdu_data_used = pdu.v9.data; -+ pdu_flowset = NULL; -+} -+ -+static void netflow_export_pdu_ipfix(void) -+{ -+ struct timeval tv; -+ int pdusize; -+ -+ if (pdu_data_used <= pdu.ipfix.data) -+ return; -+ -+ if (debug > 1) -+ printk(KERN_INFO "netflow_export_pduX with %d records\n", -+ pdu_data_records); -+ -+ pdu.ipfix.version = htons(10); -+ do_gettimeofday(&tv); -+ pdu.ipfix.export_time_s = htonl(tv.tv_sec); -+ pdu.ipfix.seq = htonl(pdu_seq); -+ pdu.ipfix.odomain_id = engine_id; -+ pdusize = pdu_data_used - (unsigned char *)&pdu; -+ pdu.ipfix.length = htons(pdusize); -+ -+ netflow_sendmsg(&pdu.ipfix, pdusize); -+ -+ pdu_packets = 0; -+ pdu_traf = 0; -+ -+ pdu_seq += pdu_data_records; -+ pdu_count++; -+ pdu_data_records = pdu_tpl_records = 0; -+ pdu_data_used = pdu.ipfix.data; -+ pdu_flowset = NULL; -+} -+ -+static inline int pdu_have_space(const size_t size) -+{ -+ return ((pdu_data_used + size) <= pdu_high_wm); -+} -+ -+static inline unsigned char *pdu_grab_space(const size_t size) -+{ -+ unsigned char *ptr = pdu_data_used; -+ pdu_data_used += size; -+ return ptr; -+} -+ -+// allocate data space in pdu, or fail if pdu is reallocated. -+static inline unsigned char *pdu_alloc_fail(const size_t size) -+{ -+ if (!pdu_have_space(size)) { - netflow_export_pdu(); -+ return NULL; -+ } -+ return pdu_grab_space(size); -+} -+ -+/* doesn't fail, but can provide empty pdu. 
*/ -+static unsigned char *pdu_alloc(const size_t size) -+{ -+ return pdu_alloc_fail(size) ?: pdu_grab_space(size); -+} -+ -+/* global table of sizes of template field types */ -+static u_int8_t tpl_element_sizes[] = { -+ [IN_BYTES] = 4, -+ [IN_PKTS] = 4, -+ [PROTOCOL] = 1, -+ [TOS] = 1, -+ [TCP_FLAGS] = 1, -+ [L4_SRC_PORT] = 2, -+ [IPV4_SRC_ADDR] = 4, -+ [SRC_MASK] = 1, -+ [INPUT_SNMP] = 2, -+ [L4_DST_PORT] = 2, -+ [IPV4_DST_ADDR] = 4, -+ [DST_MASK] = 1, -+ [OUTPUT_SNMP] = 2, -+ [IPV4_NEXT_HOP] = 4, -+ //[SRC_AS] = 2, -+ //[DST_AS] = 2, -+ //[BGP_IPV4_NEXT_HOP] = 4, -+ //[MUL_DST_PKTS] = 4, -+ //[MUL_DST_BYTES] = 4, -+ [LAST_SWITCHED] = 4, -+ [FIRST_SWITCHED]= 4, -+ [IPV6_SRC_ADDR] = 16, -+ [IPV6_DST_ADDR] = 16, -+ [IPV6_FLOW_LABEL] = 3, -+ [ICMP_TYPE] = 2, -+ [MUL_IGMP_TYPE] = 1, -+ //[TOTAL_BYTES_EXP] = 4, -+ //[TOTAL_PKTS_EXP] = 4, -+ //[TOTAL_FLOWS_EXP] = 4, -+ [IPV6_NEXT_HOP] = 16, -+ [IPV6_OPTION_HEADERS] = 2, -+ [commonPropertiesId] = 4, -+ [ipv4Options] = 4, -+ [tcpOptions] = 4, -+ [postNATSourceIPv4Address] = 4, -+ [postNATDestinationIPv4Address] = 4, -+ [postNAPTSourceTransportPort] = 2, -+ [postNAPTDestinationTransportPort] = 2, -+ [natEvent] = 1, -+ [postNATSourceIPv6Address] = 16, -+ [postNATDestinationIPv6Address] = 16, -+ [IPSecSPI] = 4, -+ [observationTimeMilliseconds] = 8, -+ [observationTimeMicroseconds] = 8, -+ [observationTimeNanoseconds] = 8, -+}; -+ -+#define TEMPLATES_HASH_BSIZE 8 -+#define TEMPLATES_HASH_SIZE (1<<TEMPLATES_HASH_BSIZE) -+static struct hlist_head templates_hash[TEMPLATES_HASH_SIZE]; -+ -+struct base_template { -+ int length; /* number of elements in template */ -+ u_int16_t types[]; /* {type, size} pairs */ -+}; -+ -+/* base templates */ -+#define BTPL_BASE 0x00000001 /* base stat */ -+#define BTPL_IP4 0x00000002 /* IPv4 */ -+#define BTPL_MASK4 0x00000004 /* Aggregated */ -+#define BTPL_PORTS 0x00000008 /* UDP&TCP */ -+#define BTPL_IP6 0x00000010 /* IPv6 */ -+#define BTPL_ICMP 0x00000020 /* ICMP */ -+#define BTPL_IGMP 0x00000040 /* IGMP */ -+#define BTPL_IPSEC 0x00000080 /* AH&ESP */ -+#define BTPL_NAT4 0x00000100 /* NAT IPv4 */ -+#define BTPL_MARK 0x00000400 /* connmark */ -+#define BTPL_LABEL6 0x00000800 /* IPv6 flow label */ -+#define BTPL_OPTIONS4 0x00001000 /* IPv4 Options */ -+#define BTPL_OPTIONS6 0x00002000 /* IPv6 Options */ -+#define BTPL_TCPOPTIONS 0x00004000 /* TCP Options */ -+#define BTPL_MAX 32 -+ -+static struct base_template template_base = { -+ .types = { -+ INPUT_SNMP, -+ OUTPUT_SNMP, -+ IN_PKTS, -+ IN_BYTES, -+ FIRST_SWITCHED, -+ LAST_SWITCHED, -+ PROTOCOL, -+ TOS, -+ 0 -+ } -+}; -+static struct base_template template_ipv4 = { -+ .types = { -+ IPV4_SRC_ADDR, -+ IPV4_DST_ADDR, -+ IPV4_NEXT_HOP, -+ 0 -+ } -+}; -+static struct base_template template_options4 = { -+ .types = { ipv4Options, 0 } -+}; -+static struct base_template template_tcpoptions = { -+ .types = { tcpOptions, 0 } -+}; -+static struct base_template template_ipv6 = { -+ .types = { -+ IPV6_SRC_ADDR, -+ IPV6_DST_ADDR, -+ IPV6_NEXT_HOP, -+ 0 -+ } -+}; -+static struct base_template template_options6 = { -+ .types = { IPV6_OPTION_HEADERS, 0 } -+}; -+static struct base_template template_label6 = { -+ .types = { IPV6_FLOW_LABEL, 0 } -+}; -+static struct base_template template_ipv4_mask = { -+ .types = { -+ SRC_MASK, -+ DST_MASK, -+ 0 -+ } -+}; -+static struct base_template template_ports = { -+ .types = { -+ L4_SRC_PORT, -+ L4_DST_PORT, -+ TCP_FLAGS, -+ 0 -+ } -+}; -+static struct base_template template_icmp = { -+ .types = { ICMP_TYPE, 0 } -+}; -+static struct base_template 
template_igmp = { -+ .types = { MUL_IGMP_TYPE, 0 } -+}; -+static struct base_template template_ipsec = { -+ .types = { IPSecSPI, 0 } -+}; -+static struct base_template template_nat4 = { -+ .types = { -+ observationTimeMilliseconds, -+ IPV4_SRC_ADDR, -+ IPV4_DST_ADDR, -+ postNATSourceIPv4Address, -+ postNATDestinationIPv4Address, -+ L4_SRC_PORT, -+ L4_DST_PORT, -+ postNAPTSourceTransportPort, -+ postNAPTDestinationTransportPort, -+ PROTOCOL, -+ natEvent, -+ 0 -+ } -+}; -+static struct base_template template_mark = { -+ .types = { commonPropertiesId, 0 } -+}; -+ -+struct data_template { -+ struct hlist_node hlist; -+ int tpl_mask; -+ -+ int length; /* number of elements in template */ -+ int tpl_size; /* summary size of template with flowset header */ -+ int rec_size; /* summary size of all recods of template (w/o flowset header) */ -+ int template_id_n; /* assigned from template_ids, network order. */ -+ int exported_cnt; -+ unsigned long exported_ts; /* jiffies */ -+ u_int16_t fields[]; /* {type, size} pairs */ -+} __attribute__ ((packed)); -+ -+#define TPL_FIELD_NSIZE 4 /* one complete template field's network size */ -+ -+static void free_templates(void) -+{ -+ int i; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) -+ struct hlist_node *pos; -+#endif -+ struct hlist_node *tmp; -+ -+ for (i = 0; i < TEMPLATES_HASH_SIZE; i++) { -+ struct hlist_head *thead = &templates_hash[i]; -+ struct data_template *tpl; -+ -+ compat_hlist_for_each_entry_safe(tpl, pos, tmp, thead, hlist) -+ kfree(tpl); -+ INIT_HLIST_HEAD(thead); -+ } -+ tpl_count = 0; -+} -+ -+/* create combined template from mask */ -+static struct data_template *get_template(const int tmask) -+{ -+ struct base_template *tlist[BTPL_MAX]; -+ struct data_template *tpl; -+ int tnum; -+ int length; -+ int i, j, k; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) -+ struct hlist_node *pos; -+#endif -+ int hash = hash_long(tmask, TEMPLATES_HASH_BSIZE); -+ -+ compat_hlist_for_each_entry(tpl, pos, &templates_hash[hash], hlist) -+ if (tpl->tpl_mask == tmask) -+ return tpl; -+ -+ tnum = 0; -+ if (tmask & BTPL_IP4) { -+ tlist[tnum++] = &template_ipv4; -+ if (tmask & BTPL_OPTIONS4) -+ tlist[tnum++] = &template_options4; -+ if (tmask & BTPL_MASK4) -+ tlist[tnum++] = &template_ipv4_mask; -+ } else if (tmask & BTPL_IP6) { -+ tlist[tnum++] = &template_ipv6; -+ if (tmask & BTPL_LABEL6) -+ tlist[tnum++] = &template_label6; -+ if (tmask & BTPL_OPTIONS6) -+ tlist[tnum++] = &template_options6; -+ } else if (tmask & BTPL_NAT4) -+ tlist[tnum++] = &template_nat4; -+ if (tmask & BTPL_PORTS) -+ tlist[tnum++] = &template_ports; -+ if (tmask & BTPL_BASE) -+ tlist[tnum++] = &template_base; -+ if (tmask & BTPL_TCPOPTIONS) -+ tlist[tnum++] = &template_tcpoptions; -+ if (tmask & BTPL_ICMP) -+ tlist[tnum++] = &template_icmp; -+ if (tmask & BTPL_IGMP) -+ tlist[tnum++] = &template_igmp; -+ if (tmask & BTPL_IPSEC) -+ tlist[tnum++] = &template_ipsec; -+ if (tmask & BTPL_MARK) -+ tlist[tnum++] = &template_mark; -+ -+ /* calc memory size */ -+ length = 0; -+ for (i = 0; i < tnum; i++) { -+ if (!tlist[i]->length) { -+ for (k = 0; tlist[i]->types[k]; k++); -+ tlist[i]->length = k; -+ } -+ length += tlist[i]->length; -+ } -+ /* elements are pairs + one termiantor */ -+ tpl = kmalloc(sizeof(struct data_template) + (length * 2 + 1) * sizeof(u_int16_t), GFP_KERNEL); -+ if (!tpl) { -+ printk(KERN_ERR "ipt_NETFLOW: unable to kmalloc template.\n"); -+ return NULL; -+ } -+ tpl->tpl_mask = tmask; -+ tpl->length = length; -+ tpl->tpl_size = sizeof(struct flowset_template); -+ 
tpl->rec_size = 0; -+ tpl->template_id_n = htons(template_ids++); -+ tpl->exported_cnt = 0; -+ tpl->exported_ts = 0; -+ -+ j = 0; -+ for (i = 0; i < tnum; i++) { -+ struct base_template *btpl = tlist[i]; -+ -+ for (k = 0; k < btpl->length; k++) { -+ int size; -+ int type = btpl->types[k]; -+ -+ tpl->fields[j++] = type; -+ size = tpl_element_sizes[type]; -+ tpl->fields[j++] = size; -+ tpl->rec_size += size; -+ } -+ tpl->tpl_size += btpl->length * TPL_FIELD_NSIZE; -+ } -+ tpl->fields[j++] = 0; -+ -+ hlist_add_head(&tpl->hlist, &templates_hash[hash]); -+ tpl_count++; -+ -+ return tpl; -+} -+ -+static void pdu_add_template(struct data_template *tpl) -+{ -+ int i; -+ unsigned char *ptr; -+ struct flowset_template *ntpl; -+ __be16 *sptr; -+ -+ ptr = pdu_alloc(tpl->tpl_size); -+ ntpl = (struct flowset_template *)ptr; -+ ntpl->flowset_id = protocol == 9? htons(FLOWSET_TEMPLATE) : htons(IPFIX_TEMPLATE); -+ ntpl->length = htons(tpl->tpl_size); -+ ntpl->template_id = tpl->template_id_n; -+ ntpl->field_count = htons(tpl->length); -+ ptr += sizeof(struct flowset_template); -+ sptr = (__be16 *)ptr; -+ for (i = 0; ; ) { -+ int type = tpl->fields[i++]; -+ if (!type) -+ break; -+ *sptr++ = htons(type); -+ *sptr++ = htons(tpl->fields[i++]); -+ } -+ -+ tpl->exported_cnt = pdu_count; -+ tpl->exported_ts = jiffies; -+ -+ pdu_flowset = NULL; -+ pdu_tpl_records++; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) -+static inline s64 portable_ktime_to_ms(const ktime_t kt) -+{ -+ struct timeval tv = ktime_to_timeval(kt); -+ return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC; -+} -+#define ktime_to_ms portable_ktime_to_ms -+#endif -+ -+/* encode one field */ -+typedef struct in6_addr in6_t; -+static inline void add_ipv4_field(__u8 *ptr, const int type, const struct ipt_netflow *nf) -+{ -+ switch (type) { -+ case IN_BYTES: *(__be32 *)ptr = htonl(nf->nr_bytes); break; -+ case IN_PKTS: *(__be32 *)ptr = htonl(nf->nr_packets); break; -+ case FIRST_SWITCHED: *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_first)); break; -+ case LAST_SWITCHED: *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_last)); break; -+ case IPV4_SRC_ADDR: *(__be32 *)ptr = nf->tuple.src.ip; break; -+ case IPV4_DST_ADDR: *(__be32 *)ptr = nf->tuple.dst.ip; break; -+ case IPV4_NEXT_HOP: *(__be32 *)ptr = nf->nh.ip; break; -+ case L4_SRC_PORT: *(__be16 *)ptr = nf->tuple.s_port; break; -+ case L4_DST_PORT: *(__be16 *)ptr = nf->tuple.d_port; break; -+ case INPUT_SNMP: *(__be16 *)ptr = htons(nf->tuple.i_ifc); break; -+ case OUTPUT_SNMP: *(__be16 *)ptr = htons(nf->o_ifc); break; -+ case PROTOCOL: *ptr = nf->tuple.protocol; break; -+ case TCP_FLAGS: *ptr = nf->tcp_flags; break; -+ case TOS: *ptr = nf->tuple.tos; break; -+ case IPV6_SRC_ADDR: *(in6_t *)ptr = nf->tuple.src.in6; break; -+ case IPV6_DST_ADDR: *(in6_t *)ptr = nf->tuple.dst.in6; break; -+ case IPV6_NEXT_HOP: *(in6_t *)ptr = nf->nh.in6; break; -+ case IPV6_FLOW_LABEL: *ptr++ = nf->flow_label >> 16; -+ *(__be16 *)ptr = nf->flow_label; -+ break; -+ case tcpOptions: *(__be32 *)ptr = htonl(nf->tcpoptions); break; -+ case ipv4Options: *(__be32 *)ptr = htonl(nf->options); break; -+ case IPV6_OPTION_HEADERS: *(__be16 *)ptr = htons(nf->options); break; -+#ifdef CONFIG_NF_CONNTRACK_MARK -+ case commonPropertiesId: -+ *(__be32 *)ptr = htonl(nf->mark); break; -+#endif -+ case SRC_MASK: *ptr = nf->s_mask; break; -+ case DST_MASK: *ptr = nf->d_mask; break; -+ case ICMP_TYPE: *(__be16 *)ptr = nf->tuple.d_port; break; -+ case MUL_IGMP_TYPE: *ptr = nf->tuple.d_port; break; -+#ifdef 
CONFIG_NF_NAT_NEEDED -+ case postNATSourceIPv4Address: *(__be32 *)ptr = nf->nat->post.s_addr; break; -+ case postNATDestinationIPv4Address: *(__be32 *)ptr = nf->nat->post.d_addr; break; -+ case postNAPTSourceTransportPort: *(__be16 *)ptr = nf->nat->post.s_port; break; -+ case postNAPTDestinationTransportPort: *(__be16 *)ptr = nf->nat->post.d_port; break; -+ case natEvent: *ptr = nf->nat->nat_event; break; -+#endif -+ case IPSecSPI: *(__u32 *)ptr = (nf->tuple.s_port << 16) | nf->tuple.d_port; break; -+ case observationTimeMilliseconds: -+ *(__be64 *)ptr = cpu_to_be64(ktime_to_ms(nf->ts_obs)); break; -+ case observationTimeMicroseconds: -+ *(__be64 *)ptr = cpu_to_be64(ktime_to_us(nf->ts_obs)); break; -+ case observationTimeNanoseconds: -+ *(__be64 *)ptr = cpu_to_be64(ktime_to_ns(nf->ts_obs)); break; -+ default: -+ memset(ptr, 0, tpl_element_sizes[type]); -+ } -+} -+ -+#define PAD_SIZE 4 /* rfc prescribes flowsets to be padded */ -+ -+/* cache timeout_rate in jiffies */ -+static inline unsigned long timeout_rate_j(void) -+{ -+ static unsigned int t_rate = 0; -+ static unsigned long t_rate_j = 0; -+ -+ if (unlikely(timeout_rate != t_rate)) { -+ struct timeval tv = { .tv_sec = timeout_rate * 60, .tv_usec = 0 }; -+ -+ t_rate = timeout_rate; -+ t_rate_j = timeval_to_jiffies(&tv); -+ } -+ return t_rate_j; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) -+#define IPPROTO_UDPLITE 136 -+#endif -+ -+#ifndef time_is_before_jiffies -+#define time_is_before_jiffies(a) time_after(jiffies, a) -+#endif -+ -+static void netflow_export_flow_tpl(struct ipt_netflow *nf) -+{ -+ unsigned char *ptr; -+ int i; -+ struct data_template *tpl; -+ int tpl_mask = BTPL_BASE; -+ -+ if (unlikely(debug > 2)) -+ printk(KERN_INFO "adding flow to export (%d)\n", -+ pdu_data_records + pdu_tpl_records); -+ -+ if (likely(nf->tuple.l3proto == AF_INET)) { -+ tpl_mask |= BTPL_IP4; -+ if (unlikely(nf->options)) -+ tpl_mask |= BTPL_OPTIONS4; -+ } else { -+ tpl_mask |= BTPL_IP6; -+ if (unlikely(nf->options)) -+ tpl_mask |= BTPL_OPTIONS6; -+ if (unlikely(nf->flow_label)) -+ tpl_mask |= BTPL_LABEL6; -+ } -+ if (unlikely(nf->tcpoptions)) -+ tpl_mask |= BTPL_TCPOPTIONS; -+ if (unlikely(nf->s_mask || nf->d_mask)) -+ tpl_mask |= BTPL_MASK4; -+ if (likely(nf->tuple.protocol == IPPROTO_TCP || -+ nf->tuple.protocol == IPPROTO_UDP || -+ nf->tuple.protocol == IPPROTO_SCTP || -+ nf->tuple.protocol == IPPROTO_UDPLITE)) -+ tpl_mask |= BTPL_PORTS; -+ else if (nf->tuple.protocol == IPPROTO_ICMP) -+ tpl_mask |= BTPL_ICMP; -+ else if (nf->tuple.protocol == IPPROTO_IGMP) -+ tpl_mask |= BTPL_IGMP; -+#ifdef CONFIG_NF_CONNTRACK_MARK -+ if (nf->mark) -+ tpl_mask |= BTPL_MARK; -+#endif -+#ifdef CONFIG_NF_NAT_NEEDED -+ if (nf->nat) -+ tpl_mask = BTPL_NAT4; -+#endif -+ -+ tpl = get_template(tpl_mask); -+ if (unlikely(!tpl)) { -+ printk(KERN_INFO "ipt_NETFLOW: template allocation failed.\n"); -+ NETFLOW_STAT_INC(alloc_err); -+ NETFLOW_STAT_ADD_ATOMIC(pkt_drop, nf->nr_packets); -+ NETFLOW_STAT_ADD_ATOMIC(traf_drop, nf->nr_bytes); -+ ipt_netflow_free(nf); -+ return; -+ } -+ -+ if (unlikely(!pdu_flowset || -+ pdu_flowset->flowset_id != tpl->template_id_n || -+ !(ptr = pdu_alloc_fail(tpl->rec_size)))) { -+ -+ /* if there was previous data template we should pad it to 4 bytes */ -+ if (pdu_flowset) { -+ int padding = (PAD_SIZE - ntohs(pdu_flowset->length) % PAD_SIZE) % PAD_SIZE; -+ if (padding && (ptr = pdu_alloc_fail(padding))) { -+ pdu_flowset->length = htons(ntohs(pdu_flowset->length) + padding); -+ for (; padding; padding--) -+ *ptr++ = 0; -+ } -+ } -+ -+ 
if (!tpl->exported_ts || -+ pdu_count > (tpl->exported_cnt + refresh_rate) || -+ time_is_before_jiffies(tpl->exported_ts + timeout_rate_j())) { -+ pdu_add_template(tpl); -+ } -+ -+ ptr = pdu_alloc(sizeof(struct flowset_data) + tpl->rec_size); -+ pdu_flowset = (struct flowset_data *)ptr; -+ pdu_flowset->flowset_id = tpl->template_id_n; -+ pdu_flowset->length = htons(sizeof(struct flowset_data)); -+ ptr += sizeof(struct flowset_data); -+ } -+ -+ /* encode all fields */ -+ for (i = 0; ; ) { -+ int type = tpl->fields[i++]; -+ -+ if (!type) -+ break; -+ add_ipv4_field(ptr, type, nf); -+ ptr += tpl->fields[i++]; -+ } -+ -+ pdu_data_records++; -+ pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size); -+ -+ pdu_packets += nf->nr_packets; -+ pdu_traf += nf->nr_bytes; -+ -+ ipt_netflow_free(nf); -+ pdu_ts_mod = jiffies; -+} -+ -+static void netflow_switch_version(const int ver) -+{ -+ protocol = ver; -+ if (protocol == 5) { -+ memset(&pdu, 0, sizeof(pdu)); -+ netflow_export_flow = &netflow_export_flow_v5; -+ netflow_export_pdu = &netflow_export_pdu_v5; -+ } else if (protocol == 9) { -+ pdu_data_used = pdu.v9.data; -+ pdu_max_size = sizeof(pdu.v9); -+ pdu_high_wm = (unsigned char *)&pdu + pdu_max_size; -+ netflow_export_flow = &netflow_export_flow_tpl; -+ netflow_export_pdu = &netflow_export_pdu_v9; -+ } else { /* IPFIX */ -+ pdu_data_used = pdu.ipfix.data; -+ pdu_max_size = sizeof(pdu.ipfix); -+ pdu_high_wm = (unsigned char *)&pdu + pdu_max_size; -+ netflow_export_flow = &netflow_export_flow_tpl; -+ netflow_export_pdu = &netflow_export_pdu_ipfix; -+ } -+ if (protocol != 5) -+ free_templates(); -+ pdu_data_records = pdu_tpl_records = 0; -+ pdu_flowset = NULL; -+ printk(KERN_INFO "ipt_NETFLOW protocol version %d (%s) enabled.\n", -+ protocol, protocol == 10? "IPFIX" : "NetFlow"); -+} -+ -+#ifdef CONFIG_NF_NAT_NEEDED -+static void export_nat_event(struct nat_event *nel) -+{ -+ static struct ipt_netflow nf = { { NULL } }; -+ -+ nf.tuple.l3proto = AF_INET; -+ nf.tuple.protocol = nel->protocol; -+ nf.nat = nel; /* this is also flag of dummy flow */ -+ nf.tcp_flags = (nel->nat_event == NAT_DESTROY)? TCP_FIN_RST : TCP_SYN_ACK; -+ if (protocol >= 9) { -+ nf.ts_obs = nel->ts_ktime; -+ nf.tuple.src.ip = nel->pre.s_addr; -+ nf.tuple.dst.ip = nel->pre.d_addr; -+ nf.tuple.s_port = nel->pre.s_port; -+ nf.tuple.d_port = nel->pre.d_port; -+ netflow_export_flow(&nf); -+ } else { /* v5 */ -+ /* The weird v5 packet(s). -+ * src and dst will be same as in data flow from the FORWARD chain -+ * where src is pre-nat src ip and dst is post-nat dst ip. -+ * What we lacking here is external src ip for SNAT, or -+ * pre-nat dst ip for DNAT. We will put this into Nexthop field -+ * with port into src/dst AS field. tcp_flags will distinguish it's -+ * start or stop event. Two flows in case of full nat. 
*/ -+ nf.tuple.src.ip = nel->pre.s_addr; -+ nf.tuple.s_port = nel->pre.s_port; -+ nf.tuple.dst.ip = nel->post.d_addr; -+ nf.tuple.d_port = nel->post.d_port; -+ -+ nf.ts_first = nel->ts_jiffies; -+ nf.ts_last = nel->ts_jiffies; -+ if (nel->pre.s_addr != nel->post.s_addr || -+ nel->pre.s_port != nel->post.s_port) { -+ nf.nh.ip = nel->post.s_addr; -+ nf.s_as = nel->post.s_port; -+ nf.d_as = 0; -+ netflow_export_flow(&nf); -+ } -+ if (nel->pre.d_addr != nel->post.d_addr || -+ nel->pre.d_port != nel->post.d_port) { -+ nf.nh.ip = nel->pre.d_addr; -+ nf.s_as = 0; -+ nf.d_as = nel->pre.d_port; -+ netflow_export_flow(&nf); -+ } -+ } -+ kfree(nel); - } -+#endif /* CONFIG_NF_NAT_NEEDED */ - --static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout) -+static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout) - { - /* active too long, finishing, or having too much bytes */ - return ((jiffies - nf->ts_first) > a_timeout) || -@@ -1057,42 +1987,77 @@ static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout) - - /* could be called with zero to flush cache and pdu */ - /* this function is guaranteed to be called non-concurrently */ --static void netflow_scan_and_export(int flush) -+/* return -1 is trylockfailed, 0 if nothin gexported, >=1 if exported something */ -+static int netflow_scan_and_export(const int flush) - { - long i_timeout = inactive_timeout * HZ; - long a_timeout = active_timeout * HZ; -+ int trylock_failed = 0; -+ int pdu_c = pdu_count; - - if (flush) - i_timeout = 0; - -- spin_lock_bh(&ipt_netflow_lock); -- while (!list_empty(&ipt_netflow_list)) { -+ local_bh_disable(); -+ spin_lock(&hlist_lock); -+ /* This is different order of locking than elsewhere, -+ * so we trylock&break to avoid deadlock. */ -+ -+ while (likely(!list_empty(&ipt_netflow_list))) { - struct ipt_netflow *nf; -- -+ -+ /* Last entry, which is usually oldest. */ - nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list); -+ if (!spin_trylock(nf->lock)) { -+ trylock_failed = 1; -+ break; -+ } - /* Note: i_timeout checked with >= to allow specifying zero timeout - * to purge all flows on module unload */ - if (((jiffies - nf->ts_last) >= i_timeout) || - active_needs_export(nf, a_timeout)) { - hlist_del(&nf->hlist); -+ spin_unlock(nf->lock); -+ - list_del(&nf->list); -+ spin_unlock(&hlist_lock); -+ local_bh_enable(); -+ - NETFLOW_STAT_ADD(pkt_out, nf->nr_packets); - NETFLOW_STAT_ADD(traf_out, nf->nr_bytes); -- spin_unlock_bh(&ipt_netflow_lock); - netflow_export_flow(nf); -- spin_lock_bh(&ipt_netflow_lock); -+ -+ local_bh_disable(); -+ spin_lock(&hlist_lock); - } else { -+ spin_unlock(nf->lock); - /* all flows which need to be exported is always at the tail - * so if no more exportable flows we can break */ - break; - } - } -- spin_unlock_bh(&ipt_netflow_lock); -- -+ spin_unlock(&hlist_lock); -+ local_bh_enable(); -+ -+#ifdef CONFIG_NF_NAT_NEEDED -+ spin_lock_bh(&nat_lock); -+ while (!list_empty(&nat_list)) { -+ struct nat_event *nel; -+ -+ nel = list_entry(nat_list.next, struct nat_event, list); -+ list_del(&nel->list); -+ spin_unlock_bh(&nat_lock); -+ export_nat_event(nel); -+ spin_lock_bh(&nat_lock); -+ } -+ spin_unlock_bh(&nat_lock); -+#endif - /* flush flows stored in pdu if there no new flows for too long */ - /* Note: using >= to allow flow purge on zero timeout */ - if ((jiffies - pdu_ts_mod) >= i_timeout) - netflow_export_pdu(); -+ -+ return trylock_failed? 
-1 : pdu_count - pdu_c; - } - - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) -@@ -1101,8 +2066,10 @@ static void netflow_work_fn(void *dummy) - static void netflow_work_fn(struct work_struct *dummy) - #endif - { -- netflow_scan_and_export(0); -- __start_scan_worker(); -+ int status; -+ -+ status = netflow_scan_and_export(DONT_FLUSH); -+ _schedule_scan_worker(status); - } - - #define RATESHIFT 2 -@@ -1154,7 +2121,7 @@ static void rate_timer_calc(unsigned long dummy) - old_found = found; - old_notfound = notfound; - /* if there is no access to hash keep rate steady */ -- metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric; -+ metric = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric; - CALC_RATE(min15_metric, (unsigned long long)metric, 15); - CALC_RATE(min5_metric, (unsigned long long)metric, 5); - CALC_RATE(min_metric, (unsigned long long)metric, 1); -@@ -1162,6 +2129,262 @@ static void rate_timer_calc(unsigned long dummy) - mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE)); - } - -+#ifdef CONFIG_NF_NAT_NEEDED -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+static struct nf_ct_event_notifier *saved_event_cb __read_mostly = NULL; -+static int netflow_conntrack_event(const unsigned int events, struct nf_ct_event *item) -+#else -+static int netflow_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr) -+#endif -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+ struct nf_conn *ct = item->ct; -+#else -+ struct nf_conn *ct = (struct nf_conn *)ptr; -+#endif -+ struct nat_event *nel; -+ const struct nf_conntrack_tuple *t; -+ int ret = NOTIFY_DONE; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+ struct nf_ct_event_notifier *notifier; -+ -+ /* Call netlink first. */ -+ notifier = rcu_dereference(saved_event_cb); -+ if (likely(notifier)) -+ ret = notifier->fcn(events, item); -+#endif -+ if (unlikely(!natevents)) -+ return ret; -+ -+ if (!(events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED) | (1 << IPCT_DESTROY)))) -+ return ret; -+ -+ if (!(ct->status & IPS_NAT_MASK)) -+ return ret; -+ -+ if (unlikely(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET || -+ ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num != AF_INET)) { -+ /* Well, there is no linux NAT for IPv6 anyway. 
*/ -+ return ret; -+ } -+ -+ if (!(nel = kmalloc(sizeof(struct nat_event), GFP_ATOMIC))) { -+ printk(KERN_ERR "ipt_NETFLOW: can't kmalloc nat event\n"); -+ return ret; -+ } -+ memset(nel, 0, sizeof(struct nat_event)); -+ nel->ts_ktime = ktime_get_real(); -+ nel->ts_jiffies = jiffies; -+ t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; -+ nel->protocol = t->dst.protonum; -+ nel->pre.s_addr = t->src.u3.ip; -+ nel->pre.d_addr = t->dst.u3.ip; -+ nel->pre.s_port = t->src.u.all; -+ nel->pre.d_port = t->dst.u.all; -+ t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; -+ /* reply is reversed */ -+ nel->post.s_addr = t->dst.u3.ip; -+ nel->post.d_addr = t->src.u3.ip; -+ nel->post.s_port = t->dst.u.all; -+ nel->post.d_port = t->src.u.all; -+ if (events & (1 << IPCT_DESTROY)) { -+ nel->nat_event = NAT_DESTROY; -+ nat_events_stop++; -+ } else { -+ nel->nat_event = NAT_CREATE; -+ nat_events_start++; -+ } -+ -+ spin_lock_bh(&nat_lock); -+ list_add_tail(&nel->list, &nat_list); -+ spin_unlock_bh(&nat_lock); -+ -+ return ret; -+} -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) -+static struct notifier_block ctnl_notifier = { -+ .notifier_call = netflow_conntrack_event -+}; -+#else -+static struct nf_ct_event_notifier ctnl_notifier = { -+ .fcn = netflow_conntrack_event -+}; -+#endif /* since 2.6.31 */ -+#endif /* CONFIG_NF_NAT_NEEDED */ -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) && \ -+ LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) -+static bool -+#else -+static int -+#endif -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -+netflow_target_check(const char *tablename, const void *entry, const struct xt_target *target, -+ void *targinfo, -+#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18) -+ unsigned int targinfosize, -+#endif -+ unsigned int hook_mask) -+{ -+#else -+netflow_target_check(const struct xt_tgchk_param *par) -+{ -+ const char *tablename = par->table; -+ const struct xt_target *target = par->target; -+#endif -+ if (strcmp("nat", tablename) == 0) { -+ /* In the nat table we only see single packet per flow, which is useless. */ -+ printk(KERN_ERR "%s target: is not valid in %s table\n", target->name, tablename); -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) -+#define CHECK_FAIL 0 -+#define CHECK_OK 1 -+#else -+#define CHECK_FAIL -EINVAL -+#define CHECK_OK 0 -+#endif -+ return CHECK_FAIL; -+ } -+ if (target->family == AF_INET6 && protocol == 5) { -+ printk(KERN_ERR "ip6tables NETFLOW target is meaningful for protocol 9 or 10 only.\n"); -+ return CHECK_FAIL; -+ } -+ return CHECK_OK; -+} -+ -+#define SetXBit(x) (0x8000 >> (x)) /* Proper bit for htons later. */ -+#ifndef IPPROTO_MH -+#define IPPROTO_MH 135 -+#endif -+static inline __u16 observed_hdrs(const __u8 currenthdr) -+{ -+ switch (currenthdr) { -+ case IPPROTO_TCP: -+ case IPPROTO_UDP: -+ /* For speed, in case switch is not optimized. */ -+ return 0; -+ case IPPROTO_DSTOPTS: return SetXBit(0); -+ case IPPROTO_HOPOPTS: return SetXBit(1); -+ case IPPROTO_ROUTING: return SetXBit(5); -+ case IPPROTO_MH: return SetXBit(12); -+ case IPPROTO_ESP: return SetXBit(13); -+ case IPPROTO_AH: return SetXBit(14); -+ case IPPROTO_COMP: return SetXBit(15); -+ case IPPROTO_FRAGMENT: /* Handled elsewhere. */ -+ /* Next is known headers. */ -+ case IPPROTO_ICMPV6: -+ case IPPROTO_UDPLITE: -+ case IPPROTO_IPIP: -+ case IPPROTO_PIM: -+ case IPPROTO_GRE: -+ case IPPROTO_SCTP: -+#ifdef IPPROTO_L2TP -+ case IPPROTO_L2TP: -+#endif -+ case IPPROTO_DCCP: -+ return 0; -+ } -+ return SetXBit(3); /* Unknown header. 
*/ -+} -+ -+/* http://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml */ -+static const __u8 ip4_opt_table[] = { -+ [7] = 0, /* RR */ /* parsed manually becasue of 0 */ -+ [134] = 1, /* CIPSO */ -+ [133] = 2, /* E-SEC */ -+ [68] = 3, /* TS */ -+ [131] = 4, /* LSR */ -+ [130] = 5, /* SEC */ -+ [1] = 6, /* NOP */ -+ [0] = 7, /* EOOL */ -+ [15] = 8, /* ENCODE */ -+ [142] = 9, /* VISA */ -+ [205] = 10, /* FINN */ -+ [12] = 11, /* MTUR */ -+ [11] = 12, /* MTUP */ -+ [10] = 13, /* ZSU */ -+ [137] = 14, /* SSR */ -+ [136] = 15, /* SID */ -+ [151] = 16, /* DPS */ -+ [150] = 17, /* NSAPA */ -+ [149] = 18, /* SDB */ -+ [147] = 19, /* ADDEXT */ -+ [148] = 20, /* RTRALT */ -+ [82] = 21, /* TR */ -+ [145] = 22, /* EIP */ -+ [144] = 23, /* IMITD */ -+ [30] = 25, /* EXP */ -+ [94] = 25, /* EXP */ -+ [158] = 25, /* EXP */ -+ [222] = 25, /* EXP */ -+ [25] = 30, /* QS */ -+ [152] = 31, /* UMP */ -+}; -+/* Parse IPv4 Options array int ipv4Options IPFIX value. */ -+static inline __u32 ip4_options(const u_int8_t *p, const unsigned int optsize) -+{ -+ __u32 ret = 0; -+ unsigned int i; -+ -+ for (i = 0; likely(i < optsize); ) { -+ u_int8_t op = p[i++]; -+ -+ if (op == 7) /* RR: bit 0 */ -+ ret |= 1; -+ else if (likely(op < ARRAY_SIZE(ip4_opt_table))) { -+ /* Btw, IANA doc is messed up in a crazy way: -+ * http://www.ietf.org/mail-archive/web/ipfix/current/msg06008.html (2011) -+ * I decided to follow IANA _text_ description from -+ * http://www.iana.org/assignments/ipfix/ipfix.xhtml (2013-09-18) -+ * -+ * Set proper bit for htonl later. */ -+ if (ip4_opt_table[op]) -+ ret |= 1 << (32 - ip4_opt_table[op]); -+ } -+ if (likely(i >= optsize || op == 0)) -+ break; -+ else if (unlikely(op == 1)) -+ continue; -+ else if (unlikely(p[i] < 2)) -+ break; -+ else -+ i += p[i] - 1; -+ } -+ return ret; -+} -+ -+#define TCPHDR_MAXSIZE (4 * 15) -+/* List of options: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml */ -+static inline __u32 tcp_options(const struct sk_buff *skb, const unsigned int ptr, const struct tcphdr *th) -+{ -+ const unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr); -+ __u8 _opt[TCPHDR_MAXSIZE]; -+ const u_int8_t *p; -+ __u32 ret; -+ unsigned int i; -+ -+ p = skb_header_pointer(skb, ptr + sizeof(struct tcphdr), optsize, _opt); -+ if (unlikely(!p)) -+ return 0; -+ ret = 0; -+ for (i = 0; likely(i < optsize); ) { -+ u_int8_t opt = p[i++]; -+ -+ if (likely(opt < 32)) { -+ /* IANA doc is messed up, see above. 
*/ -+ ret |= 1 << (32 - opt); -+ } -+ if (likely(i >= optsize || opt == 0)) -+ break; -+ else if (unlikely(opt == 1)) -+ continue; -+ else if (unlikely(p[i] < 2)) /* "silly options" */ -+ break; -+ else -+ i += p[i] - 1; -+ } -+ return ret; -+} - /* packet receiver */ - static unsigned int netflow_target( - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) -@@ -1192,27 +2415,38 @@ static unsigned int netflow_target( - ) - { - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) -- struct sk_buff *skb = *pskb; -+ const struct sk_buff *skb = *pskb; -+#endif -+ union { -+ struct iphdr ip; -+ struct ipv6hdr ip6; -+ } _iph, *iph; -+ unsigned int hash; -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -+ const int family = target->family; -+#else -+ const int family = par->family; - #endif -- struct iphdr _iph, *iph; - struct ipt_netflow_tuple tuple; - struct ipt_netflow *nf; - __u8 tcp_flags; - struct netflow_aggr_n *aggr_n; - struct netflow_aggr_p *aggr_p; - __u8 s_mask, d_mask; -- unsigned int hash; -- -- iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb); -- -- if (iph == NULL) { -+ unsigned int ptr; -+ int fragment; -+ size_t pkt_len; -+ int options = 0; -+ int tcpoptions = 0; -+ -+ iph = skb_header_pointer(skb, 0, (likely(family == AF_INET))? sizeof(_iph.ip) : sizeof(_iph.ip6), &iph); -+ if (unlikely(iph == NULL)) { - NETFLOW_STAT_INC(truncated); - NETFLOW_STAT_INC(pkt_drop); - return IPT_CONTINUE; - } - -- tuple.s_addr = iph->saddr; -- tuple.d_addr = iph->daddr; -+ tuple.l3proto = family; - tuple.s_port = 0; - tuple.d_port = 0; - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) -@@ -1220,30 +2454,118 @@ static unsigned int netflow_target( - #else - tuple.i_ifc = par->in? par->in->ifindex : -1; - #endif -- tuple.protocol = iph->protocol; -- tuple.tos = iph->tos; - tcp_flags = 0; /* Cisco sometimes have TCP ACK for non TCP packets, don't get it */ - s_mask = 0; - d_mask = 0; - -- if (iph->frag_off & htons(IP_OFFSET)) -+ if (likely(family == AF_INET)) { -+ tuple.src = (union nf_inet_addr){ .ip = iph->ip.saddr }; -+ tuple.dst = (union nf_inet_addr){ .ip = iph->ip.daddr }; -+ tuple.tos = iph->ip.tos; -+ tuple.protocol = iph->ip.protocol; -+ fragment = unlikely(iph->ip.frag_off & htons(IP_OFFSET)); -+ ptr = iph->ip.ihl * 4; -+ pkt_len = ntohs(iph->ip.tot_len); -+ -+#define IPHDR_MAXSIZE (4 * 15) -+ if (unlikely(iph->ip.ihl * 4 > sizeof(struct iphdr))) { -+ u_int8_t _opt[IPHDR_MAXSIZE - sizeof(struct iphdr)]; -+ const u_int8_t *op; -+ unsigned int optsize = iph->ip.ihl * 4 - sizeof(struct iphdr); -+ -+ op = skb_header_pointer(skb, sizeof(struct iphdr), optsize, _opt); -+ if (likely(op)) -+ options = ip4_options(op, optsize); -+ } -+ } else { -+ __u8 currenthdr; -+ -+ tuple.src.in6 = iph->ip6.saddr; -+ tuple.dst.in6 = iph->ip6.daddr; -+ tuple.tos = iph->ip6.priority; -+ fragment = 0; -+ ptr = sizeof(struct ipv6hdr); -+ pkt_len = ntohs(iph->ip6.payload_len) + sizeof(struct ipv6hdr); -+ -+ currenthdr = iph->ip6.nexthdr; -+ while (currenthdr != NEXTHDR_NONE && ipv6_ext_hdr(currenthdr)) { -+ struct ipv6_opt_hdr _hdr; -+ const struct ipv6_opt_hdr *hp; -+ unsigned int hdrlen = 0; -+ -+ options |= observed_hdrs(currenthdr); -+ hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr); -+ if (hp == NULL) { -+ /* We have src/dst, so must account something. 
*/ -+ tuple.protocol = currenthdr; -+ fragment = 3; -+ goto do_protocols; -+ } -+ -+ switch (currenthdr) { -+ case IPPROTO_FRAGMENT: { -+ struct frag_hdr _fhdr; -+ const struct frag_hdr *fh; -+ -+ fh = skb_header_pointer(skb, ptr, sizeof(_fhdr), -+ &_fhdr); -+ if (fh == NULL) { -+ tuple.protocol = currenthdr; -+ fragment = 2; -+ goto do_protocols; -+ } -+ fragment = 1; -+#define FRA0 SetXBit(4) /* Fragment header - first fragment */ -+#define FRA1 SetXBit(6) /* Fragmentation header - not first fragment */ -+ options |= (ntohs(fh->frag_off) & 0xFFF8)? FRA1 : FRA0; -+ hdrlen = 8; -+ break; -+ } -+ case IPPROTO_AH: { -+ struct ip_auth_hdr _hdr, *hp; -+ -+ if (likely(hp = skb_header_pointer(skb, ptr, 8, &_hdr))) { -+ tuple.s_port = hp->spi >> 16; -+ tuple.d_port = hp->spi; -+ } -+ hdrlen = (hp->hdrlen + 2) << 2; -+ break; -+ } -+ default: -+ hdrlen = ipv6_optlen(hp); -+ } -+ currenthdr = hp->nexthdr; -+ ptr += hdrlen; -+ } -+ tuple.protocol = currenthdr; -+ options |= observed_hdrs(currenthdr); -+ } -+ -+do_protocols: -+ if (fragment) { -+ /* if conntrack is enabled it should defrag on pre-routing and local-out */ - NETFLOW_STAT_INC(frags); -- else { -+ } else { - switch (tuple.protocol) { - case IPPROTO_TCP: { - struct tcphdr _hdr, *hp; - -- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) { -+ if (likely(hp = skb_header_pointer(skb, ptr, 14, &_hdr))) { - tuple.s_port = hp->source; - tuple.d_port = hp->dest; - tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16); -+ -+ if (unlikely(hp->doff * 4 > sizeof(struct tcphdr))) -+ tcpoptions = tcp_options(skb, ptr, hp); - } - break; - } -- case IPPROTO_UDP: { -+ case IPPROTO_UDP: -+ case IPPROTO_UDPLITE: -+ case IPPROTO_SCTP: { - struct udphdr _hdr, *hp; - -- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) { -+ if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) { - tuple.s_port = hp->source; - tuple.d_port = hp->dest; - } -@@ -1252,72 +2574,111 @@ static unsigned int netflow_target( - case IPPROTO_ICMP: { - struct icmphdr _hdr, *hp; - -- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr))) -- tuple.d_port = (hp->type << 8) | hp->code; -+ if (likely(family == AF_INET && -+ (hp = skb_header_pointer(skb, ptr, 2, &_hdr)))) -+ tuple.d_port = htons((hp->type << 8) | hp->code); - break; - } -+ case IPPROTO_ICMPV6: { -+ struct icmp6hdr _icmp6h, *ic; -+ -+ if (likely(family == AF_INET6 && -+ (ic = skb_header_pointer(skb, ptr, 2, &_icmp6h)))) -+ tuple.d_port = htons((ic->icmp6_type << 8) | ic->icmp6_code); -+ break; -+ } - case IPPROTO_IGMP: { -- struct igmphdr *_hdr, *hp; -+ struct igmphdr _hdr, *hp; - -- if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr))) -+ if (likely(hp = skb_header_pointer(skb, ptr, 1, &_hdr))) - tuple.d_port = hp->type; - } - break; -+ case IPPROTO_AH: { /* IPSEC */ -+ struct ip_auth_hdr _hdr, *hp; -+ -+ if (likely(family == AF_INET && /* For IPv6 it's parsed above. 
*/ -+ (hp = skb_header_pointer(skb, ptr, 8, &_hdr)))) { -+ tuple.s_port = hp->spi >> 16; -+ tuple.d_port = hp->spi; -+ } -+ break; -+ } -+ case IPPROTO_ESP: { -+ struct ip_esp_hdr _hdr, *hp; -+ -+ if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) -+ tuple.s_port = hp->spi >> 16; -+ tuple.d_port = hp->spi; -+ } -+ break; - } - } /* not fragmented */ - - /* aggregate networks */ - read_lock_bh(&aggr_lock); -- list_for_each_entry(aggr_n, &aggr_n_list, list) -- if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) { -- tuple.s_addr &= htonl(aggr_n->aggr_mask); -- s_mask = aggr_n->prefix; -- break; -- } -- list_for_each_entry(aggr_n, &aggr_n_list, list) -- if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) { -- tuple.d_addr &= htonl(aggr_n->aggr_mask); -- d_mask = aggr_n->prefix; -- break; -- } -+ if (family == AF_INET) { -+ list_for_each_entry(aggr_n, &aggr_n_list, list) -+ if (unlikely((ntohl(tuple.src.ip) & aggr_n->mask) == aggr_n->addr)) { -+ tuple.src.ip &= htonl(aggr_n->aggr_mask); -+ s_mask = aggr_n->prefix; -+ atomic_inc(&aggr_n->usage); -+ break; -+ } -+ list_for_each_entry(aggr_n, &aggr_n_list, list) -+ if (unlikely((ntohl(tuple.dst.ip) & aggr_n->mask) == aggr_n->addr)) { -+ tuple.dst.ip &= htonl(aggr_n->aggr_mask); -+ d_mask = aggr_n->prefix; -+ atomic_inc(&aggr_n->usage); -+ break; -+ } -+ } - -- /* aggregate ports */ -- list_for_each_entry(aggr_p, &aggr_p_list, list) -- if (ntohs(tuple.s_port) >= aggr_p->port1 && -- ntohs(tuple.s_port) <= aggr_p->port2) { -- tuple.s_port = htons(aggr_p->aggr_port); -- break; -- } -+ if (tuple.protocol == IPPROTO_TCP || -+ tuple.protocol == IPPROTO_UDP || -+ tuple.protocol == IPPROTO_SCTP || -+ tuple.protocol == IPPROTO_UDPLITE) { -+ /* aggregate ports */ -+ list_for_each_entry(aggr_p, &aggr_p_list, list) -+ if (unlikely(ntohs(tuple.s_port) >= aggr_p->port1 && -+ ntohs(tuple.s_port) <= aggr_p->port2)) { -+ tuple.s_port = htons(aggr_p->aggr_port); -+ atomic_inc(&aggr_p->usage); -+ break; -+ } - -- list_for_each_entry(aggr_p, &aggr_p_list, list) -- if (ntohs(tuple.d_port) >= aggr_p->port1 && -- ntohs(tuple.d_port) <= aggr_p->port2) { -- tuple.d_port = htons(aggr_p->aggr_port); -- break; -- } -+ list_for_each_entry(aggr_p, &aggr_p_list, list) -+ if (unlikely(ntohs(tuple.d_port) >= aggr_p->port1 && -+ ntohs(tuple.d_port) <= aggr_p->port2)) { -+ tuple.d_port = htons(aggr_p->aggr_port); -+ atomic_inc(&aggr_p->usage); -+ break; -+ } -+ } - read_unlock_bh(&aggr_lock); - - hash = hash_netflow(&tuple); -- spin_lock_bh(&ipt_netflow_lock); -+ read_lock_bh(&htable_rwlock); -+ spin_lock(&htable_locks[hash & LOCK_COUNT_MASK]); - /* record */ - nf = ipt_netflow_find(&tuple, hash); -- if (!nf) { -- if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) { -+ if (unlikely(!nf)) { -+ struct rtable *rt; -+ -+ if (unlikely(maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows)) { - /* This is DOS attack prevention */ - NETFLOW_STAT_INC(maxflows_err); - NETFLOW_STAT_INC(pkt_drop); -- NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len)); -- spin_unlock_bh(&ipt_netflow_lock); -- return IPT_CONTINUE; -+ NETFLOW_STAT_ADD(traf_drop, pkt_len); -+ goto unlock_return; - } - - nf = init_netflow(&tuple, skb, hash); -- if (!nf || IS_ERR(nf)) { -+ if (unlikely(!nf || IS_ERR(nf))) { - NETFLOW_STAT_INC(alloc_err); - NETFLOW_STAT_INC(pkt_drop); -- NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len)); -- spin_unlock_bh(&ipt_netflow_lock); -- return IPT_CONTINUE; -+ NETFLOW_STAT_ADD(traf_drop, pkt_len); -+ goto unlock_return; - } - - nf->ts_first = jiffies; -@@ 
-1330,31 +2691,68 @@ static unsigned int netflow_target( - nf->s_mask = s_mask; - nf->d_mask = d_mask; - -- if (debug > 2) -- printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n", -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) -+ rt = (struct rtable *)skb->dst; -+#else /* since 2.6.26 */ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) -+ rt = skb->rtable; -+#else /* since 2.6.31 */ -+ rt = skb_rtable(skb); -+#endif -+#endif -+ if (likely(family == AF_INET)) { -+ if (rt) -+ nf->nh.ip = rt->rt_gateway; -+ } else { -+ if (rt) -+ nf->nh.in6 = ((struct rt6_info *)rt)->rt6i_gateway; -+ nf->flow_label = (iph->ip6.flow_lbl[0] << 16) | -+ (iph->ip6.flow_lbl[1] << 8) | (iph->ip6.flow_lbl[2]); -+ } -+#if 0 -+ if (unlikely(debug > 2)) -+ printk(KERN_INFO "ipt_NETFLOW: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n", - atomic_read(&ipt_netflow_count), - tuple.i_ifc, nf->o_ifc, - NIPQUAD(tuple.s_addr), ntohs(tuple.s_port), - NIPQUAD(tuple.d_addr), ntohs(tuple.d_port)); -+#endif - } else { - /* ipt_netflow_list is sorted by access time: - * most recently accessed flows are at head, old flows remain at tail - * this function bubble up flow to the head */ -+ spin_lock(&hlist_lock); - list_move(&nf->list, &ipt_netflow_list); -+ spin_unlock(&hlist_lock); - } - -+#ifdef CONFIG_NF_CONNTRACK_MARK -+ { -+ struct nf_conn *ct; -+ enum ip_conntrack_info ctinfo; -+ ct = nf_ct_get(skb, &ctinfo); -+ if (ct) -+ nf->mark = ct->mark; -+ } -+#endif -+ - nf->nr_packets++; -- nf->nr_bytes += ntohs(iph->tot_len); -+ nf->nr_bytes += pkt_len; - nf->ts_last = jiffies; - nf->tcp_flags |= tcp_flags; -+ nf->options |= options; -+ if (tuple.protocol == IPPROTO_TCP) -+ nf->tcpoptions |= tcpoptions; - - NETFLOW_STAT_INC(pkt_total); -- NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len)); -+ NETFLOW_STAT_ADD(traf_total, pkt_len); - -- if (active_needs_export(nf, active_timeout * HZ)) { -+ if (likely(active_needs_export(nf, active_timeout * HZ))) { - /* ok, if this active flow to be exported - * bubble it to the tail */ -+ spin_lock(&hlist_lock); - list_move_tail(&nf->list, &ipt_netflow_list); -+ spin_unlock(&hlist_lock); - - /* Blog: I thought about forcing timer to wake up sooner if we have - * enough exportable flows, but in fact this doesn't have much sense, -@@ -1363,35 +2761,194 @@ static unsigned int netflow_target( - * limited size). But yes, this is disputable. */ - } - -- spin_unlock_bh(&ipt_netflow_lock); -+unlock_return: -+ spin_unlock(&htable_locks[hash & LOCK_COUNT_MASK]); -+ read_unlock_bh(&htable_rwlock); - - return IPT_CONTINUE; - } - --static struct ipt_target ipt_netflow_reg = { -- .name = "NETFLOW", -- .target = netflow_target, -- .family = AF_INET, --#ifndef RAW_PROMISC_HACK -- .table = "filter", --#ifndef NF_IP_LOCAL_IN /* 2.6.25 */ -- .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | -- (1 << NF_INET_LOCAL_OUT), --#else -- .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | -- (1 << NF_IP_LOCAL_OUT), --#endif /* NF_IP_LOCAL_IN */ -+#ifdef CONFIG_NF_NAT_NEEDED -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+ /* Below 2.6.31 we don't need to handle callback chain manually. 
*/ -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) -+#define NET_STRUCT struct net *net -+#define NET_ARG net, -+#define nf_conntrack_event_cb net->ct.nf_conntrack_event_cb - #else -- .table = "raw", --#ifndef NF_IP_LOCAL_IN -- .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | -- (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_PRE_ROUTING), -+#define NET_STRUCT void -+#define NET_ARG -+#endif -+static int set_notifier_cb(NET_STRUCT) -+{ -+ struct nf_ct_event_notifier *notifier; -+ -+ notifier = rcu_dereference(nf_conntrack_event_cb); -+ if (notifier == NULL) { -+ /* Polite mode. */ -+ nf_conntrack_register_notifier(NET_ARG &ctnl_notifier); -+ } else if (notifier != &ctnl_notifier) { -+ if (!saved_event_cb) -+ saved_event_cb = notifier; -+ else if (saved_event_cb != notifier) -+ printk(KERN_ERR "natevents_net_init: %p != %p (report error.)\n", -+ saved_event_cb, notifier); -+ rcu_assign_pointer(nf_conntrack_event_cb, &ctnl_notifier); -+ } else -+ printk(KERN_ERR "ipt_NETFLOW: natevents already enabled.\n"); -+ return 0; -+} -+static void unset_notifier_cb(NET_STRUCT) -+{ -+ struct nf_ct_event_notifier *notifier; -+ -+ notifier = rcu_dereference(nf_conntrack_event_cb); -+ if (notifier == &ctnl_notifier) { -+ if (saved_event_cb == NULL) -+ nf_conntrack_unregister_notifier(NET_ARG &ctnl_notifier); -+ else -+ rcu_assign_pointer(nf_conntrack_event_cb, saved_event_cb); -+ } else -+ printk(KERN_ERR "ipt_NETFLOW: natevents already disabled.\n"); -+} -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) -+#undef nf_conntrack_event_cb -+static struct pernet_operations natevents_net_ops = { -+ .init = set_notifier_cb, -+ .exit = unset_notifier_cb -+}; -+#endif -+#endif /* since 2.6.31 */ -+ -+static DEFINE_MUTEX(events_lock); -+/* Both functions may be called multiple times. */ -+static void register_ct_events(void) -+{ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+#define NETLINK_M "nf_conntrack_netlink" -+ struct module *netlink_m; -+ static int referenced = 0; -+#endif -+ -+ printk(KERN_INFO "ipt_NETFLOW: enable natevents.\n"); -+ mutex_lock(&events_lock); -+ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+ /* Pre-load netlink module who will be first notifier -+ * user, and then hijack nf_conntrack_event_cb from it. */ -+ if ( -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) -+ !rcu_dereference(nf_conntrack_event_cb) || -+#endif -+ !(netlink_m = find_module(NETLINK_M))) { -+ printk("Loading " NETLINK_M "\n"); -+ request_module(NETLINK_M); -+ } -+ /* Reference netlink module to prevent it's unsafe unload before us. */ -+ if (!referenced && (netlink_m = find_module(NETLINK_M))) { -+ referenced++; -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) -+#define use_module ref_module -+#endif -+ use_module(THIS_MODULE, netlink_m); -+ } -+ -+ /* Register ct events callback. 
*/ -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) -+ register_pernet_subsys(&natevents_net_ops); - #else -- .hooks = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | -- (1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_PRE_ROUTING), --#endif /* NF_IP_LOCAL_IN */ --#endif /* !RAW_PROMISC_HACK */ -- .me = THIS_MODULE -+ set_notifier_cb(); -+#endif -+#else /* below v2.6.31 */ -+ if (!natevents && nf_conntrack_register_notifier(&ctnl_notifier) < 0) -+ printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n"); -+ else -+#endif -+ natevents = 1; -+ mutex_unlock(&events_lock); -+} -+ -+static void unregister_ct_events(void) -+{ -+ printk(KERN_INFO "ipt_NETFLOW: disable natevents.\n"); -+ mutex_lock(&events_lock); -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31) -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0) -+ unregister_pernet_subsys(&natevents_net_ops); -+#else /* < v3.2 */ -+ unset_notifier_cb(); -+#endif /* v3.2 */ -+ rcu_assign_pointer(saved_event_cb, NULL); -+#else /* < v2.6.31 */ -+ nf_conntrack_unregister_notifier(&ctnl_notifier); -+#endif -+ natevents = 0; -+ mutex_unlock(&events_lock); -+} -+#endif /* CONFIG_NF_NAT_NEEDED */ -+ -+#ifndef NF_IP_LOCAL_IN /* 2.6.25 */ -+#define NF_IP_PRE_ROUTING NF_INET_PRE_ROUTING -+#define NF_IP_LOCAL_IN NF_INET_LOCAL_IN -+#define NF_IP_FORWARD NF_INET_FORWARD -+#define NF_IP_LOCAL_OUT NF_INET_LOCAL_OUT -+#define NF_IP_POST_ROUTING NF_INET_POST_ROUTING -+#endif -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) -+/* net/netfilter/x_tables.c */ -+static void xt_unregister_targets(struct xt_target *target, unsigned int n) -+{ -+ unsigned int i; -+ -+ for (i = 0; i < n; i++) -+ xt_unregister_target(&target[i]); -+} -+static int xt_register_targets(struct xt_target *target, unsigned int n) -+{ -+ unsigned int i; -+ -+ int err = 0; -+ for (i = 0; i < n; i++) -+ if ((err = xt_register_target(&target[i]))) -+ goto err; -+ return err; -+err: -+ if (i > 0) -+ xt_unregister_targets(target, i); -+ return err; -+} -+#endif -+ -+static struct ipt_target ipt_netflow_reg[] __read_mostly = { -+ { -+ .name = "NETFLOW", -+ .target = netflow_target, -+ .checkentry = netflow_target_check, -+ .family = AF_INET, -+ .hooks = -+ (1 << NF_IP_PRE_ROUTING) | -+ (1 << NF_IP_LOCAL_IN) | -+ (1 << NF_IP_FORWARD) | -+ (1 << NF_IP_LOCAL_OUT) | -+ (1 << NF_IP_POST_ROUTING), -+ .me = THIS_MODULE -+ }, -+ { -+ .name = "NETFLOW", -+ .target = netflow_target, -+ .checkentry = netflow_target_check, -+ .family = AF_INET6, -+ .hooks = -+ (1 << NF_IP_PRE_ROUTING) | -+ (1 << NF_IP_LOCAL_IN) | -+ (1 << NF_IP_FORWARD) | -+ (1 << NF_IP_LOCAL_OUT) | -+ (1 << NF_IP_POST_ROUTING), -+ .me = THIS_MODULE -+ }, - }; - - static int __init ipt_netflow_init(void) -@@ -1399,11 +2956,16 @@ static int __init ipt_netflow_init(void) - #ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc_stat; - #endif -+ printk(KERN_INFO "ipt_NETFLOW version %s, srcversion %s\n", -+ IPT_NETFLOW_VERSION, THIS_MODULE->srcversion); - - get_random_bytes(&ipt_netflow_hash_rnd, 4); - - /* determine hash size (idea from nf_conntrack_core.c) */ - if (!hashsize) { -+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0) -+#define num_physpages totalram_pages -+#endif - hashsize = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct hlist_head)); - if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) -@@ -1411,8 +2973,7 @@ static int __init ipt_netflow_init(void) - } - if (hashsize < 16) - hashsize = 16; -- printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n", -- IPT_NETFLOW_VERSION, hashsize); -+ printk(KERN_INFO 
"ipt_NETFLOW: hashsize %u\n", hashsize); - - ipt_netflow_hash_size = hashsize; - ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size); -@@ -1434,12 +2995,18 @@ static int __init ipt_netflow_init(void) - } - - #ifdef CONFIG_PROC_FS -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat)); -+#else -+ proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat), &nf_seq_fops); -+#endif - if (!proc_stat) { - printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n"); - goto err_free_netflow_slab; - } -+#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) - proc_stat->proc_fops = &nf_seq_fops; -+#endif - #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) - proc_stat->owner = THIS_MODULE; - #endif -@@ -1480,21 +3047,28 @@ static int __init ipt_netflow_init(void) - } - add_aggregation(aggregation); - -- __start_scan_worker(); -+ netflow_switch_version(protocol); -+ _schedule_scan_worker(0); - setup_timer(&rate_timer, rate_timer_calc, 0); - mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE)); - -- if (xt_register_target(&ipt_netflow_reg)) -+ peakflows_at = jiffies; -+ if (xt_register_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg))) - goto err_stop_timer; - -- peakflows_at = jiffies; -+#ifdef CONFIG_NF_NAT_NEEDED -+ if (natevents) -+ register_ct_events(); -+#endif - -- printk(KERN_INFO "ipt_netflow loaded.\n"); -+ printk(KERN_INFO "ipt_NETFLOW is loaded.\n"); - return 0; - - err_stop_timer: -- __stop_scan_worker(); -+ _unschedule_scan_worker(); -+ netflow_scan_and_export(AND_FLUSH); - del_timer_sync(&rate_timer); -+ free_templates(); - destination_removeall(); - aggregation_remove(&aggr_n_list); - aggregation_remove(&aggr_p_list); -@@ -1506,17 +3080,18 @@ err_free_proc_stat: - #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat)); - err_free_netflow_slab: --#endif -+#endif - kmem_cache_destroy(ipt_netflow_cachep); - err_free_hash: - vfree(ipt_netflow_hash); - err: -+ printk(KERN_INFO "ipt_NETFLOW is not loaded.\n"); - return -ENOMEM; - } - - static void __exit ipt_netflow_fini(void) - { -- printk(KERN_INFO "ipt_netflow unloading..\n"); -+ printk(KERN_INFO "ipt_NETFLOW unloading..\n"); - - #ifdef CONFIG_SYSCTL - unregister_sysctl_table(netflow_sysctl_header); -@@ -1524,14 +3099,18 @@ static void __exit ipt_netflow_fini(void) - #ifdef CONFIG_PROC_FS - remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat)); - #endif -- -- xt_unregister_target(&ipt_netflow_reg); -- __stop_scan_worker(); -- netflow_scan_and_export(1); -+ xt_unregister_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg)); -+#ifdef CONFIG_NF_NAT_NEEDED -+ if (natevents) -+ unregister_ct_events(); -+#endif -+ _unschedule_scan_worker(); -+ netflow_scan_and_export(AND_FLUSH); - del_timer_sync(&rate_timer); - - synchronize_sched(); - -+ free_templates(); - destination_removeall(); - aggregation_remove(&aggr_n_list); - aggregation_remove(&aggr_p_list); -@@ -1539,7 +3118,7 @@ static void __exit ipt_netflow_fini(void) - kmem_cache_destroy(ipt_netflow_cachep); - vfree(ipt_netflow_hash); - -- printk(KERN_INFO "ipt_netflow unloaded.\n"); -+ printk(KERN_INFO "ipt_NETFLOW unloaded.\n"); - } - - module_init(ipt_netflow_init); -diff --git a/ipt_NETFLOW.h b/ipt_NETFLOW.h -index 4a7b645..749f985 100644 ---- a/ipt_NETFLOW.h -+++ b/ipt_NETFLOW.h -@@ -35,8 +35,8 @@ struct netflow5_record { - __be16 o_ifc; - __be32 nr_packets; - __be32 nr_octets; -- __be32 ts_first; -- __be32 ts_last; -+ __be32 first_ms; -+ __be32 last_ms; - 
__be16 s_port; - __be16 d_port; - __u8 reserved; -@@ -54,9 +54,9 @@ struct netflow5_record { - struct netflow5_pdu { - __be16 version; - __be16 nr_records; -- __be32 ts_uptime; -- __be32 ts_usecs; -- __be32 ts_unsecs; -+ __be32 ts_uptime; /* ms */ -+ __be32 ts_usecs; /* s */ -+ __be32 ts_unsecs; /* ns */ - __be32 seq; - __u8 eng_type; - __u8 eng_id; -@@ -65,42 +65,185 @@ struct netflow5_pdu { - } __attribute__ ((packed)); - #define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record)) - -+/* NetFlow v9 RFC http://www.ietf.org/rfc/rfc3954.txt */ -+enum { -+ IN_BYTES = 1, -+ IN_PKTS = 2, -+ PROTOCOL = 4, -+ TOS = 5, -+ TCP_FLAGS = 6, -+ L4_SRC_PORT = 7, -+ IPV4_SRC_ADDR = 8, -+ SRC_MASK = 9, -+ INPUT_SNMP = 10, -+ L4_DST_PORT = 11, -+ IPV4_DST_ADDR = 12, -+ DST_MASK = 13, -+ OUTPUT_SNMP = 14, -+ IPV4_NEXT_HOP = 15, -+ //SRC_AS = 16, -+ //DST_AS = 17, -+ //BGP_IPV4_NEXT_HOP = 18, -+ //MUL_DST_PKTS = 19, -+ //MUL_DST_BYTES = 20, -+ LAST_SWITCHED = 21, -+ FIRST_SWITCHED = 22, -+ IPV6_SRC_ADDR = 27, -+ IPV6_DST_ADDR = 28, -+ IPV6_FLOW_LABEL = 31, -+ ICMP_TYPE = 32, -+ MUL_IGMP_TYPE = 33, -+ //TOTAL_BYTES_EXP = 40, -+ //TOTAL_PKTS_EXP = 41, -+ //TOTAL_FLOWS_EXP = 42, -+ IPV6_NEXT_HOP = 62, -+ IPV6_OPTION_HEADERS = 64, -+ commonPropertiesId = 137, /* for MARK */ -+ ipv4Options = 208, -+ tcpOptions = 209, -+ postNATSourceIPv4Address = 225, -+ postNATDestinationIPv4Address = 226, -+ postNAPTSourceTransportPort = 227, -+ postNAPTDestinationTransportPort = 228, -+ natEvent = 230, -+ postNATSourceIPv6Address = 281, -+ postNATDestinationIPv6Address = 282, -+ IPSecSPI = 295, -+ observationTimeMilliseconds = 323, -+ observationTimeMicroseconds = 324, -+ observationTimeNanoseconds = 325, -+}; -+ -+enum { -+ FLOWSET_TEMPLATE = 0, -+ FLOWSET_OPTIONS = 1, -+ IPFIX_TEMPLATE = 2, -+ IPFIX_OPTIONS = 3, -+ FLOWSET_DATA_FIRST = 256, -+}; -+ -+struct flowset_template { -+ __be16 flowset_id; -+ __be16 length; -+ __be16 template_id; -+ __be16 field_count; -+} __attribute__ ((packed)); -+ -+struct flowset_data { -+ __be16 flowset_id; -+ __be16 length; -+} __attribute__ ((packed)); -+ -+/* NetFlow v9 packet. */ -+struct netflow9_pdu { -+ __be16 version; -+ __be16 nr_records; -+ __be32 sys_uptime_ms; -+ __be32 export_time_s; -+ __be32 seq; -+ __be32 source_id; /* Exporter Observation Domain */ -+ __u8 data[1400]; -+} __attribute__ ((packed)); -+ -+/* IPFIX packet. */ -+struct ipfix_pdu { -+ __be16 version; -+ __be16 length; -+ __be32 export_time_s; -+ __be32 seq; -+ __be32 odomain_id; /* Observation Domain ID */ -+ __u8 data[1400]; -+} __attribute__ ((packed)); -+ -+/* Maximum bytes flow can have, after it's reached flow will become -+ * not searchable and will be exported soon. 
*/ -+#define FLOW_FULL_WATERMARK 0xffefffff -+ -+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) -+union nf_inet_addr { -+ __be32 ip; -+ __be32 ip6[4]; -+ struct in_addr in; -+ struct in6_addr in6; -+}; -+#endif -+ - /* hashed data which identify unique flow */ -+/* 16+16 + 2+2 + 2+1+1+1 = 41 */ - struct ipt_netflow_tuple { -- __be32 s_addr; // Network byte order -- __be32 d_addr; // -"- -- __be16 s_port; // -"- -+ union nf_inet_addr src; -+ union nf_inet_addr dst; -+ __be16 s_port; // Network byte order - __be16 d_port; // -"- -- __be16 i_ifc; // Local byte order -+ __u16 i_ifc; // Host byte order - __u8 protocol; - __u8 tos; -+ __u8 l3proto; - }; --/* tuple size is rounded to u32s */ --#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4) -- --/* maximum bytes flow can have, after it reached flow become not searchable and will be exported soon */ --#define FLOW_FULL_WATERMARK 0xffefffff - --/* flow entry */ -+/* hlist[2] + tuple[]: 8+8 + 41 = 57 (less than usual cache line, 64) */ - struct ipt_netflow { - struct hlist_node hlist; // hashtable search chain -- struct list_head list; // all flows chain - - /* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */ - struct ipt_netflow_tuple tuple; - - /* volatile data */ -- __be16 o_ifc; -+ union nf_inet_addr nh; -+ __u16 o_ifc; - __u8 s_mask; - __u8 d_mask; -+ __u8 tcp_flags; /* `OR' of all tcp flags */ - - /* flow statistics */ - u_int32_t nr_packets; - u_int32_t nr_bytes; -- unsigned long ts_first; -- unsigned long ts_last; -- __u8 tcp_flags; /* `OR' of all tcp flags */ -+ union { -+ struct { -+ unsigned long first; -+ unsigned long last; -+ } ts; -+ ktime_t ts_obs; -+ } _ts_un; -+#define ts_first _ts_un.ts.first -+#define ts_last _ts_un.ts.last -+#define ts_obs _ts_un.ts_obs -+ u_int32_t flow_label; /* IPv6 */ -+ u_int32_t options; /* IPv4(16) & IPv6(32) Options */ -+ u_int32_t tcpoptions; -+#ifdef CONFIG_NF_CONNTRACK_MARK -+ u_int32_t mark; /* Exported as commonPropertiesId */ -+#endif -+#ifdef CONFIG_NF_NAT_NEEDED -+ __be32 s_as; -+ __be32 d_as; -+ struct nat_event *nat; -+#endif -+ struct list_head list; // all flows chain -+ spinlock_t *lock; -+}; -+ -+#ifdef CONFIG_NF_NAT_NEEDED -+enum { -+ NAT_CREATE, NAT_DESTROY, NAT_POOLEXHAUSTED - }; -+struct nat_event { -+ struct list_head list; -+ struct { -+ __be32 s_addr; -+ __be32 d_addr; -+ __be16 s_port; -+ __be16 d_port; -+ } pre, post; -+ ktime_t ts_ktime; -+ unsigned long ts_jiffies; -+ __u8 protocol; -+ __u8 nat_event; -+}; -+#define IS_DUMMY_FLOW(nf) (nf->nat) -+#else -+#define IS_DUMMY_FLOW(nf) 0 -+#endif - - static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1, - const struct ipt_netflow_tuple *t2) -@@ -115,11 +258,13 @@ struct ipt_netflow_sock { - unsigned short port; - atomic_t wmem_peak; // sk_wmem_alloc peak value - atomic_t err_full; // socket filled error -+ atomic_t err_connect; // connect errors - atomic_t err_other; // other socket errors - }; - - struct netflow_aggr_n { - struct list_head list; -+ atomic_t usage; - __u32 mask; - __u32 addr; - __u32 aggr_mask; -@@ -128,6 +273,7 @@ struct netflow_aggr_n { - - struct netflow_aggr_p { - struct list_head list; -+ atomic_t usage; - __u16 port1; - __u16 port2; - __u16 aggr_port; -diff --git a/libipt_NETFLOW.c b/libipt_NETFLOW.c -index d85b6d9..a0f9e5d 100644 ---- a/libipt_NETFLOW.c -+++ b/libipt_NETFLOW.c -@@ -58,24 +58,24 @@ - #define _IPT_IP struct ipt_ip - #endif - -+#ifndef IPTABLES_VERSION -+#define IPTABLES_VERSION XTABLES_VERSION -+#endif -+ - static struct option opts[] = { -- {0} -+ { 0 
}
- };
- 
- static void help(void)
- {
--	printf( "NETFLOW target\n");
-+	printf("NETFLOW target\n");
- }
- 
--//static int parse(int c, char **argv, int invert, unsigned int *flags,
--//	const _IPT_ENTRY *entry,
--//	struct ipt_entry_target **target)
- static int parse(int c, char **argv, int invert, unsigned int *flags,
-       const _IPT_ENTRY *entry,
-       struct ipt_entry_target **targetinfo)
- 
- {
--
- 	return 1;
- }
- 
-@@ -95,16 +95,9 @@ static void print(const _IPT_IP *ip,
- }
- 
- static struct iptables_target netflow = {
--#ifdef MOD140
--	.family		= AF_INET,
--#endif
- 	.next		= NULL,
- 	.name		= "NETFLOW",
--#ifdef XTABLES_VERSION
--	.version	= XTABLES_VERSION,
--#else
- 	.version	= IPTABLES_VERSION,
--#endif
- 	.size		= IPT_ALIGN(0),
- 	.userspacesize	= IPT_ALIGN(0),
- 	.help		= &help,
-diff --git a/murmur3.h b/murmur3.h
-new file mode 100644
-index 0000000..57a6006
---- /dev/null
-+++ b/murmur3.h
-@@ -0,0 +1,42 @@
-+/* MurmurHash3, based on https://code.google.com/p/smhasher of Austin Appleby. */
-+
-+static __always_inline uint32_t rotl32(const uint32_t x, const int8_t r)
-+{
-+	return (x << r) | (x >> (32 - r));
-+}
-+
-+static __always_inline uint32_t fmix32(register uint32_t h)
-+{
-+	h ^= h >> 16;
-+	h *= 0x85ebca6b;
-+	h ^= h >> 13;
-+	h *= 0xc2b2ae35;
-+	h ^= h >> 16;
-+	return h;
-+}
-+
-+static inline uint32_t murmur3(const void *key, const uint32_t len, const uint32_t seed)
-+{
-+	const uint32_t c1 = 0xcc9e2d51;
-+	const uint32_t c2 = 0x1b873593;
-+	const uint32_t *blocks;
-+	const uint8_t *tail;
-+	register uint32_t h1 = seed;
-+	uint32_t k1 = 0;
-+	uint32_t i;
-+
-+	blocks = (const uint32_t *)key;
-+	for (i = len / 4; i; --i) {
-+		h1 ^= rotl32(*blocks++ * c1, 15) * c2;
-+		h1 = rotl32(h1, 13) * 5 + 0xe6546b64;
-+	}
-+	tail = (const uint8_t*)blocks;
-+	switch (len & 3) {
-+	case 3: k1 ^= tail[2] << 16;
-+	case 2: k1 ^= tail[1] << 8;
-+	case 1: k1 ^= tail[0];
-+		h1 ^= rotl32(k1 * c1, 15) * c2;
-+	}
-+	return fmix32(h1 ^ len);
-+}
-+
-diff --git a/raw_promisc_debian_squeeze6.patch b/raw_promisc_debian_squeeze6.patch
-new file mode 100644
-index 0000000..69d0d35
---- /dev/null
-+++ b/raw_promisc_debian_squeeze6.patch
-@@ -0,0 +1,37 @@
-+
-+	Short manual and patch for Debian Squeeze
-+	suggested by Pavel Odintsov:
-+
-+On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote:
-+> 
-+> A short how-to for patching the Debian Squeeze kernel with the promisc patch.
-+> 
-+> cd /usr/src
-+> apt-get install -y dpkg-dev
-+> apt-get build-dep linux-image-2.6.32-5-amd64
-+> cd linux-2.6-2.6.32/
-+> apt-get source linux-image-2.6.32-5-amd64
-+> 
-+> wget .... /root/raw_promisc_debian_squeeze6.patch
-+> patch -p1 < raw_promisc_debian_squeeze6.patch
-+> Apply the Debian patches:
-+> debian/rules source
-+> 
-+> Start the build:
-+> debian/rules binary
-+> 
-+
-+diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c
-+--- linux-2.6-2.6.32/net/ipv4/ip_input.c	2009-12-03 04:51:21.000000000 +0100
-++++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c	2012-06-25 19:13:49.000000000 +0200
-+@@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n
-+ 	/* When the interface is in promisc. mode, drop all the crap
-+ 	 * that it receives, do not try to analyse it.
-+ 	 */
-+-	if (skb->pkt_type == PACKET_OTHERHOST)
-+-		goto drop;
-++	//if (skb->pkt_type == PACKET_OTHERHOST)
-++	//	goto drop;
-+ 
-+ 
-+ 	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
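
The murmur3.h file added by the removed git.patch is self-contained, so the hash can be checked outside the kernel. The sketch below restates the same algorithm for userspace (dropping the kernel-only __always_inline/register annotations) and hashes a made-up flow key; the flow_key layout, seed and bucket count are illustrative assumptions, not the module's actual ipt_netflow_tuple or hash_netflow() internals (the module seeds ipt_netflow_hash_rnd with get_random_bytes(), as the init hunk above shows).

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t rotl32(uint32_t x, int8_t r)
{
	return (x << r) | (x >> (32 - r));
}

static uint32_t fmix32(uint32_t h)
{
	h ^= h >> 16;
	h *= 0x85ebca6b;
	h ^= h >> 13;
	h *= 0xc2b2ae35;
	h ^= h >> 16;
	return h;
}

/* Same algorithm as murmur3() in the patch above. */
static uint32_t murmur3(const void *key, uint32_t len, uint32_t seed)
{
	const uint32_t c1 = 0xcc9e2d51, c2 = 0x1b873593;
	const uint32_t *blocks = key;
	const uint8_t *tail;
	uint32_t h1 = seed, k1 = 0, i;

	for (i = len / 4; i; --i) {
		h1 ^= rotl32(*blocks++ * c1, 15) * c2;
		h1 = rotl32(h1, 13) * 5 + 0xe6546b64;
	}
	tail = (const uint8_t *)blocks;
	switch (len & 3) {	/* intentional fall-through */
	case 3: k1 ^= tail[2] << 16;
	case 2: k1 ^= tail[1] << 8;
	case 1: k1 ^= tail[0];
		h1 ^= rotl32(k1 * c1, 15) * c2;
	}
	return fmix32(h1 ^ len);
}

int main(void)
{
	/* Hypothetical stand-in for the module's flow tuple. */
	struct flow_key {
		uint32_t src, dst;
		uint16_t s_port, d_port;
		uint8_t protocol;
	} key;
	uint32_t seed = 0xdeadbeef;	/* illustrative; the module uses a random seed */
	uint32_t hashsize = 16384;	/* assumed bucket count */

	memset(&key, 0, sizeof(key));	/* zero the padding so the hash input is deterministic */
	key.src = 0x0a000001;		/* 10.0.0.1 */
	key.dst = 0x0a000002;		/* 10.0.0.2 */
	key.s_port = 12345;
	key.d_port = 80;
	key.protocol = 6;		/* TCP */

	printf("bucket = %u\n",
	       (unsigned)(murmur3(&key, sizeof(key), seed) % hashsize));
	return 0;
}

The same key and seed always map to the same bucket, which is what a hash-table lookup relies on; changing the seed reshuffles the distribution.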
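
The ipt_NETFLOW.h hunk above also introduces struct netflow9_pdu and struct ipfix_pdu packet headers. For reference, here is a hedged userspace sketch of how an exporter might stamp the NetFlow v9 header per RFC 3954 (IPFIX, RFC 7011, uses version 10 and a byte length in place of the record count, matching the patch's struct ipfix_pdu). The uptime, sequence number and source_id values are invented for the example and are not taken from the module's export path.

#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* Userspace rendition of the patch's struct netflow9_pdu (__be16/__be32 -> uintN_t). */
struct netflow9_pdu {
	uint16_t version;
	uint16_t nr_records;
	uint32_t sys_uptime_ms;
	uint32_t export_time_s;
	uint32_t seq;
	uint32_t source_id;	/* Exporter Observation Domain */
	uint8_t  data[1400];
} __attribute__((packed));

static void fill_v9_header(struct netflow9_pdu *pdu, uint32_t uptime_ms,
			   uint32_t seq, uint16_t nr_records)
{
	memset(pdu, 0, sizeof(*pdu));
	pdu->version       = htons(9);		/* IPFIX uses 10 with the struct ipfix_pdu layout */
	pdu->nr_records    = htons(nr_records);	/* template + data records in this packet */
	pdu->sys_uptime_ms = htonl(uptime_ms);
	pdu->export_time_s = htonl((uint32_t)time(NULL));
	pdu->seq           = htonl(seq);
	pdu->source_id     = htonl(42);		/* arbitrary observation domain for the example */
}

int main(void)
{
	struct netflow9_pdu pdu;

	fill_v9_header(&pdu, 123456, 1, 0);
	printf("v9 header: version=%u, count=%u\n",
	       (unsigned)ntohs(pdu.version), (unsigned)ntohs(pdu.nr_records));
	return 0;
}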