| From: |
| Amin Azez <azez@ufomechanic.net> |
| To: |
| netfilter-devel@lists.netfilter.org |
| Subject: |
| [PATCH] ipt_account rate patch |
| Date: |
| Fri, 16 Jun 2006 15:24:08 +0100 |
| Archive-link: |
| Article,
Thread
|
Here are some patches I did for ipt_account before ipt_ACCOUNT, which
supports larger subnets, was re-released.
As well as per-IP packet and byte counting, the patches also do per-IP
rate calculation, so you can use this as part of a rate-limiting rule
and drop packets that would exceed the rate.
You get to choose whether or not dropped packets should also be accounted.
Just as ipt_account supports total-subnet accounting, we also support
total-subnet rate calculations.
The rate calculations are an improved version of what is in
ip_conntrack_rate.c; namely, we can handle the timer wrapping around zero.
Sam
--- extensions/libipt_account.c.rate 2006-05-16 11:14:35.000000000 +0100
+++ extensions/libipt_account.c 2006-05-16 15:23:38.000000000 +0100
@@ -34,14 +34,50 @@
"--ashort\n"
" table will colect only short statistics (only total counters\n"
" without splitting it into protocols.\n"
+ "--non-subnet\n"
+ " Traffic whose src or dst ip is not in the account subnet\n"
+ " should be counted as matching, even though no rate tests\n"
+ " can be done.\n"
+ "--above\n"
+ " Matches if any of the ranges are exceeded\n"
+ "--below\n"
+ " (Default) Matches if none of the ranges are exceeded\n"
+ "[!] --src-rate rate\n"
+ " TRUE if IP-specific rate on packets from that subnets [not] in range\n"
+ "[!] --src-subnet-rate\n"
+ " TRUE if rate on all packets from that subnets [not] in range\n"
+ "[!] --dst-rate rate\n"
+ " TRUE if IP-specific rate on packets to that subnets [not] in range\n"
+ "[!] --dst-subnet-rate rate\n"
+ " TRUE if rate on all packets to that subnets [not] in range\n"
+ "--ignore-src-over\n"
+ " Don't count packets if rate is exceeded.\n"
+ " Perhaps Use this if you intend to drop such packets.\n"
+ "--ignore-dst-over\n"
+ " Don't count packets if rate is exceeded.\n"
+ " Perhaps Use this if you intend to drop such packets.\n"
+ "--check-only\n"
+ " Don't count packets at all, just check decayed rate to\n"
+ " calculate if the packet would have been dropped.\n"
,
IPTABLES_VERSION);
};
static struct option opts[] = {
- { .name = "aaddr", .has_arg = 1, .flag = NULL, .val = 201 },
- { .name = "aname", .has_arg = 1, .flag = NULL, .val = 202 },
- { .name = "ashort", .has_arg = 0, .flag = NULL, .val = 203 },
+ { .name = "aaddr", .has_arg = 1, .flag = NULL, .val = 201 },
+ { .name = "aname", .has_arg = 1, .flag = NULL, .val = 202 },
+ { .name = "ashort", .has_arg = 0, .flag = NULL, .val = 203 },
+ { .name = "src-rate", .has_arg = 1, .flag = NULL, .val = 204 },
+ { .name = "src-subnet-rate", .has_arg = 1, .flag = NULL, .val = 205 },
+ { .name = "dst-rate", .has_arg = 1, .flag = NULL, .val = 206 },
+ { .name = "dst-subnet-rate", .has_arg = 1, .flag = NULL, .val = 207 },
+ { .name = "ignore-src-over", .has_arg = 0, .flag = NULL, .val = 208 },
+ { .name = "ignore-dst-over", .has_arg = 0, .flag = NULL, .val = 209 },
+ { .name = "check-only", .has_arg = 0, .flag = NULL, .val = 210 },
+ { .name = "not", .has_arg = 0, .flag = NULL, .val = 211 },
+ { .name = "above", .has_arg = 0, .flag = NULL, .val = 211 },
+ { .name = "below", .has_arg = 0, .flag = NULL, .val = 212 },
+ { .name = "non-subnet", .has_arg = 0, .flag = NULL, .val = 213 },
{ .name = 0, .has_arg = 0, .flag = 0, .val = 0 }
};
@@ -206,6 +242,39 @@
case 203:
info->shortlisting = 1;
break;
+ case 204:
+ info->src_rate=atoi(optarg);
+ if (invert) info->accounting|=IPT_ACCOUNT_INVERT_src_RATE;
+ break;
+ case 205:
+ info->src_subnet_rate=atoi(optarg);
+ if (invert) info->accounting|=IPT_ACCOUNT_INVERT_src_SUBNET_RATE;
+ break;
+ case 206:
+ info->dest_rate=atoi(optarg);
+ if (invert) info->accounting|=IPT_ACCOUNT_INVERT_dest_RATE;
+ break;
+ case 207:
+ info->dest_subnet_rate=atoi(optarg);
+ if (invert) info->accounting|=IPT_ACCOUNT_INVERT_dest_SUBNET_RATE;
+ break;
+ case 208:
+ info->accounting|=IPT_ACCOUNT_src_OVER;
+ break;
+ case 209:
+ info->accounting|=IPT_ACCOUNT_dest_OVER;
+ break;
+ case 210:
+ info->accounting|=IPT_ACCOUNT_CHECKONLY;
+ break;
+ case 211:
+ info->accounting|=IPT_ACCOUNT_INVERT;
+ break;
+ case 212: /* we default to below */
+ break;
+ case 213:
+ info->accounting|=IPT_ACCOUNT_INVERT_NAME;
+ break;
default:
return 0;
}
@@ -235,6 +304,30 @@
printf("name: %s ", info->name);
if (info->shortlisting)
printf("short-listing ");
+
+ if ((info->accounting & IPT_ACCOUNT_INVERT) && (info->src_rate || info->src_subnet_rate ||
info->dest_rate || info->dest_subnet_rate))
+ printf("NOT ");
+ printf("(");
+ if (info->src_rate)
+ printf("src-rate: %c %u B/s ",(info->accounting &
IPT_ACCOUNT_INVERT_src_RATE)?'>':'<',info->src_rate);
+
+ if (info->src_subnet_rate)
+ printf("src-subnet-rate: %c %u B/s ",(info->accounting &
IPT_ACCOUNT_INVERT_src_SUBNET_RATE)?'>':'<',info->src_subnet_rate);
+
+ if (info->dest_rate)
+ printf("dst-rate: %c %u B/s ",(info->accounting &
IPT_ACCOUNT_INVERT_dest_RATE)?'>':'<',info->dest_rate);
+
+ if (info->dest_subnet_rate)
+ printf("dst-subnet-rate: %c %u B/s ",(info->accounting &
IPT_ACCOUNT_INVERT_dest_SUBNET_RATE)?'>':'<',info->dest_subnet_rate);
+ printf(") ");
+ if (info->accounting & IPT_ACCOUNT_src_OVER)
+ printf("ignore-src-over ");
+
+ if (info->accounting & IPT_ACCOUNT_dest_OVER)
+ printf("ignore-dst-over ");
+
+ if (info->accounting & IPT_ACCOUNT_CHECKONLY)
+ printf("check-only ");
}
/* Function used for saving rule containing account match */
@@ -252,6 +345,41 @@
printf("--aname %s ", info->name);
if (info->shortlisting)
printf("--ashort ");
+
+ if ((info->accounting & IPT_ACCOUNT_INVERT_NAME))
+ printf("--non-subnet ");
+
+ if ((info->accounting & IPT_ACCOUNT_INVERT) && (info->src_rate || info->src_subnet_rate ||
info->dest_rate || info->dest_subnet_rate))
+ printf("--above ");
+
+ if (info->src_rate) {
+ if (info->accounting & IPT_ACCOUNT_INVERT_src_RATE) printf("! ");
+ printf("--src-rate %u ",info->src_rate);
+ }
+
+ if (info->src_subnet_rate) {
+ if (info->accounting & IPT_ACCOUNT_INVERT_src_SUBNET_RATE) printf("! ");
+ printf("--src-subnet-rate %u ",info->src_subnet_rate);
+ }
+
+	if (info->dest_rate) { /* emit the per-ip destination rate test, with "! " when inverted */
+		if (info->accounting & IPT_ACCOUNT_INVERT_dest_RATE) printf("! ");
+		printf("--dst-rate %u ",info->dest_rate); /* u_int32_t: print unsigned, as --src-rate does */
+	}
+
+	if (info->dest_subnet_rate) { /* emit the whole-subnet destination rate test */
+		if (info->accounting & IPT_ACCOUNT_INVERT_dest_SUBNET_RATE) printf("! ");
+		printf("--dst-subnet-rate %u ",info->dest_subnet_rate); /* u_int32_t: print unsigned */
+	}
+
+ if (info->accounting & IPT_ACCOUNT_src_OVER)
+ printf("--ignore-src-over ");
+
+ if (info->accounting & IPT_ACCOUNT_dest_OVER)
+ printf("--ignore-dst-over ");
+
+ if (info->accounting & IPT_ACCOUNT_CHECKONLY)
+ printf("--check-only ");
}
static struct iptables_match account = {
--- net/ipv4/netfilter/ipt_account.c 2006-06-16 14:12:09.000000000 +0100
+++ net/ipv4/netfilter/ipt_account.c 2006-05-23 13:04:41.000000000 +0100
@@ -1,14 +1,20 @@
/*
* accounting match (ipt_account.c)
* (C) 2003,2004 by Piotr Gasidlo (quaker@barbara.eu.org)
+ * (C) 2006 by UFO Mechanic <azez@ufomechanic.net>
+ * added rate calculations based on connrate, ip_conntrack_rate.c
+ * - which is Copyright (c) 2004 Nuutti Kotivuori <naked@iki.fi>
*
* Version: 0.1.7
*
* This software is distributed under the terms of GNU GPL
*/
+#include <linux/config.h>
#include <linux/module.h>
+#include <linux/types.h>
#include <linux/skbuff.h>
+#include <linux/jiffies.h>
#include <linux/proc_fs.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
@@ -33,7 +39,7 @@
#endif
static char version[] =
-KERN_INFO IPT_ACCOUNT_NAME " " IPT_ACCOUNT_VERSION " : Piotr Gasid³o <quaker@barbara.eu.org>,
http://www.barbara.eu.org/~quaker/ipt_account/\n";
+KERN_INFO IPT_ACCOUNT_NAME " " IPT_ACCOUNT_VERSION " : Piotr Gasid³o <quaker@barbara.eu.org>,
http://www.barbara.eu.org/~quaker/ipt_account/\n";
/* rights for files created in /proc/net/ipt_account/ */
static int permissions = 0644;
@@ -49,31 +55,118 @@
MODULE_PARM_DESC(permissions,"permissions on /proc/net/ipt_account/* files");
MODULE_PARM_DESC(netmask, "maximum *save* size of one list (netmask)");
+/* Notes from: ip_conntrack_rate.c by Nuutti Kotivuori <naked@iki.fi>
+ I wanted to build a simpler and more robust rate estimator than the
+ one used in sched/estimator.c. After evaluating a few choices I
+ settled with the one given in an example in [RFC2859], which is the
+ rate estimator described in [TON98].
+
+ I will copy the example table from [RFC2859] here:
+
+========================================================================
+|Initially: |
+| |
+| AVG_INTERVAL = a constant; |
+| avg-rate = CTR; |
+| t-front = 0; |
+| |
+|Upon each packet's arrival, the rate estimator updates its variables: |
+| |
+| Bytes_in_win = avg-rate * AVG_INTERVAL; |
+| New_bytes = Bytes_in_win + pkt_size; |
+| avg-rate = New_bytes/( now - t-front + AVG_INTERVAL); |
+| t-front = now; |
+| |
+|Where: |
+| now = The time of the current packet arrival |
+| pkt_size = The packet size in bytes of the arriving packet |
+| avg-rate = Measured Arrival Rate of traffic stream |
+| AVG_INTERVAL = Time window over which history is kept |
+| |
+| |
+| Figure 2. Example Rate Estimator Algorithm |
+| |
+========================================================================
+
+ Additionally we have to be concerned about overflows, remainders
+ and resolution in the algorithm. These are documented in the code
+ below.
+
+ References:
+
+ [RFC2859] W. Fang, N. Seddigh and B. Nandy, "A Time Sliding Window
+ Three Colour Marker (TSWTCM)", RFC 2859, June 2000.
+
+ [TON98] D.D. Clark, W. Fang, "Explicit Allocation of Best Effort
+ Packet Delivery Service", IEEE/ACM Transactions on
+ Networking, August 1998, Vol 6. No. 4, pp. 362-373.
+*/
+
+/* There are three important limits which need to be explored: maximum
+ expressible rate, minimum expressible rate, minimum packet size to
+ be countable.
+
+ Maximum expressible rate depends on the size of the window and the
+ scale we have chosen. It is approximately 2^32 / window /
+ scale. For example, with a window of 3 seconds and a scale of 100,
+ the maximum rate is 14 megabytes per second, i.e. about 115Mbit/s.
+
+ Minimum expressible rate depends on scale and the HZ on the
+ architecture. It is HZ / scale. For example, on most platforms where
+ HZ is now 1000, this is 10 bytes per second, i.e. 0.08kbit/s.
+
+ Minimum packet size to be countable depends on the window size,
+ scale and HZ. This is basically the smallest packet that when
+ arriving immediately after the previous packet can cause the
+ average rate to rise from zero to one. It is (HZ * window) /
+ scale. For example with a window of 3 seconds, a scale of 100 and a
+ HZ of 1000, this would be 30. That is, a continuous stream of
+ packets less than 30 bytes long would not be able to raise the rate
+ above zero.
+
+ These limitations are a simple consequence of the current
+ implementation using integer arithmetic. */
+
+/* Maximum number of tokens in total that we can have in a window is
+ limited by the range of the u_int32_t datatype. We prevent the
+ overflow of this by first calculating the maximum amount of tokens
+ a single packet can add and subtracting that from the maximum
+ value the window can get. */
+#define MAX_PACKET_IN_TOKENS (0x0000ffff * IP_CONNTRACK_RATE_SCALE)
+#define MAX_TOKENS_IN_WINDOW (0xffffffff - MAX_PACKET_IN_TOKENS)
+
+/* estimation interval, in jiffies */
+#define IP_CONNTRACK_RATE_INTERVAL (3 * HZ)
+
+/* scale on how many tokens per byte to generate */
+#define IP_CONNTRACK_RATE_SCALE 100
+
/* structure with statistics counters */
struct t_ipt_account_stat {
u_int64_t b_all, b_tcp, b_udp, b_icmp, b_other; /* byte counters for all/tcp/udp/icmp/other
traffic */
u_int64_t p_all, p_tcp, p_udp, p_icmp, p_other; /* packet counters for all/tcp/udp/icmp/other
traffic */
+ u_int32_t avgrate; /* rate data for all traffic, tokens per jiffy */
+ unsigned long time; /* time when this record was last updated */
};
/* stucture with statistics counters, used when table is created with --ashort switch */
struct t_ipt_account_stat_short {
u_int64_t b_all; /* byte counters for all traffic */
u_int64_t p_all; /* packet counters for all traffic */
+ u_int32_t avgrate; /* rate data for all traffic, tokens per jiffy */
+ unsigned long time; /* time when this record was last updated */
};
/* structure holding to/from statistics for single ip */
struct t_ipt_account_ip_list {
struct t_ipt_account_stat src;
struct t_ipt_account_stat dest;
- unsigned long time; /* time when this record was last updated */
-
};
/* same as above, for tables with --ashort switch */
struct t_ipt_account_ip_list_short {
struct t_ipt_account_stat_short src;
struct t_ipt_account_stat_short dest;
- unsigned long time;
};
/* structure describing single table */
@@ -101,6 +194,44 @@
/* root pointer holding list of the tables */
static struct t_ipt_account_table *account_tables = NULL;
+/* Rate estimator step, adapted from conntrack_rate_count in ip_conntrack_rate.c. */
+/* NOTE: the result is in tokens/jiffy, not bytes/second; use rate_get to convert */
+static inline u_int32_t
+calc_new_rate(u_int32_t avgrate, unsigned int len, unsigned long then, unsigned long now)
+{
+	u_int64_t new_bytes; /* 64 bits so the sum cannot wrap before it is clamped */
+	unsigned long interval;
+
+	new_bytes = (u_int64_t)avgrate * IP_CONNTRACK_RATE_INTERVAL +
+		(u_int64_t)len * IP_CONNTRACK_RATE_SCALE;
+	if (new_bytes > MAX_TOKENS_IN_WINDOW)
+		new_bytes = MAX_TOKENS_IN_WINDOW;
+
+	/* unsigned subtraction gives the elapsed jiffies even when the counter wrapped past zero */
+	interval = now - then;
+	if (interval > MAX_TOKENS_IN_WINDOW) return 0; /* quotient would be 0; also keeps the divisor within 32 bits */
+
+	return (u_int32_t)new_bytes / ((u_int32_t)interval + IP_CONNTRACK_RATE_INTERVAL);
+}
+
+/* Replace *avgrate with calc_new_rate() of its old value, len, then and now; taken from conntrack_rate_count in ip_conntrack_rate.c */
+/* the account struct must already be locked by the caller */
+static inline void
+do_rate_count(u_int32_t *avgrate, unsigned int len, unsigned long then, unsigned long now)
+{
+ *avgrate = calc_new_rate(*avgrate, len, then, now);
+}
+
+/* Convert a rate from tokens per jiffy to bytes per second (multiply by HZ, divide by the tokens-per-byte scale) */
+static inline u_int32_t rate_get(u_int32_t rate) {
+ return rate * HZ / IP_CONNTRACK_RATE_SCALE;
+}
+
+/* Recompute the stored rate for the timestamp 'now' with no new bytes added (len 0), and return it in bytes/second */
+static inline u_int32_t rate_now_get(u_int32_t rate, unsigned long then, unsigned long now) {
+ return calc_new_rate(rate,0,then,now) * HZ / IP_CONNTRACK_RATE_SCALE;
+}
+
/* convert ascii to ip */
int atoip(char *buffer, u_int32_t *ip) {
@@ -355,7 +486,8 @@
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n");
spin_lock_bh(&table->ip_list_lock);
/* update counters, do not overwrite time field */
- memcpy(&table->ip_list.l[ip - table->network], &l, sizeof(struct t_ipt_account_ip_list) -
sizeof(unsigned long));
+ memcpy(&table->ip_list.l[ip - table->network].src, &l.src, sizeof(l.src) - sizeof(unsigned
long));
+ memcpy(&table->ip_list.l[ip - table->network].dest, &l.dest, sizeof(l.dest) - sizeof(unsigned
long));
spin_unlock_bh(&table->ip_list_lock);
} else {
memset(&s, 0, sizeof(struct t_ipt_account_ip_list_short));
@@ -436,7 +568,10 @@
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": account_seq_write() updating row.\n");
spin_lock_bh(&table->ip_list_lock);
/* update counters, do not overwrite time field */
- memcpy(&table->ip_list.s[ip - table->network], &s, sizeof(struct t_ipt_account_ip_list_short) -
sizeof(unsigned long));
+ /* Sam - I don't like the implied knowledge that time is the last item in the struct,
+ * nor the implied knowledge of the packed-ness of the struct */
+ memcpy(&table->ip_list.s[ip - table->network].src, &s.src, sizeof(s.src) - sizeof(unsigned
long));
+ memcpy(&table->ip_list.s[ip - table->network].dest, &s.dest, sizeof(s.dest) - sizeof(unsigned
long));
spin_unlock_bh(&table->ip_list_lock);
}
}
@@ -453,12 +588,15 @@
unsigned int *bucket = (unsigned int *)v;
u_int32_t address = table->network + *bucket;
+ unsigned long now = jiffies;
struct timespec last;
if (!table->shortlisting) {
- jiffies_to_timespec(jiffies - table->ip_list.l[*bucket].time, &last);
+ unsigned long src_then = table->ip_list.l[*bucket].src.time;
+ unsigned long dest_then = table->ip_list.l[*bucket].dest.time;
+ jiffies_to_timespec(min(now - src_then, now - dest_then), &last);
seq_printf(s,
- "ip = %u.%u.%u.%u bytes_src = %llu %llu %llu %llu %llu packets_src = %llu %llu %llu %llu %llu
bytes_dest = %llu %llu %llu %llu %llu packets_dest = %llu %llu %llu %llu %llu time = %lu\n",
+ "ip = %u.%u.%u.%u bytes_src = %llu %llu %llu %llu %llu packets_src = %llu %llu %llu %llu %llu
rate_src = %u bytes_dest = %llu %llu %llu %llu %llu packets_dest = %llu %llu %llu %llu %llu
rate_dest = %u time = %lu\n",
HIPQUAD(address),
table->ip_list.l[*bucket].src.b_all,
table->ip_list.l[*bucket].src.b_tcp,
@@ -470,6 +608,7 @@
table->ip_list.l[*bucket].src.p_udp,
table->ip_list.l[*bucket].src.p_icmp,
table->ip_list.l[*bucket].src.p_other,
+ rate_now_get(table->ip_list.l[*bucket].src.avgrate,src_then,now),
table->ip_list.l[*bucket].dest.b_all,
table->ip_list.l[*bucket].dest.b_tcp,
table->ip_list.l[*bucket].dest.b_udp,
@@ -480,17 +619,22 @@
table->ip_list.l[*bucket].dest.p_udp,
table->ip_list.l[*bucket].dest.p_icmp,
table->ip_list.l[*bucket].dest.p_other,
+ rate_now_get(table->ip_list.l[*bucket].dest.avgrate,dest_then,now),
last.tv_sec
);
} else {
- jiffies_to_timespec(jiffies - table->ip_list.s[*bucket].time, &last);
+ unsigned long src_then = table->ip_list.s[*bucket].src.time;
+ unsigned long dest_then = table->ip_list.s[*bucket].dest.time;
+ jiffies_to_timespec(min(now - src_then, now - dest_then), &last);
seq_printf(s,
- "ip = %u.%u.%u.%u bytes_src = %llu packets_src = %llu bytes_dest = %llu packets_dest = %llu
time = %lu\n",
+ "ip = %u.%u.%u.%u bytes_src = %llu packets_src = %llu rate_src = %u bytes_dest = %llu
packets_dest = %llu rate_dest = %u time = %lu\n",
HIPQUAD(address),
table->ip_list.s[*bucket].src.b_all,
table->ip_list.s[*bucket].src.p_all,
+ rate_now_get(table->ip_list.s[*bucket].src.avgrate,src_then,now),
table->ip_list.s[*bucket].dest.b_all,
table->ip_list.s[*bucket].dest.p_all,
+ rate_now_get(table->ip_list.s[*bucket].dest.avgrate,dest_then,now),
last.tv_sec
);
}
@@ -530,7 +674,7 @@
/* update packet & bytes counters in *stat structure */
stat->b_all += skb->len;
stat->p_all++;
-
+
switch (skb->nh.iph->protocol) {
case IPPROTO_TCP:
stat->b_tcp += skb->len;
@@ -548,6 +692,7 @@
stat->b_other += skb->len;
stat->p_other++;
}
+
}
static inline void do_account_short(struct t_ipt_account_stat_short *stat, const struct sk_buff
*skb) {
@@ -557,6 +702,64 @@
stat->p_all++;
}
+/* The accounting logic below is needed four times (src/dest x long/short tables),
+ * so it is written once as a parameterized #define and expanded in each place.
+ * It uses table, info, skb, now and failed from the scope of the expansion site. */
+
+/* assemble the name of the "ignore over-limit packets" flag for a direction */
+#define IPT_ACCOUNT_RULE(DIRECTION) IPT_ACCOUNT_ ## DIRECTION ## _OVER
+/* DIRECTION = src/dest
+ * SL = s/l for the short or long ip_list
+ * ADDRESS = src_address/dest_address
+ * SHORT = _short for short tables, empty otherwise */
+#define DO_ACCOUNT(DIRECTION,SL,ADDRESS,SHORT) do { /* gives us scope as well as ; */ \
+	int limit = 0; /* number of rate tests that failed for this direction */ \
+	u_int32_t ip_index = (ADDRESS) - table->network; \
+\
+	/* If we are limiting, calc what the decayed per-ip rate would be by now */ \
+	if (info->DIRECTION ## _rate) { \
+		/* calc_new_rate gives tokens/jiffy; rate_get converts to bytes/second */ \
+		u_int32_t rate = rate_get(calc_new_rate(table->ip_list.SL[ip_index].DIRECTION.avgrate, 0, \
+			table->ip_list.SL[ip_index].DIRECTION.time, now)); \
+		if (((info->accounting & IPT_ACCOUNT_INVERT_ ## DIRECTION ## _RATE) != 0) ^ \
+		    (rate > info->DIRECTION ## _rate)) { \
+			limit++; /* record failure */ \
+		} \
+	} \
+	/* If we are subnet limiting, do the same with the subnet totals kept in entry 0 */ \
+	if (info->DIRECTION ## _subnet_rate) { \
+		/* calc_new_rate gives tokens/jiffy; rate_get converts to bytes/second */ \
+		u_int32_t subnet_rate = rate_get(calc_new_rate(table->ip_list.SL[0].DIRECTION.avgrate, 0, \
+			table->ip_list.SL[0].DIRECTION.time, now)); \
+		if (((info->accounting & IPT_ACCOUNT_INVERT_ ## DIRECTION ## _SUBNET_RATE) != 0) ^ \
+		    (subnet_rate > info->DIRECTION ## _subnet_rate)) { \
+			limit++; /* record failure */ \
+		} \
+	} \
+\
+	/* Account this packet unless --check-only, or over limit AND told not to count over-limit */ \
+	if (!(info->accounting & IPT_ACCOUNT_CHECKONLY) && \
+	    (!limit || 0 == (info->accounting & IPT_ACCOUNT_RULE(DIRECTION)))) { \
+		do_account ## SHORT(&table->ip_list.SL[ip_index].DIRECTION, skb); \
+		do_rate_count(&table->ip_list.SL[ip_index].DIRECTION.avgrate, \
+			skb->len, table->ip_list.SL[ip_index].DIRECTION.time, now); \
+		table->ip_list.SL[ip_index].DIRECTION.time = now; \
+\
+		/* update also counters for all hosts in this table (network address) */ \
+		if (table->netmask != INADDR_BROADCAST) { \
+			do_account ## SHORT(&table->ip_list.SL[0].DIRECTION, skb); \
+			do_rate_count(&table->ip_list.SL[0].DIRECTION.avgrate, \
+				skb->len, table->ip_list.SL[0].DIRECTION.time, now); \
+			table->ip_list.SL[0].DIRECTION.time = now; \
+		} \
+	} \
+	failed += limit; /* combine failures */ \
+} while(0)
+
+/* Change of semantics here when rate is added.
+ * Normally TRUE is returned if the packet is accounted in the subnet, otherwise FALSE.
+ * Now, if rate arguments are passed, FALSE is returned if a rate check fails.
+ */
static int match(const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -568,10 +771,18 @@
const struct t_ipt_account_info *info = (struct t_ipt_account_info*)matchinfo;
struct t_ipt_account_table *table;
int ret;
+ int failed=0;
+ int sets=0; /* count how many times we are found in the set, 0,1,2 */
unsigned long now;
- u_int32_t address;
-
+ /* Two complex cases:
+ * 1. the src and dest ip are in the same subnet - if so we must not update the subnet time in
src
+ * 2. the src and dest ip are the same - if so we must not update the ip time in src
+ * These cases require us to pre-calculate some values */
+
+ u_int32_t src_address;
+ u_int32_t dest_address;
+
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() entered.\n");
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() match name = %s.\n", info->name);
@@ -592,68 +803,60 @@
/* lock table while updating statistics */
spin_lock_bh(&table->ip_list_lock);
- /* default: no match */
+ /* default: false */
ret = 0;
/* get current time */
now = jiffies;
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() got packet src = %u.%u.%u.%u, dst = %u.%u.%u.%u,
proto = %u.\n", NIPQUAD(skb->nh.iph->saddr), NIPQUAD(skb->nh.iph->daddr), skb->nh.iph->protocol);
-
+
/* check whether traffic from source ip address ... */
- address = ntohl(skb->nh.iph->saddr);
+ src_address = ntohl(skb->nh.iph->saddr);
+ dest_address = ntohl(skb->nh.iph->daddr);
/* ... is being accounted by this table */
- if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
+ if (src_address && ((u_int32_t)(src_address & table->netmask) == (u_int32_t)table->network)) {
/* yes, account this packet */
- dprintk(KERN_INFO "ipt_account: match() accounting packet src = %u.%u.%u.%u, proto = %u.\n",
HIPQUAD(address), skb->nh.iph->protocol);
+ dprintk(KERN_INFO "ipt_account: match() accounting packet src = %u.%u.%u.%u, proto = %u.\n",
HIPQUAD(src_address), skb->nh.iph->protocol);
/* update counters this host */
if (!table->shortlisting) {
- do_account(&table->ip_list.l[address - table->network].src, skb);
- table->ip_list.l[address - table->network].time = now;
- /* update also counters for all hosts in this table (network address) */
- if (table->netmask != INADDR_BROADCAST) {
- do_account(&table->ip_list.l[0].src, skb);
- table->ip_list.l[0].time = now;
- }
+ DO_ACCOUNT(src,l,src_address,);
} else {
- do_account_short(&table->ip_list.s[address - table->network].src, skb);
- table->ip_list.s[address - table->network].time = now;
- /* update also counters for all hosts in this table (network address) */
- if (table->netmask != INADDR_BROADCAST) {
- do_account_short(&table->ip_list.s[0].src, skb);
- table->ip_list.s[0].time = now;
- }
+ DO_ACCOUNT(src,s,src_address,_short);
}
/* yes, it's a match */
- ret = 1;
+ sets ++;
}
/* do the same thing with destination ip address */
- address = ntohl(skb->nh.iph->daddr);
- if (address && ((u_int32_t)(address & table->netmask) == (u_int32_t)table->network)) {
- dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() accounting packet dst = %u.%u.%u.%u, proto =
%u.\n", HIPQUAD(address), skb->nh.iph->protocol);
+ if (dest_address && ((u_int32_t)(dest_address & table->netmask) == (u_int32_t)table->network)) {
+ dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() accounting packet dst = %u.%u.%u.%u, proto =
%u.\n", HIPQUAD(dest_address), skb->nh.iph->protocol);
if (!table->shortlisting) {
- do_account(&table->ip_list.l[address - table->network].dest, skb);
- table->ip_list.l[address - table->network].time = now;
- if (table->netmask != INADDR_BROADCAST) {
- do_account(&table->ip_list.l[0].dest, skb);
- table->ip_list.s[0].time = now;
- }
+ DO_ACCOUNT(dest,l,dest_address,);
} else {
- do_account_short(&table->ip_list.s[address - table->network].dest, skb);
- table->ip_list.s[address - table->network].time = now;
- if (table->netmask != INADDR_BROADCAST) {
- do_account_short(&table->ip_list.s[0].dest, skb);
- table->ip_list.s[0].time = now;
- }
+ DO_ACCOUNT(dest,s,dest_address,_short);
}
- ret = 1;
+ sets++;
}
spin_unlock_bh(&table->ip_list_lock);
dprintk(KERN_INFO IPT_ACCOUNT_NAME ": match() left.\n");
- return ret;
+ /* if sets is zero it means we did no rate tests */
+ if (sets) {
+ /* failed is non-zero if any rate tests failed, now normalize this with IPT_ACCOUNT_INVERT */
+ failed=((failed!=0) ^ ((info->accounting & IPT_ACCOUNT_INVERT)!=0));
+ } else {
+/* let INVERT_NAME be whether no sets is a failure or not */
+ failed=((sets==0) ^ ((info->accounting & IPT_ACCOUNT_INVERT_NAME)!=0));
+ }
+
+ if (failed) {
+ return 0;
+ } else {
+ return 1;
+ }
+ return 1;
}
static int checkentry(const char *tablename,
--- include/linux/netfilter_ipv4/ipt_account.h 2005-07-08 13:11:25.000000000 +0100
+++ include/linux/netfilter_ipv4/ipt_account.h 2006-05-16 10:58:00.000000000 +0100
@@ -13,13 +13,32 @@
#define IPT_ACCOUNT_NAME_LEN 64
#define IPT_ACCOUNT_NAME "ipt_account"
-#define IPT_ACCOUNT_VERSION "0.1.7"
+#define IPT_ACCOUNT_VERSION "0.1.7-rate"
+
+/* Flag bits for t_ipt_account_info.accounting (parenthesized so they are safe in any expression).
+ * *_OVER: don't account packets over the permitted rate; useful if they are going to be dropped */
+
+#define IPT_ACCOUNT_src_OVER (1 << 0)
+#define IPT_ACCOUNT_dest_OVER (1 << 1)
+#define IPT_ACCOUNT_CHECKONLY (1 << 2)
+#define IPT_ACCOUNT_INVERT_src_RATE (1 << 3)
+#define IPT_ACCOUNT_INVERT_src_SUBNET_RATE (1 << 4)
+#define IPT_ACCOUNT_INVERT_dest_RATE (1 << 5)
+#define IPT_ACCOUNT_INVERT_dest_SUBNET_RATE (1 << 6)
+/* this IPT_ACCOUNT_INVERT is the most useful, the other INVERT are for insane people */
+#define IPT_ACCOUNT_INVERT (1 << 7)
+#define IPT_ACCOUNT_INVERT_NAME (1 << 8)
struct t_ipt_account_info {
char name[IPT_ACCOUNT_NAME_LEN];
u_int32_t network;
u_int32_t netmask;
int shortlisting:1;
+ unsigned int accounting:9; /* IPT_ACCOUNT_* flag bits; unsigned so bit 8 is not a sign bit */
+ u_int32_t src_rate; /* per-ip source rate limit in bytes/second, 0 = no test */
+ u_int32_t src_subnet_rate; /* whole-subnet source rate limit in bytes/second, 0 = no test */
+ u_int32_t dest_rate; /* per-ip destination rate limit in bytes/second, 0 = no test */
+ u_int32_t dest_subnet_rate; /* whole-subnet destination rate limit in bytes/second, 0 = no test */
};
#endif