1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
|
diff --git a/include/linux/mroute.h b/include/linux/mroute.h
index 0d45b4e..406ef6f 100644
--- a/include/linux/mroute.h
+++ b/include/linux/mroute.h
@@ -33,7 +33,7 @@
#define SIOCGETSGCNT (SIOCPROTOPRIVATE+1)
#define SIOCGETRPF (SIOCPROTOPRIVATE+2)
-#define MAXVIFS 32
+#define MAXVIFS 256
typedef unsigned long vifbitmap_t; /* User mode code depends on this lot */
typedef unsigned short vifi_t;
#define ALL_VIFS ((vifi_t)(-1))
@@ -66,6 +66,7 @@ struct vifctl {
#define VIFF_TUNNEL 0x1 /* IPIP tunnel */
#define VIFF_SRCRT 0x2 /* NI */
#define VIFF_REGISTER 0x4 /* register vif */
+#define VIFF_NBMA 0x10
/*
* Cache manipulation structures for mrouted and PIMd
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 13e9dd3..43c988b 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -105,6 +105,31 @@ static struct net_protocol pim_protocol;
static struct timer_list ipmr_expire_timer;
+static __be32 ipmr_get_skb_nbma(struct sk_buff *skb)
+{
+ union {
+ char addr[MAX_ADDR_LEN];
+ __be32 inaddr;
+ } u;
+
+ if (dev_parse_header(skb, u.addr) != 4)
+ return INADDR_ANY;
+
+ return u.inaddr;
+}
+
+static int ip_mr_match_vif_skb(struct vif_device *vif, struct sk_buff *skb)
+{
+ if (vif->dev != skb->dev)
+ return 0;
+
+ if (vif->flags & VIFF_NBMA)
+ return ipmr_get_skb_nbma(skb) == vif->remote;
+
+ return 1;
+}
+
+
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
@@ -470,6 +495,7 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
return err;
}
break;
+ case VIFF_NBMA:
case 0:
dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
if (!dev)
@@ -504,7 +530,7 @@ static int vif_add(struct net *net, struct vifctl *vifc, int mrtsock)
v->pkt_in = 0;
v->pkt_out = 0;
v->link = dev->ifindex;
- if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER))
+ if (v->flags&(VIFF_TUNNEL|VIFF_REGISTER|VIFF_NBMA))
v->link = dev->iflink;
/* And finish update writing critical data */
@@ -1212,12 +1238,15 @@ static inline int ipmr_forward_finish(struct sk_buff *skb)
{
struct ip_options * opt = &(IPCB(skb)->opt);
- IP_INC_STATS_BH(dev_net(skb->dst->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
+ IP_INC_STATS_BH(dev_net(skb->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);
if (unlikely(opt->optlen))
ip_forward_options(skb);
- return dst_output(skb);
+ if (skb->dst != NULL)
+ return dst_output(skb);
+ else
+ return dev_queue_xmit(skb);
}
/*
@@ -1230,7 +1259,8 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
const struct iphdr *iph = ip_hdr(skb);
struct vif_device *vif = &net->ipv4.vif_table[vifi];
struct net_device *dev;
- struct rtable *rt;
+ struct net_device *fromdev = skb->dev;
+ struct rtable *rt = NULL;
int encap = 0;
if (vif->dev == NULL)
@@ -1257,6 +1287,19 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
if (ip_route_output_key(net, &rt, &fl))
goto out_free;
encap = sizeof(struct iphdr);
+ dev = rt->u.dst.dev;
+ } else if (vif->flags&VIFF_NBMA) {
+ /* Fixme, we should take tunnel source address from the
+ * tunnel device binding if it exists */
+ struct flowi fl = { .oif = vif->link,
+ .nl_u = { .ip4_u =
+ { .daddr = vif->remote,
+ .tos = RT_TOS(iph->tos) } },
+ .proto = IPPROTO_GRE };
+ if (ip_route_output_key(&init_net, &rt, &fl))
+ goto out_free;
+ encap = LL_RESERVED_SPACE(rt->u.dst.dev);
+ dev = vif->dev;
} else {
struct flowi fl = { .oif = vif->link,
.nl_u = { .ip4_u =
@@ -1265,34 +1308,39 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
.proto = IPPROTO_IPIP };
if (ip_route_output_key(net, &rt, &fl))
goto out_free;
+ dev = rt->u.dst.dev;
}
- dev = rt->u.dst.dev;
+ if (!(vif->flags & VIFF_NBMA)) {
+ if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+ /* Do not fragment multicasts. Alas, IPv4 does not
+ allow to send ICMP, so that packets will disappear
+ to blackhole.
+ */
- if (skb->len+encap > dst_mtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
- /* Do not fragment multicasts. Alas, IPv4 does not
- allow to send ICMP, so that packets will disappear
- to blackhole.
- */
-
- IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
- ip_rt_put(rt);
- goto out_free;
+ IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
+ goto out_free_rt;
+ }
}
encap += LL_RESERVED_SPACE(dev) + rt->u.dst.header_len;
- if (skb_cow(skb, encap)) {
- ip_rt_put(rt);
- goto out_free;
- }
+ if (skb_cow(skb, encap))
+ goto out_free_rt;
vif->pkt_out++;
vif->bytes_out += skb->len;
dst_release(skb->dst);
- skb->dst = &rt->u.dst;
+ if (vif->flags & VIFF_NBMA) {
+ ip_rt_put(rt);
+ skb->dst = NULL;
+ rt = NULL;
+ } else {
+ skb->dst = &rt->u.dst;
+ }
ip_decrease_ttl(ip_hdr(skb));
+ skb->dev = dev;
/* FIXME: forward and output firewalls used to be called here.
* What do we do with netfilter? -- RR */
@@ -1301,6 +1349,10 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
/* FIXME: extra output firewall step used to be here. --RR */
vif->dev->stats.tx_packets++;
vif->dev->stats.tx_bytes += skb->len;
+ } else if (vif->flags & VIFF_NBMA) {
+ if (dev_hard_header(skb, dev, ntohs(skb->protocol),
+ &vif->remote, NULL, 4) < 0)
+ goto out_free_rt;
}
IPCB(skb)->flags |= IPSKB_FORWARDED;
@@ -1316,21 +1368,30 @@ static void ipmr_queue_xmit(struct sk_buff *skb, struct mfc_cache *c, int vifi)
* not mrouter) cannot join to more than one interface - it will
* result in receiving multiple packets.
*/
- NF_HOOK(PF_INET, NF_INET_FORWARD, skb, skb->dev, dev,
+ NF_HOOK(PF_INET, NF_INET_FORWARD, skb, fromdev, dev,
ipmr_forward_finish);
return;
+out_free_rt:
+ if (rt != NULL)
+ ip_rt_put(rt);
out_free:
kfree_skb(skb);
return;
}
-static int ipmr_find_vif(struct net_device *dev)
+static int ipmr_find_vif(struct net_device *dev, __be32 nbma_origin)
{
struct net *net = dev_net(dev);
int ct;
for (ct = net->ipv4.maxvif-1; ct >= 0; ct--) {
- if (net->ipv4.vif_table[ct].dev == dev)
+ if (net->ipv4.vif_table[ct].dev != dev)
+ continue;
+
+ if (net->ipv4.vif_table[ct].flags & VIFF_NBMA) {
+ if (net->ipv4.vif_table[ct].remote == nbma_origin)
+ break;
+ } else if (nbma_origin == INADDR_ANY)
break;
}
return ct;
@@ -1351,7 +1412,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
/*
* Wrong interface: drop packet and (maybe) send PIM assert.
*/
- if (net->ipv4.vif_table[vif].dev != skb->dev) {
+ if (!ip_mr_match_vif_skb(&net->ipv4.vif_table[vif], skb)) {
int true_vifi;
if (skb->rtable->fl.iif == 0) {
@@ -1370,7 +1431,7 @@ static int ip_mr_forward(struct sk_buff *skb, struct mfc_cache *cache, int local
}
cache->mfc_un.res.wrong_if++;
- true_vifi = ipmr_find_vif(skb->dev);
+ true_vifi = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
if (true_vifi >= 0 && net->ipv4.mroute_do_assert &&
/* pimsm uses asserts, when switching from RPT to SPT,
@@ -1479,7 +1540,7 @@ int ip_mr_input(struct sk_buff *skb)
skb = skb2;
}
- vif = ipmr_find_vif(skb->dev);
+ vif = ipmr_find_vif(skb->dev, ipmr_get_skb_nbma(skb));
if (vif >= 0) {
int err = ipmr_cache_unresolved(net, vif, skb);
read_unlock(&mrt_lock);
@@ -1663,7 +1724,7 @@ int ipmr_get_route(struct net *net,
}
dev = skb->dev;
- if (dev == NULL || (vif = ipmr_find_vif(dev)) < 0) {
+ if (dev == NULL || (vif = ipmr_find_vif(dev, INADDR_ANY)) < 0) {
read_unlock(&mrt_lock);
return -ENODEV;
}
|