--- v2.4.16/linux/include/net/ip_fib.h Tue Nov 13 01:24:05 2001 +++ linux/include/net/ip_fib.h Fri Dec 14 00:03:49 2001 @@ -162,7 +162,8 @@ static inline void fib_select_default(const struct rt_key *key, struct fib_result *res) { - if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) + if ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) || + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST) main_table->tb_select_default(main_table, key, res); } @@ -174,6 +175,7 @@ extern int fib_lookup(const struct rt_key *key, struct fib_result *res); extern struct fib_table *__fib_new_table(int id); extern void fib_rule_put(struct fib_rule *r); +extern __inline__ int fib_result_table(struct fib_result *res); static inline struct fib_table *fib_get_table(int id) { --- v2.4.16/linux/include/net/route.h Tue Nov 13 01:30:31 2001 +++ linux/include/net/route.h Thu Dec 13 23:57:33 2001 @@ -48,6 +48,8 @@ { __u32 dst; __u32 src; + __u32 lsrc; + __u32 gw; int iif; int oif; #ifdef CONFIG_IP_ROUTE_FWMARK @@ -120,7 +122,7 @@ extern void ip_rt_advice(struct rtable **rp, int advice); extern void rt_cache_flush(int how); extern int ip_route_output_key(struct rtable **, const struct rt_key *key); -extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); +extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin, u32 lsrc); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); @@ -133,9 +135,10 @@ /* Deprecated: use ip_route_output_key directly */ static inline int ip_route_output(struct rtable **rp, - u32 daddr, u32 saddr, u32 tos, int oif) + u32 daddr, u32 saddr, u32 tos, int oif, u32 gw) { - struct rt_key key = { dst:daddr, src:saddr, oif:oif, tos:tos }; + struct rt_key key = { dst:daddr, src:saddr, gw:gw, + oif:oif, tos:tos }; return ip_route_output_key(rp, &key); } @@ -159,14 +162,14 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif) { int err; - err = ip_route_output(rp, dst, src, tos, oif); + err = ip_route_output(rp, dst, src, tos, oif, 0); if (err || (dst && src)) return err; dst = (*rp)->rt_dst; src = (*rp)->rt_src; ip_rt_put(*rp); *rp = NULL; - return ip_route_output(rp, dst, src, tos, oif); + return ip_route_output(rp, dst, src, tos, oif, 0); } extern void rt_bind_peer(struct rtable *rt, int create); --- v2.4.16/linux/include/linux/netfilter_ipv4/ip_nat.h Tue Nov 13 01:25:22 2001 +++ linux/include/linux/netfilter_ipv4/ip_nat.h Fri Dec 14 00:09:07 2001 @@ -124,5 +124,13 @@ extern u_int16_t ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck); + +/* Call input routing for SNAT-ed traffic */ +extern unsigned int ip_nat_route_input(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)); + #endif /*__KERNEL__*/ #endif --- v2.4.16/linux/include/linux/rtnetlink.h Tue Nov 13 01:27:21 2001 +++ linux/include/linux/rtnetlink.h Thu Dec 13 23:57:24 2001 @@ -228,6 +228,8 @@ #define RTNH_F_DEAD 1 /* Nexthop is dead (used by multipath) */ #define RTNH_F_PERVASIVE 2 /* Do recursive gateway lookup */ #define RTNH_F_ONLINK 4 /* Gateway is forced on link */ +#define RTNH_F_SUSPECT 8 /* We don't know the real state */ +#define RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT) /* Macros to handle hexthops */ --- v2.4.16/linux/net/atm/clip.c Tue Nov 13 01:26:34 2001 +++ linux/net/atm/clip.c Thu Dec 13 23:35:50 2001 @@ -525,7 +525,7 @@ unlink_clip_vcc(clip_vcc); return 0; } - error = ip_route_output(&rt,ip,0,1,0); + error = ip_route_output(&rt,ip,0,1,0,0); if (error) return error; neigh = __neigh_lookup(&clip_tbl,&ip,rt->u.dst.dev,1); ip_rt_put(rt); --- v2.4.16/linux/net/ipv4/arp.c Tue Nov 13 01:28:51 2001 +++ linux/net/ipv4/arp.c Thu Dec 13 23:35:50 2001 @@ -316,16 +316,19 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) { + struct rtable *rt; u32 saddr; u8 *dst_ha = NULL; struct net_device *dev = neigh->dev; u32 target = *(u32*)neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && inet_addr_type(skb->nh.iph->saddr) == RTN_LOCAL) - saddr = skb->nh.iph->saddr; - else - saddr = inet_select_addr(dev, target, RT_SCOPE_LINK); + if (ip_route_output(&rt, target, 0, 0, dev->ifindex, 0) < 0) + return; + saddr = rt->rt_src; + ip_rt_put(rt); + if (!saddr) + return; if ((probes -= neigh->parms->ucast_probes) < 0) { if (!(neigh->nud_state&NUD_VALID)) @@ -351,7 +354,7 @@ int flag = 0; /*unsigned long now; */ - if (ip_route_output(&rt, sip, tip, 0, 0) < 0) + if (ip_route_output(&rt, sip, tip, 0, 0, 0) < 0) return 1; if (rt->u.dst.dev != dev) { NET_INC_STATS_BH(ArpFilter); @@ -748,7 +751,7 @@ } if (arp->ar_op == __constant_htons(ARPOP_REQUEST) && - ip_route_input(skb, tip, sip, 0, dev) == 0) { + ip_route_input(skb, tip, sip, 0, dev, 0) == 0) { rt = (struct rtable*)skb->dst; addr_type = rt->rt_type; @@ -878,7 +881,7 @@ r->arp_flags |= ATF_COM; if (dev == NULL) { struct rtable * rt; - if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0) + if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0, 0)) != 0) return err; dev = rt->u.dst.dev; ip_rt_put(rt); @@ -961,7 +964,7 @@ if (dev == NULL) { struct rtable * rt; - if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0) + if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0, 0)) != 0) return err; dev = rt->u.dst.dev; ip_rt_put(rt); --- v2.4.16/linux/net/ipv4/fib_frontend.c Tue Nov 13 01:26:34 2001 +++ linux/net/ipv4/fib_frontend.c Thu Dec 13 23:35:50 2001 @@ -54,6 +54,8 @@ struct fib_table *local_table; struct fib_table *main_table; +#define FIB_RES_TABLE(r) (RT_TABLE_MAIN) + #else #define RT_TABLE_MIN 1 @@ -71,6 +73,7 @@ return tb; } +#define FIB_RES_TABLE(r) (fib_result_table(r)) #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -209,6 +212,9 @@ struct in_device *in_dev; struct rt_key key; struct fib_result res; + int table; + unsigned char prefixlen; + unsigned char scope; int no_addr, rpf; int ret; @@ -216,6 +222,7 @@ key.src = dst; key.tos = tos; key.oif = 0; + key.gw = 0; key.iif = oif; key.scope = RT_SCOPE_UNIVERSE; @@ -237,31 +244,35 @@ goto e_inval_res; *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); -#ifdef CONFIG_IP_ROUTE_MULTIPATH - if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1) -#else if (FIB_RES_DEV(res) == dev) -#endif { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; fib_res_put(&res); return ret; } + table = FIB_RES_TABLE(&res); + prefixlen = res.prefixlen; + scope = res.scope; fib_res_put(&res); if (no_addr) goto last_resort; - if (rpf) - goto e_inval; key.oif = dev->ifindex; ret = 0; if (fib_lookup(&key, &res) == 0) { - if (res.type == RTN_UNICAST) { + if (res.type == RTN_UNICAST && + ((table == FIB_RES_TABLE(&res) && + res.prefixlen >= prefixlen && res.scope >= scope) || + !rpf)) { *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; + fib_res_put(&res); + return ret; } fib_res_put(&res); } + if (rpf) + goto e_inval; return ret; last_resort: @@ -583,6 +594,8 @@ switch (event) { case NETDEV_UP: fib_add_ifaddr(ifa); + if (ifa->ifa_dev && ifa->ifa_dev->dev) + fib_sync_up(ifa->ifa_dev->dev); rt_cache_flush(-1); break; case NETDEV_DOWN: @@ -613,9 +626,7 @@ for_ifa(in_dev) { fib_add_ifaddr(ifa); } endfor_ifa(in_dev); -#ifdef CONFIG_IP_ROUTE_MULTIPATH fib_sync_up(dev); -#endif rt_cache_flush(-1); break; case NETDEV_DOWN: --- v2.4.16/linux/net/ipv4/fib_hash.c Fri Jun 2 07:22:18 2000 +++ linux/net/ipv4/fib_hash.c Thu Dec 13 23:35:50 2001 @@ -71,6 +71,7 @@ struct fib_info *fn_info; #define FIB_INFO(f) ((f)->fn_info) fn_key_t fn_key; + int fn_last_dflt; u8 fn_tos; u8 fn_type; u8 fn_scope; @@ -312,72 +313,113 @@ return err; } -static int fn_hash_last_dflt=-1; - -static int fib_detect_death(struct fib_info *fi, int order, - struct fib_info **last_resort, int *last_idx) +static int fib_detect_death(struct fib_info *fi, int order, int last_dflt, + struct fib_info **last_resort, int *last_idx, + int *last_nhsel, const struct rt_key *key) { struct neighbour *n; - int state = NUD_NONE; + int nhsel; + int state; + struct fib_nh * nh; + u32 dst; + int dead = 1; + + /* change_nexthops(fi) { */ + for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) { + if (key->oif && key->oif != nh->nh_oif) + continue; + if (key->gw && key->gw != nh->nh_gw && nh->nh_gw) + continue; + if (nh->nh_flags & RTNH_F_DEAD) + continue; + + nh->nh_flags &= ~RTNH_F_SUSPECT; + if (nh->nh_dev->flags & IFF_NOARP) { + dead = 0; + continue; + } - n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev); - if (n) { - state = n->nud_state; - neigh_release(n); - } - if (state==NUD_REACHABLE) - return 0; - if ((state&NUD_VALID) && order != fn_hash_last_dflt) - return 0; - if ((state&NUD_VALID) || - (*last_idx<0 && order > fn_hash_last_dflt)) { - *last_resort = fi; - *last_idx = order; + dst = nh->nh_gw; + if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK) + dst = key->dst; + + state = NUD_NONE; + n = neigh_lookup(&arp_tbl, &dst, nh->nh_dev); + if (n) { + state = n->nud_state; + neigh_release(n); + } + if (state==NUD_REACHABLE || + ((state&NUD_VALID) && order != last_dflt)) { + dead = 0; + continue; + } + if (!(state&NUD_VALID)) { + nh->nh_flags |= RTNH_F_SUSPECT; + } + if (!dead) continue; + if ((state&NUD_VALID) || + (*last_idx<0 && order >= last_dflt)) { + *last_resort = fi; + *last_idx = order; + *last_nhsel = nhsel; + } } - return 1; + /* } endfor_nexthops(fi) */ + + return dead; } static void fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res) { - int order, last_idx; - struct fib_node *f; + int order, last_idx, last_dflt, last_nhsel; + struct fib_node *f, *first_node; struct fib_info *fi = NULL; struct fib_info *last_resort; struct fn_hash *t = (struct fn_hash*)tb->tb_data; - struct fn_zone *fz = t->fn_zones[0]; + struct fn_zone *fz = t->fn_zones[res->prefixlen]; + fn_key_t k; if (fz == NULL) return; + k = fz_key(key->dst, fz); + last_dflt = -2; + first_node = NULL; last_idx = -1; last_resort = NULL; + last_nhsel = 0; order = -1; read_lock(&fib_hash_lock); - for (f = fz->fz_hash[0]; f; f = f->fn_next) { + for (f = fz_chain(k, fz); f; f = f->fn_next) { struct fib_info *next_fi = FIB_INFO(f); - if ((f->fn_state&FN_S_ZOMBIE) || + if (!fn_key_eq(k, f->fn_key) || + (f->fn_state&FN_S_ZOMBIE) || f->fn_scope != res->scope || +#ifdef CONFIG_IP_ROUTE_TOS + (f->fn_tos && f->fn_tos != key->tos) || +#endif f->fn_type != RTN_UNICAST) continue; if (next_fi->fib_priority > res->fi->fib_priority) break; - if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK) - continue; f->fn_state |= FN_S_ACCESSED; - if (fi == NULL) { - if (next_fi != res->fi) - break; - } else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) { + if (!first_node) { + last_dflt = f->fn_last_dflt; + first_node = f; + } + if (fi && !fib_detect_death(fi, order, last_dflt, + &last_resort, &last_idx, &last_nhsel, key)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + first_node->fn_last_dflt = order; goto out; } fi = next_fi; @@ -385,16 +427,23 @@ } if (order<=0 || fi==NULL) { - fn_hash_last_dflt = -1; + if (fi && fi->fib_nhs > 1 && + fib_detect_death(fi, order, last_dflt, + &last_resort, &last_idx, &last_nhsel, key) && + last_resort == fi) { + fi->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; + } + if (first_node) first_node->fn_last_dflt = -1; goto out; } - if (!fib_detect_death(fi, order, &last_resort, &last_idx)) { + if (!fib_detect_death(fi, order, last_dflt, &last_resort, &last_idx, + &last_nhsel, key)) { if (res->fi) fib_info_put(res->fi); res->fi = fi; atomic_inc(&fi->fib_clntref); - fn_hash_last_dflt = order; + first_node->fn_last_dflt = order; goto out; } @@ -404,8 +453,9 @@ res->fi = last_resort; if (last_resort) atomic_inc(&last_resort->fib_clntref); + last_resort->fib_nh[last_nhsel].nh_flags &= ~RTNH_F_SUSPECT; + first_node->fn_last_dflt = last_idx; } - fn_hash_last_dflt = last_idx; out: read_unlock(&fib_hash_lock); } @@ -572,6 +622,7 @@ memset(new_f, 0, sizeof(struct fib_node)); + new_f->fn_last_dflt = -1; new_f->fn_key = key; #ifdef CONFIG_IP_ROUTE_TOS new_f->fn_tos = tos; --- v2.4.16/linux/net/ipv4/fib_rules.c Tue Nov 13 01:25:53 2001 +++ linux/net/ipv4/fib_rules.c Thu Dec 13 23:34:13 2001 @@ -307,6 +307,11 @@ } } +int fib_result_table(struct fib_result *res) +{ + return res->r->r_table; +} + int fib_lookup(const struct rt_key *key, struct fib_result *res) { int err; @@ -372,7 +377,8 @@ void fib_select_default(const struct rt_key *key, struct fib_result *res) { if (res->r && res->r->r_action == RTN_UNICAST && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + ((FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) || + FIB_RES_NH(*res).nh_scope == RT_SCOPE_HOST)) { struct fib_table *tb; if ((tb = fib_get_table(res->r->r_table)) != NULL) tb->tb_select_default(tb, key, res); --- v2.4.16/linux/net/ipv4/fib_semantics.c Tue Oct 17 20:43:14 2000 +++ linux/net/ipv4/fib_semantics.c Thu Dec 13 23:35:50 2001 @@ -148,7 +148,7 @@ #ifdef CONFIG_NET_CLS_ROUTE nh->nh_tclassid != onh->nh_tclassid || #endif - ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD)) + ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE)) return -1; onh++; } endfor_nexthops(fi); @@ -164,7 +164,7 @@ nfi->fib_prefsrc == fi->fib_prefsrc && nfi->fib_priority == fi->fib_priority && memcmp(nfi->fib_metrics, fi->fib_metrics, sizeof(fi->fib_metrics)) == 0 && - ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 && + ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 && (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0)) return fi; } endfor_fib_info(); @@ -378,11 +378,25 @@ if (key.scope < RT_SCOPE_LINK) key.scope = RT_SCOPE_LINK; - if ((err = fib_lookup(&key, &res)) != 0) - return err; - nh->nh_scope = res.scope; - nh->nh_oif = FIB_RES_OIF(res); - nh->nh_dev = FIB_RES_DEV(res); + err = fib_lookup(&key, &res); + if (err) { + if (err == -ENETUNREACH && + fi->fib_protocol == RTPROT_STATIC) { + struct net_device *dev; + + dev = __dev_get_by_index(nh->nh_oif); + if (dev == NULL || dev->flags & IFF_UP || + inet_addr_type(nh->nh_gw) == RTN_LOCAL) + return err; + nh->nh_flags |= RTNH_F_DEAD; + nh->nh_scope = RT_SCOPE_LINK; + nh->nh_dev = dev; + } else return err; + } else { + nh->nh_scope = res.scope; + nh->nh_oif = FIB_RES_OIF(res); + nh->nh_dev = FIB_RES_DEV(res); + } if (nh->nh_dev) atomic_inc(&nh->nh_dev->refcnt); fib_res_put(&res); @@ -396,8 +410,11 @@ if (in_dev == NULL) return -ENODEV; if (!(in_dev->dev->flags&IFF_UP)) { - in_dev_put(in_dev); - return -ENETDOWN; + if (fi->fib_protocol != RTPROT_STATIC) { + in_dev_put(in_dev); + return -ENETDOWN; + } + nh->nh_flags |= RTNH_F_DEAD; } nh->nh_dev = in_dev->dev; atomic_inc(&nh->nh_dev->refcnt); @@ -581,6 +598,10 @@ #ifdef CONFIG_IP_ROUTE_NAT case RTN_NAT: FIB_RES_RESET(*res); + if (FIB_RES_NH(*res).nh_flags & RTNH_F_DEAD) { + res->fi = NULL; + return 1; + } atomic_inc(&fi->fib_clntref); return 0; #endif @@ -592,8 +613,12 @@ for_nexthops(fi) { if (nh->nh_flags&RTNH_F_DEAD) continue; - if (!key->oif || key->oif == nh->nh_oif) - break; + if (key->oif && key->oif != nh->nh_oif) + continue; + if (key->gw && key->gw != nh->nh_gw && + nh->nh_gw) + continue; + break; } #ifdef CONFIG_IP_ROUTE_MULTIPATH if (nhsel < fi->fib_nhs) { @@ -867,16 +892,28 @@ int dead = 0; change_nexthops(fi) { - if (nh->nh_flags&RTNH_F_DEAD) - dead++; - else if (nh->nh_dev == dev && + if (nh->nh_flags&RTNH_F_DEAD) { + if (fi->fib_protocol!=RTPROT_STATIC || + nh->nh_dev == NULL || + !__in_dev_get(nh->nh_dev) || + nh->nh_dev->flags&IFF_UP) + dead++; + } else if (nh->nh_dev == dev && nh->nh_scope != scope) { nh->nh_flags |= RTNH_F_DEAD; #ifdef CONFIG_IP_ROUTE_MULTIPATH fi->fib_power -= nh->nh_power; nh->nh_power = 0; #endif - dead++; + if (fi->fib_protocol!=RTPROT_STATIC || + force || + __in_dev_get(dev) == NULL) + dead++; + } + if (nh->nh_flags&RTNH_F_DEAD && force && + nh->nh_dev == dev) { + dev_put(nh->nh_dev); + nh->nh_dev = NULL; } } endfor_nexthops(fi) if (dead == fi->fib_nhs) { @@ -888,34 +925,50 @@ return ret; } -#ifdef CONFIG_IP_ROUTE_MULTIPATH - /* Dead device goes up. We wake up dead nexthops. - It takes sense only on multipath routes. */ int fib_sync_up(struct net_device *dev) { - int ret = 0; + struct rt_key key; + struct fib_result res; + int ret, rep; +repeat: if (!(dev->flags&IFF_UP)) return 0; + ret = 0; + rep = 0; for_fib_info() { int alive = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { - alive++; + if (!(nh->nh_flags&RTNH_F_DEAD)) continue; - } if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP)) continue; if (nh->nh_dev != dev || __in_dev_get(dev) == NULL) continue; + if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) { + memset(&key, 0, sizeof(key)); + key.dst = nh->nh_gw; + key.oif = nh->nh_oif; + key.scope = nh->nh_scope; + if (fib_lookup(&key, &res) != 0) + continue; + if (res.type != RTN_UNICAST) { + fib_res_put(&res); + continue; + } + fib_res_put(&res); + rep = 1; + } alive++; +#ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_power = 0; +#endif nh->nh_flags &= ~RTNH_F_DEAD; } endfor_nexthops(fi) @@ -924,9 +977,13 @@ ret++; } } endfor_fib_info(); + if (rep) + goto repeat; return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* The algorithm is suboptimal, but it provides really fair weighted route distribution. @@ -935,12 +992,37 @@ void fib_select_multipath(const struct rt_key *key, struct fib_result *res) { struct fib_info *fi = res->fi; - int w; + int w, alive; + + if (key->oif) { + int sel = -1; + w = -1; + change_nexthops(fi) { + if (key->oif != nh->nh_oif) + continue; + if (key->gw && key->gw != nh->nh_gw && + nh->nh_gw) + continue; + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { + if (nh->nh_power > w) { + w = nh->nh_power; + sel = nhsel; + } + } + } endfor_nexthops(fi); + if (sel >= 0) { + res->nh_sel = sel; + return; + } + goto last_resort; + } + +repeat: if (fi->fib_power <= 0) { int power = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD)) { + if (!(nh->nh_flags&RTNH_F_BADSTATE)) { power += nh->nh_weight; nh->nh_power = nh->nh_weight; } @@ -948,8 +1030,9 @@ fi->fib_power = power; #if 1 if (power <= 0) { - printk(KERN_CRIT "impossible 777\n"); - return; + goto last_resort; + /* printk(KERN_CRIT "impossible 777\n"); */ + /* return; */ } #endif } @@ -961,14 +1044,34 @@ w = jiffies % fi->fib_power; + alive = 0; change_nexthops(fi) { - if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) { + if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) { if ((w -= nh->nh_power) <= 0) { nh->nh_power--; fi->fib_power--; res->nh_sel = nhsel; return; } + alive = 1; + } + } endfor_nexthops(fi); + if (alive) { + fi->fib_power = 0; + goto repeat; + } + +last_resort: + + for_nexthops(fi) { + if (!(nh->nh_flags&RTNH_F_DEAD)) { + if (key->oif && key->oif != nh->nh_oif) + continue; + if (key->gw && key->gw != nh->nh_gw && + nh->nh_gw) + continue; + res->nh_sel = nhsel; + return; } } endfor_nexthops(fi); --- v2.4.16/linux/net/ipv4/icmp.c Mon Nov 26 23:06:06 2001 +++ linux/net/ipv4/icmp.c Thu Dec 13 23:35:50 2001 @@ -361,7 +361,7 @@ if (ipc.opt->srr) daddr = icmp_param->replyopts.faddr; } - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0, 0)) goto out; if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, icmp_param->data.icmph.code)) { @@ -478,7 +478,7 @@ ((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) : iph->tos; - if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) + if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0, 0)) goto out; if (ip_options_echo(&icmp_param.replyopts, skb_in)) @@ -502,7 +502,7 @@ ipc.opt = &icmp_param.replyopts; if (icmp_param.replyopts.srr) { ip_rt_put(rt); - if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0)) + if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0, 0)) goto out; } --- v2.4.16/linux/net/ipv4/igmp.c Tue Nov 13 01:27:21 2001 +++ linux/net/ipv4/igmp.c Thu Dec 13 23:35:50 2001 @@ -207,7 +207,7 @@ if (type == IGMP_HOST_LEAVE_MESSAGE) dst = IGMP_ALL_ROUTER; - if (ip_route_output(&rt, dst, 0, 0, dev->ifindex)) + if (ip_route_output(&rt, dst, 0, 0, dev->ifindex, 0)) return -1; if (rt->rt_src == 0) { ip_rt_put(rt); @@ -622,7 +622,7 @@ __dev_put(dev); } - if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) { + if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0, 0)) { dev = rt->u.dst.dev; ip_rt_put(rt); } --- v2.4.16/linux/net/ipv4/ipip.c Tue Nov 13 01:30:08 2001 +++ linux/net/ipv4/ipip.c Thu Dec 13 23:35:50 2001 @@ -417,7 +417,7 @@ skb2->nh.raw = skb2->data; /* Try to guess incoming interface */ - if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0, 0)) { kfree_skb(skb2); return; } @@ -427,7 +427,7 @@ if (rt->rt_flags&RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; - if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0, 0) || rt->u.dst.dev->type != ARPHRD_IPGRE) { ip_rt_put(rt); kfree_skb(skb2); @@ -435,7 +435,7 @@ } } else { ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev, 0) || skb2->dst->dev->type != ARPHRD_IPGRE) { kfree_skb(skb2); return; @@ -560,7 +560,7 @@ goto tx_error_icmp; } - if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link, 0)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } @@ -819,7 +819,7 @@ if (iph->daddr) { struct rtable *rt; - if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link, 0)) { tdev = rt->u.dst.dev; ip_rt_put(rt); } --- v2.4.16/linux/net/ipv4/ipmr.c Tue Nov 13 01:28:51 2001 +++ linux/net/ipv4/ipmr.c Thu Dec 13 23:35:50 2001 @@ -1145,11 +1145,11 @@ #endif if (vif->flags&VIFF_TUNNEL) { - if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link)) + if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link, 0)) return; encap = sizeof(struct iphdr); } else { - if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link)) + if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link, 0)) return; } --- v2.4.16/linux/net/ipv4/ip_gre.c Tue Nov 13 01:30:32 2001 +++ linux/net/ipv4/ip_gre.c Thu Dec 13 23:35:50 2001 @@ -486,7 +486,7 @@ skb2->nh.raw = skb2->data; /* Try to guess incoming interface */ - if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) { + if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0, 0)) { kfree_skb(skb2); return; } @@ -496,7 +496,7 @@ if (rt->rt_flags&RTCF_LOCAL) { ip_rt_put(rt); rt = NULL; - if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) || + if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0, 0) || rt->u.dst.dev->type != ARPHRD_IPGRE) { ip_rt_put(rt); kfree_skb(skb2); @@ -504,7 +504,7 @@ } } else { ip_rt_put(rt); - if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || + if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev, 0) || skb2->dst->dev->type != ARPHRD_IPGRE) { kfree_skb(skb2); return; @@ -747,7 +747,7 @@ tos &= ~1; } - if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link, 0)) { tunnel->stat.tx_carrier_errors++; goto tx_error; } @@ -1102,7 +1102,7 @@ struct rtable *rt; if (ip_route_output(&rt, t->parms.iph.daddr, t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), - t->parms.link)) { + t->parms.link, 0)) { MOD_DEC_USE_COUNT; return -EADDRNOTAVAIL; } @@ -1173,7 +1173,7 @@ if (iph->daddr) { struct rtable *rt; - if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link, 0)) { tdev = rt->u.dst.dev; ip_rt_put(rt); } --- v2.4.16/linux/net/ipv4/ip_input.c Mon Nov 26 23:06:06 2001 +++ linux/net/ipv4/ip_input.c Thu Dec 13 23:35:50 2001 @@ -317,7 +317,7 @@ * how the packet travels inside Linux networking. */ if (skb->dst == NULL) { - if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev)) + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, dev, 0)) goto drop; } --- v2.4.16/linux/net/ipv4/ip_nat_dumb.c Tue Nov 13 01:25:26 2001 +++ linux/net/ipv4/ip_nat_dumb.c Thu Dec 13 23:35:50 2001 @@ -124,6 +124,7 @@ key.dst = ciph->saddr; key.iif = skb->dev->ifindex; key.oif = 0; + key.gw = 0; #ifdef CONFIG_IP_ROUTE_TOS key.tos = RT_TOS(ciph->tos); #endif --- v2.4.16/linux/net/ipv4/ip_options.c Tue Nov 13 01:28:51 2001 +++ linux/net/ipv4/ip_options.c Thu Dec 13 23:35:50 2001 @@ -597,7 +597,7 @@ rt = (struct rtable*)skb->dst; skb->dst = NULL; - err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev); + err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev, 0); rt2 = (struct rtable*)skb->dst; if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) { ip_rt_put(rt2); --- v2.4.16/linux/net/ipv4/ip_output.c Tue Nov 13 01:30:08 2001 +++ linux/net/ipv4/ip_output.c Thu Dec 13 23:35:50 2001 @@ -368,7 +368,7 @@ */ if (ip_route_output(&rt, daddr, sk->saddr, RT_CONN_FLAGS(sk), - sk->bound_dev_if)) + sk->bound_dev_if, 0)) goto no_route; __sk_dst_set(sk, &rt->u.dst); sk->route_caps = rt->u.dst.dev->features; @@ -964,7 +964,7 @@ daddr = replyopts.opt.faddr; } - if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0)) + if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0, 0)) return; /* And let IP do all the hard work. --- v2.4.16/linux/net/ipv4/netfilter/ip_fw_compat.c Tue Nov 13 01:25:26 2001 +++ linux/net/ipv4/netfilter/ip_fw_compat.c Thu Dec 13 23:35:50 2001 @@ -141,7 +141,7 @@ if ((*pskb)->dst != NULL || ip_route_input(*pskb, iph->daddr, iph->saddr, iph->tos, - (struct net_device *)in) == 0) + (struct net_device *)in, 0) == 0) icmp_send(*pskb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); return NF_DROP; --- v2.4.16/linux/net/ipv4/netfilter/ip_fw_compat_masq.c Tue Nov 13 01:28:51 2001 +++ linux/net/ipv4/netfilter/ip_fw_compat_masq.c Thu Dec 13 23:35:50 2001 @@ -40,6 +40,10 @@ enum ip_conntrack_info ctinfo; struct ip_conntrack *ct; unsigned int ret; + struct rtable *rt, *skb_rt; + struct net_device *skb_dev; + __u32 saddr; + int new; /* Sorry, only ICMP, TCP and UDP. */ if (iph->protocol != IPPROTO_ICMP @@ -63,22 +67,28 @@ } info = &ct->nat.info; + iph = (*pskb)->nh.iph; + saddr = iph->saddr; + new = 0; WRITE_LOCK(&ip_nat_lock); /* Setup the masquerade, if not already */ if (!info->initialized) { u_int32_t newsrc; - struct rtable *rt; struct ip_nat_multi_range range; + skb_rt = (struct rtable *) (*pskb)->dst; + skb_dev = skb_rt->u.dst.dev; /* Pass 0 instead of saddr, since it's going to be changed anyway. */ - if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) { + if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), + skb_dev? skb_dev->ifindex : 0, + skb_dev? skb_rt->rt_gateway : 0) != 0) { + WRITE_UNLOCK(&ip_nat_lock); DEBUGP("ipnat_rule_masquerade: Can't reroute.\n"); return NF_DROP; } - newsrc = inet_select_addr(rt->u.dst.dev, rt->rt_gateway, - RT_SCOPE_UNIVERSE); + newsrc = rt->rt_src; ip_rt_put(rt); range = ((struct ip_nat_multi_range) { 1, @@ -93,12 +103,31 @@ } place_in_hashes(ct, info); - info->initialized = 1; + new = info->initialized = 1; } else DEBUGP("Masquerading already done on this conn.\n"); WRITE_UNLOCK(&ip_nat_lock); - return do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); + ret = do_bindings(ct, ctinfo, info, NF_IP_POST_ROUTING, pskb); + if (ret != NF_ACCEPT || saddr == (*pskb)->nh.iph->saddr || new) + return ret; + + iph = (*pskb)->nh.iph; + if (ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), 0, 0) != 0) + return NF_DROP; + + skb_rt = (struct rtable *) (*pskb)->dst; + skb_dev = skb_rt->u.dst.dev; + if (skb_dev != rt->u.dst.dev || rt->rt_gateway != skb_rt->rt_gateway) { + if (skb_dev != rt->u.dst.dev) { + /* TODO: check the new mtu and reply FRAG_NEEDED */ + } + dst_release((*pskb)->dst); + (*pskb)->dst = &rt->u.dst; + } else { + ip_rt_put(rt); + } + return NF_ACCEPT; } void --- v2.4.16/linux/net/ipv4/netfilter/ip_nat_core.c Tue Nov 13 01:25:53 2001 +++ linux/net/ipv4/netfilter/ip_nat_core.c Thu Dec 13 23:35:50 2001 @@ -206,7 +206,7 @@ struct rtable *rt; /* FIXME: IPTOS_TOS(iph->tos) --RR */ - if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) { + if (ip_route_output(&rt, var_ip, 0, 0, 0, 0) != 0) { DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n", NIPQUAD(var_ip)); return 0; @@ -852,6 +852,60 @@ hdr->checksum = 0; hdr->checksum = ip_compute_csum((unsigned char *)hdr, sizeof(*hdr) + datalen); + + return NF_ACCEPT; +} + +unsigned int +ip_nat_route_input(unsigned int hooknum, + struct sk_buff **pskb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *skb = *pskb; + struct iphdr *iph; + struct ip_conntrack *ct; + enum ip_conntrack_info ctinfo; + struct ip_nat_info *info; + enum ip_conntrack_dir dir; + __u32 saddr; + int i; + + if (!(ct = ip_conntrack_get(skb, &ctinfo))) + return NF_ACCEPT; + + info = &ct->nat.info; + if (!info->initialized) + return NF_ACCEPT; + + if (skb->dst) + return NF_ACCEPT; + + if (skb->len < sizeof(struct iphdr)) + return NF_ACCEPT; + + iph = skb->nh.iph; + saddr = iph->saddr; + hooknum = NF_IP_POST_ROUTING; + dir = CTINFO2DIR(ctinfo); + + READ_LOCK(&ip_nat_lock); + for (i = 0; i < info->num_manips; i++) { + if (info->manips[i].direction == dir + && info->manips[i].hooknum == hooknum + && info->manips[i].maniptype == IP_NAT_MANIP_SRC) { + saddr = info->manips[i].manip.ip; + } + } + READ_UNLOCK(&ip_nat_lock); + + if (saddr == iph->saddr) + return NF_ACCEPT; + + if (ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, + skb->dev, saddr)) + return NF_DROP; return NF_ACCEPT; } --- v2.4.16/linux/net/ipv4/netfilter/ip_nat_standalone.c Tue Nov 13 01:29:33 2001 +++ linux/net/ipv4/netfilter/ip_nat_standalone.c Thu Dec 13 23:35:50 2001 @@ -225,6 +225,9 @@ /* Before packet filtering, change destination */ static struct nf_hook_ops ip_nat_in_ops = { { NULL, NULL }, ip_nat_fn, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_NAT_DST }; +/* Before routing, route before mangling */ +static struct nf_hook_ops ip_nat_inr_ops += { { NULL, NULL }, ip_nat_route_input, PF_INET, NF_IP_PRE_ROUTING, NF_IP_PRI_LAST-1 }; /* After packet filtering, change source */ static struct nf_hook_ops ip_nat_out_ops = { { NULL, NULL }, ip_nat_out, PF_INET, NF_IP_POST_ROUTING, NF_IP_PRI_NAT_SRC}; @@ -290,10 +293,15 @@ printk("ip_nat_init: can't register in hook.\n"); goto cleanup_nat; } + ret = nf_register_hook(&ip_nat_inr_ops); + if (ret < 0) { + printk("ip_nat_init: can't register inr hook.\n"); + goto cleanup_inops; + } ret = nf_register_hook(&ip_nat_out_ops); if (ret < 0) { printk("ip_nat_init: can't register out hook.\n"); - goto cleanup_inops; + goto cleanup_inrops; } ret = nf_register_hook(&ip_nat_local_out_ops); if (ret < 0) { @@ -310,6 +318,8 @@ nf_unregister_hook(&ip_nat_local_out_ops); cleanup_outops: nf_unregister_hook(&ip_nat_out_ops); + cleanup_inrops: + nf_unregister_hook(&ip_nat_inr_ops); cleanup_inops: nf_unregister_hook(&ip_nat_in_ops); cleanup_nat: --- v2.4.16/linux/net/ipv4/netfilter/ipt_MASQUERADE.c Tue Nov 13 01:29:33 2001 +++ linux/net/ipv4/netfilter/ipt_MASQUERADE.c Thu Dec 13 23:35:50 2001 @@ -88,6 +88,7 @@ key.src = 0; /* Unknown: that's what we're trying to establish */ key.tos = RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN; key.oif = out->ifindex; + key.gw = ((struct rtable *) (*pskb)->dst)->rt_gateway; #ifdef CONFIG_IP_ROUTE_FWMARK key.fwmark = (*pskb)->nfmark; #endif --- v2.4.16/linux/net/ipv4/netfilter/ipt_MIRROR.c Tue Nov 13 01:29:33 2001 +++ linux/net/ipv4/netfilter/ipt_MIRROR.c Thu Dec 13 23:35:50 2001 @@ -44,7 +44,7 @@ /* Backwards */ if (ip_route_output(&rt, iph->saddr, iph->daddr, RT_TOS(iph->tos) | RTO_CONN, - 0)) { + 0, 0)) { return 0; } --- v2.4.16/linux/net/ipv4/netfilter/ipt_REJECT.c Tue Nov 13 01:29:33 2001 +++ linux/net/ipv4/netfilter/ipt_REJECT.c Thu Dec 13 23:35:50 2001 @@ -130,7 +130,7 @@ if (ip_route_output(&rt, nskb->nh.iph->daddr, local ? nskb->nh.iph->saddr : 0, RT_TOS(nskb->nh.iph->tos) | RTO_CONN, - 0) != 0) + 0, 0) != 0) goto free_nskb; dst_release(nskb->dst); @@ -203,7 +203,7 @@ tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL; - if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0)) + if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0, 0)) return; /* RFC says return as much as we can without exceeding 576 bytes. */ --- v2.4.16/linux/net/ipv4/raw.c Tue Nov 13 01:27:00 2001 +++ linux/net/ipv4/raw.c Thu Dec 13 23:35:50 2001 @@ -408,7 +408,7 @@ rfh.saddr = sk->protinfo.af_inet.mc_addr; } - err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif); + err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif, 0); if (err) goto done; --- v2.4.16/linux/net/ipv4/route.c Tue Nov 13 01:30:32 2001 +++ linux/net/ipv4/route.c Thu Dec 13 23:35:50 2001 @@ -835,6 +835,7 @@ /* Gateway is different ... */ rt->rt_gateway = new_gw; + if (rt->key.gw) rt->key.gw = new_gw; /* Redirect received -> path was valid */ dst_confirm(&rth->u.dst); @@ -1258,6 +1259,7 @@ rth->key.fwmark = skb->nfmark; #endif rth->key.src = saddr; + rth->key.lsrc = 0; rth->rt_src = saddr; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = daddr; @@ -1271,6 +1273,7 @@ rth->u.dst.dev = &loopback_dev; dev_hold(rth->u.dst.dev); rth->key.oif = 0; + rth->key.gw = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->rt_type = RTN_MULTICAST; @@ -1310,7 +1313,7 @@ */ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, u32 lsrc) { struct rt_key key; struct fib_result res; @@ -1330,16 +1333,18 @@ goto out; key.dst = daddr; - key.src = saddr; + key.src = lsrc? : saddr; + key.lsrc = lsrc; key.tos = tos; #ifdef CONFIG_IP_ROUTE_FWMARK key.fwmark = skb->nfmark; #endif - key.iif = dev->ifindex; + key.iif = lsrc? loopback_dev.ifindex : dev->ifindex; key.oif = 0; + key.gw = 0; key.scope = RT_SCOPE_UNIVERSE; - hash = rt_hash_code(daddr, saddr ^ (key.iif << 5), tos); + hash = rt_hash_code(daddr, saddr ^ (dev->ifindex << 5), tos); /* Check for the most weird martians, which can be not detected by fib_lookup. @@ -1360,6 +1365,20 @@ if (BADCLASS(daddr) || ZERONET(daddr) || LOOPBACK(daddr)) goto martian_destination; + if (lsrc) { + struct net_device *dev_out; + + if (MULTICAST(lsrc) || BADCLASS(lsrc) || + ZERONET(lsrc) || LOOPBACK(lsrc)) + goto e_inval; + + /* It is equivalent to inet_addr_type(lsrc) == RTN_LOCAL */ + dev_out = ip_dev_find(lsrc); + if (dev_out == NULL) + goto e_inval; + dev_put(dev_out); + } + /* * Now we are ready to route packet. */ @@ -1369,6 +1388,10 @@ goto no_route; } free_res = 1; + if (lsrc && res.type != RTN_UNICAST) + goto e_inval; + key.iif = dev->ifindex; + key.src = saddr; rt_cache_stat[smp_processor_id()].in_slow_tot++; @@ -1418,8 +1441,9 @@ if (res.type != RTN_UNICAST) goto martian_destination; + fib_select_default(&key, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && key.oif == 0) + if (res.fi->fib_nhs > 1) fib_select_multipath(&key, &res); #endif out_dev = in_dev_get(FIB_RES_DEV(res)); @@ -1439,6 +1463,7 @@ flags |= RTCF_DIRECTSRC; if (out_dev == in_dev && err && !(flags & (RTCF_NAT | RTCF_MASQ)) && + !lsrc && (IN_DEV_SHARED_MEDIA(out_dev) || inet_addr_onlink(out_dev, saddr, FIB_RES_GW(res)))) flags |= RTCF_DOREDIRECT; @@ -1465,6 +1490,7 @@ #endif rth->key.src = saddr; rth->rt_src = saddr; + rth->key.lsrc = lsrc; rth->rt_gateway = daddr; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_src_map = key.src; @@ -1477,6 +1503,7 @@ rth->u.dst.dev = out_dev->dev; dev_hold(rth->u.dst.dev); rth->key.oif = 0; + rth->key.gw = 0; rth->rt_spec_dst= spec_dst; rth->u.dst.input = ip_forward; @@ -1487,7 +1514,8 @@ rth->rt_flags = flags; #ifdef CONFIG_NET_FASTROUTE - if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT))) { + if (netdev_fastroute && !(flags&(RTCF_NAT|RTCF_MASQ|RTCF_DOREDIRECT)) && + !lsrc) { struct net_device *odev = rth->u.dst.dev; if (odev != dev && dev->accept_fastpath && @@ -1510,6 +1538,8 @@ brd_input: if (skb->protocol != __constant_htons(ETH_P_IP)) goto e_inval; + if (lsrc) + goto e_inval; if (ZERONET(saddr)) spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK); @@ -1542,6 +1572,7 @@ #endif rth->key.src = saddr; rth->rt_src = saddr; + rth->key.lsrc = 0; #ifdef CONFIG_IP_ROUTE_NAT rth->rt_dst_map = key.dst; rth->rt_src_map = key.src; @@ -1554,6 +1585,7 @@ rth->u.dst.dev = &loopback_dev; dev_hold(rth->u.dst.dev); rth->key.oif = 0; + rth->key.gw = 0; rth->rt_gateway = daddr; rth->rt_spec_dst= spec_dst; rth->u.dst.input= ip_local_deliver; @@ -1620,7 +1652,7 @@ } int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, - u8 tos, struct net_device *dev) + u8 tos, struct net_device *dev, u32 lsrc) { struct rtable * rth; unsigned hash; @@ -1634,6 +1666,7 @@ if (rth->key.dst == daddr && rth->key.src == saddr && rth->key.iif == iif && + rth->key.lsrc == lsrc && rth->key.oif == 0 && #ifdef CONFIG_IP_ROUTE_FWMARK rth->key.fwmark == skb->nfmark && @@ -1680,7 +1713,7 @@ read_unlock(&inetdev_lock); return -EINVAL; } - return ip_route_input_slow(skb, daddr, saddr, tos, dev); + return ip_route_input_slow(skb, daddr, saddr, tos, dev, lsrc); } /* @@ -1705,6 +1738,7 @@ key.tos = tos & IPTOS_RT_MASK; key.iif = loopback_dev.ifindex; key.oif = oldkey->oif; + key.gw = oldkey->gw; #ifdef CONFIG_IP_ROUTE_FWMARK key.fwmark = oldkey->fwmark; #endif @@ -1794,6 +1828,7 @@ dev_out = &loopback_dev; dev_hold(dev_out); key.oif = loopback_dev.ifindex; + key.gw = 0; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; @@ -1801,7 +1836,7 @@ if (fib_lookup(&key, &res)) { res.fi = NULL; - if (oldkey->oif) { + if (oldkey->oif && dev_out->flags&IFF_UP) { /* Apparently, routing tables are wrong. Assume, that the destination is on link. @@ -1844,6 +1879,7 @@ dev_out = &loopback_dev; dev_hold(dev_out); key.oif = dev_out->ifindex; + key.gw = 0; if (res.fi) fib_info_put(res.fi); res.fi = NULL; @@ -1851,13 +1887,12 @@ goto make_route; } + if (res.type == RTN_UNICAST) + fib_select_default(&key, &res); #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (res.fi->fib_nhs > 1 && key.oif == 0) + if (res.fi->fib_nhs > 1) fib_select_multipath(&key, &res); - else #endif - if (!res.prefixlen && res.type == RTN_UNICAST && !key.oif) - fib_select_default(&key, &res); if (!key.src) key.src = FIB_RES_PREFSRC(res); @@ -1915,7 +1950,9 @@ rth->key.tos = tos; rth->key.src = oldkey->src; rth->key.iif = 0; + rth->key.lsrc = 0; rth->key.oif = oldkey->oif; + rth->key.gw = oldkey->gw; #ifdef CONFIG_IP_ROUTE_FWMARK rth->key.fwmark = oldkey->fwmark; #endif @@ -1994,6 +2031,7 @@ rth->key.src == key->src && rth->key.iif == 0 && rth->key.oif == key->oif && + rth->key.gw == key->gw && #ifdef CONFIG_IP_ROUTE_FWMARK rth->key.fwmark == key->fwmark && #endif @@ -2146,7 +2184,7 @@ skb->protocol = __constant_htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); - err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); + err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev, 0); local_bh_enable(); rt = (struct rtable*)skb->dst; if (!err && rt->u.dst.error) @@ -2155,7 +2193,7 @@ int oif = 0; if (rta[RTA_OIF - 1]) memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif); + err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif, 0); } if (err) { kfree_skb(skb); --- v2.4.16/linux/net/ipv4/syncookies.c Tue Nov 13 01:30:32 2001 +++ linux/net/ipv4/syncookies.c Thu Dec 13 23:35:50 2001 @@ -176,7 +176,7 @@ opt->srr ? opt->faddr : req->af.v4_req.rmt_addr, req->af.v4_req.loc_addr, RT_CONN_FLAGS(sk), - 0)) { + 0, 0)) { tcp_openreq_free(req); goto out; } --- v2.4.16/linux/net/ipv4/tcp_ipv4.c Tue Nov 13 01:30:33 2001 +++ linux/net/ipv4/tcp_ipv4.c Thu Dec 13 23:35:50 2001 @@ -1154,7 +1154,7 @@ opt->faddr : req->af.v4_req.rmt_addr), req->af.v4_req.loc_addr, - RT_CONN_FLAGS(sk), sk->bound_dev_if)) { + RT_CONN_FLAGS(sk), sk->bound_dev_if, 0)) { IP_INC_STATS_BH(IpOutNoRoutes); return NULL; } @@ -1782,7 +1782,7 @@ daddr = sk->protinfo.af_inet.opt->faddr; err = ip_route_output(&rt, daddr, sk->saddr, - RT_CONN_FLAGS(sk), sk->bound_dev_if); + RT_CONN_FLAGS(sk), sk->bound_dev_if, 0); if (!err) { __sk_dst_set(sk, &rt->u.dst); sk->route_caps = rt->u.dst.dev->features; --- v2.4.16/linux/net/ipv4/udp.c Tue Nov 13 01:30:08 2001 +++ linux/net/ipv4/udp.c Thu Dec 13 23:35:50 2001 @@ -517,7 +517,7 @@ rt = (struct rtable*)sk_dst_check(sk, 0); if (rt == NULL) { - err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif); + err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif, 0); if (err) goto out; --- v2.4.16/linux/net/ipv6/sit.c Tue Nov 13 01:29:33 2001 +++ linux/net/ipv6/sit.c Thu Dec 13 23:35:50 2001 @@ -501,7 +501,7 @@ dst = addr6->s6_addr32[3]; } - if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) { + if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link, 0)) { tunnel->stat.tx_carrier_errors++; goto tx_error_icmp; } @@ -773,7 +773,7 @@ if (iph->daddr) { struct rtable *rt; - if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) { + if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link, 0)) { tdev = rt->u.dst.dev; ip_rt_put(rt); }