IPv4 FIB: dead/suspect nexthop handling (diff against Linux 2.2.19).

What the hunks below do, file by file:

  include/net/ip_fib.h
	Declares fib_result_table() and fib_num_down_nh_devs().

  include/linux/rtnetlink.h
	Adds the nexthop flag RTNH_F_SUSPECT (8) and the mask
	RTNH_F_BADSTATE (RTNH_F_DEAD | RTNH_F_SUSPECT).

  net/ipv4/fib_hash.c
	Replaces the file-wide fn_hash_last_dflt with a last_dflt field
	that is set to -1 when a zone is allocated.  fib_detect_death()
	now takes last_dflt as an argument, walks all nexthops of the
	route, sets or clears RTNH_F_SUSPECT on each gatewayed link-scope
	nexthop from its neighbour state, and returns 0 as soon as one
	nexthop is considered usable.  In the hunk at old line 686 a route
	with RTNH_F_DEAD set is unlinked only if it is not RTPROT_STATIC
	or fib_num_down_nh_devs() returns 0.

  net/ipv4/fib_rules.c
	Adds fib_result_table(), which returns res->r->r_table.

  net/ipv4/fib_semantics.c
	Nexthop and fib_info comparison ignore RTNH_F_BADSTATE instead of
	RTNH_F_DEAD.  Adds fib_num_down_nh_devs().  For RTPROT_STATIC
	routes a gateway lookup that fails with -ENETUNREACH, or an output
	device that is not IFF_UP, marks the nexthop RTNH_F_DEAD instead
	of failing; the fib_info is marked RTNH_F_DEAD when every nexthop
	is.  fib_sync_up() is moved outside CONFIG_IP_ROUTE_MULTIPATH,
	matches nexthops by nh_oif, re-checks the gateway with
	fib_lookup() for static routes and clears RTNH_F_DEAD on the
	fib_info when at least one nexthop is alive.  Weighted nexthop
	selection skips RTNH_F_BADSTATE nexthops and gains a last_resort
	pass over nexthops that are not RTNH_F_DEAD.

  net/ipv4/fib_frontend.c
	Defines FIB_RES_TABLE() for both table configurations.  In the
	hunk at old line 209 the IN_DEV_RPFILTER rejection is moved after
	the second fib_lookup(), which is accepted when it resolves in the
	same table with prefixlen and scope not smaller than the first
	result, or when IN_DEV_RPFILTER is off.  fib_sync_up() is called
	on both NETDEV_UP cases, no longer under
	CONFIG_IP_ROUTE_MULTIPATH.

  net/ipv4/route.c
	fib_select_default() is called before the multipath selection in
	both hunks; in the second it is no longer the `else' branch of
	the multipath test.

Note: indentation in this copy was re-created by hand, so context lines
may differ from the 2.2.19 sources in tabs/spaces only; if hunks are
rejected for that reason, apply with `patch -l' (--ignore-whitespace).

--- v2.2.19/linux/include/net/ip_fib.h	Sat Oct 21 12:10:47 2000
+++ linux/include/net/ip_fib.h	Tue Sep 4 22:40:34 2001
@@ -171,6 +171,7 @@
 extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
 extern int fib_lookup(const struct rt_key *key, struct fib_result *res);
 extern struct fib_table *__fib_new_table(int id);
+extern __inline__ int fib_result_table(struct fib_result *res);
 
 extern __inline__ struct fib_table *fib_get_table(int id)
 {
@@ -214,6 +215,7 @@
 extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
 			 u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
 			 struct fib_info *fi);
+extern int fib_num_down_nh_devs(struct fib_info *fi);
 extern int fib_sync_down(u32 local, struct device *dev, int force);
 extern int fib_sync_up(struct device *dev);
 extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
--- v2.2.19/linux/include/linux/rtnetlink.h	Sat Oct 21 12:11:58 2000
+++ linux/include/linux/rtnetlink.h	Tue Sep 4 22:40:34 2001
@@ -230,6 +230,8 @@
 #define RTNH_F_DEAD		1	/* Nexthop is dead (used by multipath)	*/
 #define RTNH_F_PERVASIVE	2	/* Do recursive gateway lookup	*/
 #define RTNH_F_ONLINK		4	/* Gateway is forced on link	*/
+#define RTNH_F_SUSPECT		8	/* We don't know the real state	*/
+#define RTNH_F_BADSTATE		(RTNH_F_DEAD | RTNH_F_SUSPECT)
 
 /* Macros to handle hexthops */
 
--- v2.2.19/linux/net/ipv4/fib_hash.c	Sat Oct 21 12:10:50 2000
+++ linux/net/ipv4/fib_hash.c	Tue Sep 4 22:40:34 2001
@@ -93,6 +93,7 @@
 	int		fz_order;	/* Zone order		*/
 	u32		fz_mask;
 #define FZ_MASK(fz)	((fz)->fz_mask)
+	int		last_dflt;	/* Last default		*/
 };
 
 /* NOTE. On fast computers evaluation of fz_hashmask and fz_mask
@@ -226,6 +227,7 @@
 		return NULL;
 
 	memset(fz, 0, sizeof(struct fn_zone));
+	fz->last_dflt = -1;
 	if (z) {
 		fz->fz_divisor = 16;
 		fz->fz_hashmask = 0xF;
@@ -302,35 +304,55 @@
 	return 1;
 }
 
-static int fn_hash_last_dflt=-1;
-
-static int fib_detect_death(struct fib_info *fi, int order,
+static int fib_detect_death(struct fib_info *fi, int order, int last_dflt,
 			    struct fib_info **last_resort, int *last_idx)
 {
 	struct neighbour *n;
-	int state = NUD_NONE;
+	int nhsel;
+	int state;
+	struct fib_nh * nh;
+	int dead = 1;
+
+	/* change_nexthops(fi) { */
+	for (nhsel = 0, nh = fi->fib_nh; nhsel < fi->fib_nhs; nh++, nhsel++) {
+		if (nh->nh_flags & RTNH_F_DEAD)
+			continue;
+		if (!nh->nh_gw || nh->nh_scope != RT_SCOPE_LINK) {
+			dead = 0;
+			continue;
+		}
 
-	n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
-	if (n) {
-		state = n->nud_state;
-		neigh_release(n);
-	}
-	if (state==NUD_REACHABLE)
-		return 0;
-	if ((state&NUD_VALID) && order != fn_hash_last_dflt)
-		return 0;
-	if ((state&NUD_VALID) ||
-	    (*last_idx<0 && order > fn_hash_last_dflt)) {
-		*last_resort = fi;
-		*last_idx = order;
+		nh->nh_flags &= ~RTNH_F_SUSPECT;
+		state = NUD_NONE;
+		n = neigh_lookup(&arp_tbl, &nh->nh_gw, nh->nh_dev);
+		if (n) {
+			state = n->nud_state;
+			neigh_release(n);
+		}
+		if (state==NUD_REACHABLE ||
+			((state&NUD_VALID) && order != last_dflt)) {
+			dead = 0;
+			continue;
+		}
+		if (!(state&NUD_VALID)) {
+			nh->nh_flags |= RTNH_F_SUSPECT;
+		}
+		if (!dead) continue;
+		if ((state&NUD_VALID) ||
+		    (*last_idx<0 && order > last_dflt)) {
+			*last_resort = fi;
+			*last_idx = order;
+		}
 	}
-	return 1;
+	/* } endfor_nexthops(fi) */
+
+	return dead;
 }
 
 static void
 fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
 {
-	int order, last_idx;
+	int order, last_idx, last_dflt;
 	struct fib_node *f;
 	struct fib_info *fi = NULL;
 	struct fib_info *last_resort;
@@ -340,6 +362,7 @@
 	if (fz == NULL)
 		return;
 
+	last_dflt = fz->last_dflt;
 	last_idx = -1;
 	last_resort = NULL;
 	order = -1;
@@ -354,16 +377,12 @@
 
 		if (next_fi->fib_priority > res->fi->fib_priority)
 			break;
-		if (!next_fi->fib_nh[0].nh_gw || next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
-			continue;
 		f->fn_state |= FN_S_ACCESSED;
 
-		if (fi == NULL) {
-			if (next_fi != res->fi)
-				break;
-		} else if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
+		if (fi && !fib_detect_death(fi, order, last_dflt,
+				&last_resort, &last_idx)) {
 			res->fi = fi;
-			fn_hash_last_dflt = order;
+			fz->last_dflt = order;
 			return;
 		}
 		fi = next_fi;
@@ -371,19 +390,19 @@
 	}
 
 	if (order<=0 || fi==NULL) {
-		fn_hash_last_dflt = -1;
+		fz->last_dflt = -1;
 		return;
 	}
 
-	if (!fib_detect_death(fi, order, &last_resort, &last_idx)) {
+	if (!fib_detect_death(fi, order, last_dflt, &last_resort, &last_idx)) {
 		res->fi = fi;
-		fn_hash_last_dflt = order;
+		fz->last_dflt = order;
 		return;
 	}
 
 	if (last_idx >= 0)
 		res->fi = last_resort;
-	fn_hash_last_dflt = last_idx;
+	fz->last_dflt = last_idx;
 }
 
 #define FIB_SCAN(f, fp) \
@@ -686,7 +705,10 @@
 	while ((f = *fp) != NULL) {
 		struct fib_info *fi = FIB_INFO(f);
 
-		if (fi && ((f->fn_state&FN_S_ZOMBIE) || (fi->fib_flags&RTNH_F_DEAD))) {
+		if (fi && ((f->fn_state&FN_S_ZOMBIE) ||
+			(fi->fib_flags&RTNH_F_DEAD &&
+			(fi->fib_protocol != RTPROT_STATIC ||
+			!fib_num_down_nh_devs(fi))))) {
 			*fp = f->fn_next;
 			synchronize_bh();
 
--- v2.2.19/linux/net/ipv4/fib_rules.c	Wed Dec 13 11:19:12 2000
+++ linux/net/ipv4/fib_rules.c	Tue Sep 4 22:40:34 2001
@@ -265,6 +265,11 @@
 	}
 }
 
+int fib_result_table(struct fib_result *res)
+{
+	return res->r->r_table;
+}
+
 int fib_lookup(const struct rt_key *key, struct fib_result *res)
 {
 	int err;
--- v2.2.19/linux/net/ipv4/fib_semantics.c	Sat Oct 21 12:10:47 2000
+++ linux/net/ipv4/fib_semantics.c	Tue Sep 4 22:40:34 2001
@@ -127,7 +127,7 @@
 #ifdef CONFIG_NET_CLS_ROUTE
 		    nh->nh_tclassid != onh->nh_tclassid ||
 #endif
-		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
+		    ((nh->nh_flags^onh->nh_flags)&~RTNH_F_BADSTATE))
 			return -1;
 		onh++;
 	} endfor_nexthops(fi);
@@ -145,7 +145,7 @@
 		    nfi->fib_mtu == fi->fib_mtu &&
 		    nfi->fib_rtt == fi->fib_rtt &&
 		    nfi->fib_window == fi->fib_window &&
-		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
+		    ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_BADSTATE) == 0 &&
 		    (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
 			return fi;
 	} endfor_fib_info();
@@ -170,6 +170,30 @@
 	return -1;
 }
 
+/*
+ * Return 0 only when we are sure that the preferred source is deleted
+ * or when all nexthop devices are removed
+ */
+
+int fib_num_down_nh_devs(struct fib_info *fi)
+{
+struct in_device *in_dev;
+struct device *dev;
+int dead = 0;
+
+	change_nexthops(fi) {
+		if (!(nh->nh_flags&RTNH_F_DEAD))
+			return 0;
+		dev = dev_get_by_index(nh->nh_oif);
+		if (dev && !(dev->flags&IFF_UP) &&
+		    ((in_dev = dev->ip_ptr) != NULL) &&
+		    in_dev->ifa_list)
+			dead ++;
+	} endfor_nexthops(fi)
+	/* dead>0: All are marked DEAD but there is one in DOWN state */
+	return dead;
+}
+
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 
 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
@@ -354,11 +378,25 @@
 		if (key.scope < RT_SCOPE_LINK)
 			key.scope = RT_SCOPE_LINK;
 
-		if ((err = fib_lookup(&key, &res)) != 0)
-			return err;
-		nh->nh_scope = res.scope;
-		nh->nh_oif = FIB_RES_OIF(res);
-		nh->nh_dev = FIB_RES_DEV(res);
+		err = fib_lookup(&key, &res);
+		if (err) {
+			if (err == -ENETUNREACH &&
+			    fi->fib_protocol == RTPROT_STATIC) {
+				struct device *dev;
+
+				dev = dev_get_by_index(nh->nh_oif);
+				if (dev == NULL || dev->flags & IFF_UP ||
+				    inet_addr_type(nh->nh_gw) == RTN_LOCAL)
+					return err;
+				nh->nh_flags |= RTNH_F_DEAD;
+				nh->nh_scope = RT_SCOPE_LINK;
+				nh->nh_dev = dev;
+			} else return err;
+		} else {
+			nh->nh_scope = res.scope;
+			nh->nh_oif = FIB_RES_OIF(res);
+			nh->nh_dev = FIB_RES_DEV(res);
+		}
 	} else {
 		struct in_device *in_dev;
 
@@ -368,8 +406,11 @@
 		in_dev = inetdev_by_index(nh->nh_oif);
 		if (in_dev == NULL)
 			return -ENODEV;
-		if (!(in_dev->dev->flags&IFF_UP))
-			return -ENETDOWN;
+		if (!(in_dev->dev->flags&IFF_UP)) {
+			if (fi->fib_protocol != RTPROT_STATIC)
+				return -ENETDOWN;
+			nh->nh_flags |= RTNH_F_DEAD;
+		}
 		nh->nh_dev = in_dev->dev;
 		nh->nh_scope = RT_SCOPE_HOST;
 	}
@@ -490,10 +531,16 @@
 		if (nh->nh_dev == NULL)
 			goto failure;
 	} else {
+		int dead = 0;
 		change_nexthops(fi) {
 			if ((err = fib_check_nh(r, fi, nh)) != 0)
 				goto failure;
+			if (nh->nh_flags & RTNH_F_DEAD)
+				dead ++;
 		} endfor_nexthops(fi)
+		if (dead >= fi->fib_nhs) {
+			fi->fib_flags |= RTNH_F_DEAD;
+		}
 	}
 
 	if (fi->fib_prefsrc) {
@@ -858,8 +905,6 @@
 	return ret;
 }
 
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-
 /*
    Dead device goes up. We wake up dead nexthops.
    It takes sense only on multipath routes.
@@ -867,6 +912,8 @@
 
 int fib_sync_up(struct device *dev)
 {
+	struct rt_key key;
+	struct fib_result res;
 	int ret = 0;
 
 	if (!(dev->flags&IFF_UP))
@@ -880,16 +927,24 @@
 				alive++;
 				continue;
 			}
-			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
+			if (nh->nh_oif != dev->ifindex || dev->ip_ptr == NULL)
 				continue;
-			if (nh->nh_dev != dev || dev->ip_ptr == NULL)
+			if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
 				continue;
+			if (nh->nh_gw && fi->fib_protocol == RTPROT_STATIC) {
+				memset(&key, 0, sizeof(key));
+				key.dst = nh->nh_gw;
+				key.oif = nh->nh_oif;
+				key.scope = nh->nh_scope;
+				if (fib_lookup(&key, &res) != 0)
+					continue;
+			}
 			alive++;
 			nh->nh_power = 0;
 			nh->nh_flags &= ~RTNH_F_DEAD;
 		} endfor_nexthops(fi)
 
-		if (alive == fi->fib_nhs) {
+		if (alive > 0) {
 			fi->fib_flags &= ~RTNH_F_DEAD;
 			ret++;
 		}
@@ -897,6 +952,8 @@
 	return ret;
 }
 
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+
 /*
    The algorithm is suboptimal, but it provides really fair
    weighted route distribution.
@@ -910,7 +967,7 @@
 	if (fi->fib_power <= 0) {
 		int power = 0;
 		change_nexthops(fi) {
-			if (!(nh->nh_flags&RTNH_F_DEAD)) {
+			if (!(nh->nh_flags&RTNH_F_BADSTATE)) {
 				power += nh->nh_weight;
 				nh->nh_power = nh->nh_weight;
 			}
@@ -918,8 +975,9 @@
 		fi->fib_power = power;
 #if 1
 		if (power <= 0) {
-			printk(KERN_CRIT "impossible 777\n");
-			return;
+			goto last_resort;
+			/* printk(KERN_CRIT "impossible 777\n"); */
+			/* return; */
 		}
 #endif
 	}
@@ -932,13 +990,22 @@
 	w = jiffies % fi->fib_power;
 
 	change_nexthops(fi) {
-		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+		if (!(nh->nh_flags&RTNH_F_BADSTATE) && nh->nh_power) {
 			if ((w -= nh->nh_power) <= 0) {
 				nh->nh_power--;
 				fi->fib_power--;
 				res->nh_sel = nhsel;
 				return;
 			}
+		}
+	} endfor_nexthops(fi);
+
+last_resort:
+
+	for_nexthops(fi) {
+		if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
+			res->nh_sel = nhsel;
+			return;
 		}
 	} endfor_nexthops(fi);
 
--- v2.2.19/linux/net/ipv4/fib_frontend.c	Sat Oct 21 12:10:47 2000
+++ linux/net/ipv4/fib_frontend.c	Tue Sep 4 22:40:34 2001
@@ -54,6 +54,8 @@
 struct fib_table *local_table;
 struct fib_table *main_table;
 
+#define FIB_RES_TABLE(r) (RT_TABLE_MAIN)
+
 #else
 
 #define RT_TABLE_MIN 1
@@ -71,6 +73,7 @@
 	return tb;
 }
 
+#define FIB_RES_TABLE(r) (fib_result_table(r))
 
 #endif /* CONFIG_IP_MULTIPLE_TABLES */
 
@@ -194,6 +197,9 @@
 	struct in_device *in_dev = dev->ip_ptr;
 	struct rt_key key;
 	struct fib_result res;
+	int table;
+	unsigned char prefixlen;
+	unsigned char scope;
 
 	key.dst = src;
 	key.src = dst;
@@ -209,24 +215,25 @@
 	if (res.type != RTN_UNICAST)
 		return -EINVAL;
 	*spec_dst = FIB_RES_PREFSRC(res);
-	if (itag)
-		fib_combine_itag(itag, &res);
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
-	if (FIB_RES_DEV(res) == dev || res.fi->fib_nhs > 1)
-#else
+	fib_combine_itag(itag, &res);
 	if (FIB_RES_DEV(res) == dev)
-#endif
 		return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 
 	if (in_dev->ifa_list == NULL)
 		goto last_resort;
-	if (IN_DEV_RPFILTER(in_dev))
-		return -EINVAL;
+	table = FIB_RES_TABLE(&res);
+	prefixlen = res.prefixlen;
+	scope = res.scope;
 	key.oif = dev->ifindex;
-	if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST) {
+	if (fib_lookup(&key, &res) == 0 && res.type == RTN_UNICAST &&
+	    ((table == FIB_RES_TABLE(&res) && res.prefixlen >= prefixlen &&
+	      res.scope >= scope) ||
+	     !IN_DEV_RPFILTER(in_dev))) {
 		*spec_dst = FIB_RES_PREFSRC(res);
 		return FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
 	}
+	if (IN_DEV_RPFILTER(in_dev))
+		return -EINVAL;
 	return 0;
 
 last_resort:
@@ -543,6 +550,8 @@
 	switch (event) {
 	case NETDEV_UP:
 		fib_add_ifaddr(ifa);
+		if (ifa->ifa_dev && ifa->ifa_dev->dev)
+			fib_sync_up(ifa->ifa_dev->dev);
 		rt_cache_flush(-1);
 		break;
 	case NETDEV_DOWN:
@@ -573,9 +582,7 @@
 		for_ifa(in_dev) {
 			fib_add_ifaddr(ifa);
 		} endfor_ifa(in_dev);
-#ifdef CONFIG_IP_ROUTE_MULTIPATH
 		fib_sync_up(dev);
-#endif
 		rt_cache_flush(-1);
 		break;
 	case NETDEV_DOWN:
--- v2.2.19/linux/net/ipv4/route.c	Sat Aug 4 12:52:33 2001
+++ linux/net/ipv4/route.c	Tue Sep 4 22:40:34 2001
@@ -1195,6 +1195,8 @@
 	if (res.type != RTN_UNICAST)
 		goto martian_destination;
 
+	if (res.prefixlen==0 && key.oif == 0)
+		fib_select_default(&key, &res);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res.fi->fib_nhs > 1 && key.oif == 0)
 		fib_select_multipath(&key, &res);
@@ -1568,13 +1570,12 @@
 		goto make_route;
 	}
 
+	if (res.prefixlen==0 && res.type == RTN_UNICAST && key.oif == 0)
+		fib_select_default(&key, &res);
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 	if (res.fi->fib_nhs > 1 && key.oif == 0)
 		fib_select_multipath(&key, &res);
-	else
 #endif
-	if (res.prefixlen==0 && res.type == RTN_UNICAST && key.oif == 0)
-		fib_select_default(&key, &res);
 
 	if (!key.src)
 		key.src = FIB_RES_PREFSRC(res);