/*
 * Patch summary: IP masquerading for Linux 2.2.20 -- connection rerouting
 * and incremental checksum updates.
 *
 * include/net/ip_masq.h
 *   - adds the flag IP_MASQ_F_NOREROUTE (0x0020);
 *   - changes the ip_masq_select_addr() prototype from
 *         u32 (struct device *dev, u32 dst, int scope)
 *     to
 *         int (struct sk_buff *skb, __u32 *maddr, struct ip_masq *ms).
 *
 * net/ipv4/ip_masq.c
 *   - ip_masq_check_tcpudp(): verifies the TCP/UDP checksum of the packet.
 *     Returns 0 when the checksum is accepted (including a UDP packet whose
 *     checksum field is 0) and -1 when verification fails.
 *   - ip_masq_check_diff() / ip_masq_check_inc_update(): adjust the existing
 *     transport checksum for one replaced address and one replaced port,
 *     without summing the payload.
 *   - ip_masq_check_full_update(): recomputes the TCP or UDP checksum from a
 *     caller-supplied partial sum over the payload.
 *   - ip_fw_masquerade() / ip_fw_demasquerade(): the unconditional checksum
 *     verification at function entry is removed. Verification is now done
 *     when the entry has an application helper (ms->app), or, for entries
 *     without one, only on the paths that rewrite the entry (address change
 *     under sysctl_ip_dynaddr, IP_MASQ_F_NO_SPORT, IP_MASQ_F_NO_DPORT,
 *     IP_MASQ_F_NO_DADDR). Entries without a helper get the incremental
 *     update (skipped for UDP packets with a zero checksum field); entries
 *     with a helper get the full recomputation.
 *   - ip_masq_select_addr(): stores the masquerading address in *maddr.
 *     Returns 0 on success and -1 when ip_route_output() fails or the route
 *     type is neither RTN_UNICAST nor RTN_NAT. When the looked-up route has a
 *     different gateway or output device than the one on the skb, skb->dst
 *     is replaced by the new route. Within this patch IP_MASQ_F_NOREROUTE is
 *     only ever cleared; the code that would set it is commented out.
 *   - the ICMP masquerading paths call ip_masq_select_addr() whenever the
 *     caller passed maddr == 0.
 *
 * net/ipv4/ip_forward.c
 *   - after masquerading, rt, dev2 and mtu are reloaded from skb->dst,
 *     skb_cow() is called with dev2->hard_header_len, iph and opt are
 *     refreshed, and the strict-source-route test is repeated.
 *
 * NOTE(review): this copy of the patch has its line breaks collapsed; the
 * hunk headers (@@ -a,b +c,d @@) count lines, so the original line structure
 * has to be restored before the patch can be applied.
 * NOTE(review): in ip_masq_check_tcpudp() the `default:` label is followed
 * only by a comment and the closing brace, i.e. it labels no statement;
 * confirm the target compiler accepts this.
 */
--- v2.2.20/linux/include/net/ip_masq.h Sat Aug 4 12:52:32 2001 +++ linux/include/net/ip_masq.h Sun Feb 3 16:44:50 2002 @@ -76,6 +76,7 @@ #define IP_MASQ_F_NO_SPORT 0x0008 /* no sport set yet */ #define IP_MASQ_F_DLOOSE 0x0010 /* loose dest binding */ +#define IP_MASQ_F_NOREROUTE 0x0020 /* rerouting is not needed */ #define IP_MASQ_F_NO_REPLY 0x0080 /* no reply yet from outside */ #define IP_MASQ_F_HASHED 0x0100 /* hashed entry */ @@ -179,7 +180,7 @@ extern struct list_head ip_masq_d_table[IP_MASQ_TAB_SIZE]; extern const char * ip_masq_state_name(int state); extern struct ip_masq_hook *ip_masq_user_hook; -extern u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope); +extern int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr, struct ip_masq *ms); /* * * IP_MASQ_APP: IP application masquerading definitions --- v2.2.20/linux/net/ipv4/ip_masq.c Sat Aug 4 12:52:33 2001 +++ linux/net/ipv4/ip_masq.c Sun Feb 3 16:45:47 2002 @@ -50,6 +50,8 @@ * Kai Bankett : do not toss other IP protos in proto_doff() * Dan Kegel : pointed correct NAT behavior for UDP streams * Julian Anastasov : use daddr and dport as hash keys + * Julian Anastasov : connection rerouting + * Julian Anastasov : incremental checksum updates * */ @@ -1119,6 +1121,108 @@ return ret; } +static int ip_masq_check_tcpudp(struct sk_buff *skb, + struct iphdr *iph, + union ip_masq_tphdr *h, + int size, int doff) +{ + int csum; + + if (h->uh->check == 0 && iph->protocol == IPPROTO_UDP) + return 0; /* UDP checksum field is zero: nothing to verify */ + +#ifdef CONFIG_IP_MASQ_DEBUG + if (ip_masq_get_debug_level() > 3) { + skb->ip_summed = CHECKSUM_NONE; + } +#endif + /* Check that the checksum is OK */ + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + csum = csum_partial(h->raw + doff, size - doff, 0); + skb->csum = csum_partial(h->raw, doff, csum); /* no break: continues into the CHECKSUM_HW verification */ + case CHECKSUM_HW: + if (csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, skb->csum)) + { + IP_MASQ_DEBUG(0, "Wrong %s checksum in %u.%u.%u.%u->%u.%u.%u.%u (size=%d)!\n", 
+ masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + size); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + default: + /* CHECKSUM_UNNECESSARY */ + } + return 0; +} + +static inline u16 ip_masq_check_diff(u32 old, u32 new, u16 oldsum) +{ + u32 diff[2] = { old, new }; + + return csum_fold(csum_partial((char *) diff, sizeof(diff), + oldsum ^ 0xFFFF)); +} + +/* Incremental checksum update */ + +static inline void ip_masq_check_inc_update(union ip_masq_tphdr *h, + u32 oldip, u32 newip, u16 oldport, u16 newport, u8 protocol) +{ + u16 *checkp; + + if (protocol == IPPROTO_TCP) + checkp = &h->th->check; + else + checkp = &h->uh->check; + *checkp = ip_masq_check_diff(~oldip, newip, + ip_masq_check_diff(oldport ^ 0xFFFF, newport, *checkp)); + if (!*checkp && protocol == IPPROTO_UDP) + *checkp = 0xFFFF; /* UDP: store 0xFFFF rather than 0; ip_masq_check_tcpudp() skips a zero field */ +} + +/* Full checksum update */ +static inline void ip_masq_check_full_update(struct iphdr *iph, + union ip_masq_tphdr *h, + int size, int doff, int csum) +{ + switch (iph->protocol) { + case IPPROTO_TCP: + h->th->check = 0; + h->th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h->raw , doff, csum)); + IP_MASQ_DEBUG(3, "%s %u.%u.%u.%u->%u.%u.%u.%u csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + h->th->check, + (char*) & (h->th->check) - (char*) h->raw); + + break; + case IPPROTO_UDP: + h->uh->check = 0; + h->uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h->raw , doff, csum)); + if (h->uh->check == 0) + h->uh->check = 0xFFFF; + IP_MASQ_DEBUG(3, "%s %u.%u.%u.%u->%u.%u.%u.%u csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + h->uh->check, + (char*) &(h->uh->check)- (char*) h->raw); + break; + } +} + + int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr) { struct sk_buff *skb = *skb_p; @@ -1130,11 +1234,9 @@ /* * doff holds transport protocol 
data offset * csum holds its checksum - * csum_ok says if csum is valid */ int doff = 0; int csum = 0; - int csum_ok = 0; /* * We can only masquerade protocols with ports... and hack some ICMPs @@ -1143,7 +1245,6 @@ h.raw = (char*) iph + iph->ihl * 4; size = ntohs(iph->tot_len) - (iph->ihl * 4); - doff = proto_doff(iph->protocol, h.raw, size); if (doff <= 0) { /* @@ -1153,70 +1254,8 @@ return -1; } - /* Lets determine our maddr now, shall we? */ - if (maddr == 0) { - struct rtable *rt; - struct rtable *skb_rt = (struct rtable*)skb->dst; - struct device *skb_dev = skb_rt->u.dst.dev; - - if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos)|RTO_CONN, skb_dev?skb_dev->ifindex:0)) { - /* Fallback on old method */ - /* This really shouldn't happen... */ - maddr = inet_select_addr(skb_dev, skb_rt->rt_gateway, RT_SCOPE_UNIVERSE); - } else { - /* Route lookup succeeded */ - maddr = rt->rt_src; - ip_rt_put(rt); - } - } - - switch (iph->protocol) { - case IPPROTO_ICMP: + if (iph->protocol == IPPROTO_ICMP) return(ip_fw_masq_icmp(skb_p, maddr)); - case IPPROTO_UDP: - if (h.uh->check == 0) - /* No UDP checksum */ - break; - case IPPROTO_TCP: - /* Make sure packet is in the masq range */ - IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n", - masq_proto_name(iph->protocol), - size); - -#ifdef CONFIG_IP_MASQ_DEBUG - if (ip_masq_get_debug_level() > 3) { - skb->ip_summed = CHECKSUM_NONE; - } -#endif - /* Check that the checksum is OK */ - switch (skb->ip_summed) - { - case CHECKSUM_NONE: - { - csum = csum_partial(h.raw + doff, size - doff, 0); - IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n", - masq_proto_name(iph->protocol), - csum); - - skb->csum = csum_partial(h.raw , doff, csum); - } - case CHECKSUM_HW: - if (csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, skb->csum)) - { - IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n", - masq_proto_name(iph->protocol), - NIPQUAD(iph->saddr), - size); - return -1; - } - default: - /* CHECKSUM_UNNECESSARY */ 
- } - break; - default: - return -1; - } /* * Now hunt the list to see if we have an old entry */ @@ -1231,6 +1270,16 @@ ms = ip_masq_out_get_iph(iph); if (ms!=NULL) { + if (ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + ip_masq_put(ms); + return -1; + } + /* * If sysctl !=0 and no pkt has been received yet * in this tunnel and routing iface address has changed... @@ -1238,6 +1287,11 @@ */ if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { + if (!ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + if (sysctl_ip_dynaddr > 1) { IP_MASQ_INFO( "ip_fw_masquerade(): change masq.addr from %d.%d.%d.%d to %d.%d.%d.%d\n", NIPQUAD(ms->maddr),NIPQUAD(maddr)); @@ -1260,6 +1314,11 @@ if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) { + if (!ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -1284,6 +1343,9 @@ * Nope, not found, create a new entry for it */ + if (!maddr && (ip_masq_select_addr(skb,&maddr,0) < 0)) + return -1; + #ifdef CONFIG_IP_MASQUERADE_MOD if (!(ms = ip_masq_mod_out_create(skb, iph, maddr))) #endif @@ -1296,6 +1358,10 @@ return -1; if (!ms->app && skb->fwmark) ip_masq_bind_app_fwmark(ms, skb->fwmark); + if (ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } } /* @@ -1312,19 +1378,20 @@ size = skb->len - (h.raw - skb->nh.raw); + if (!ms->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { + /* Only port and addr are changed, make fast csum update */ + ip_masq_check_inc_update(&h, iph->saddr, ms->maddr, + h.portp[0], ms->mport, iph->protocol); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + /* * Set iph addr and port from ip_masq obj. 
*/ iph->saddr = ms->maddr; h.portp[0] = ms->mport; - /* - * Invalidate csum saving if tunnel has masq helper - */ - - if (ms->app) - csum_ok = 0; - /* * Attempt ip_masq_app call. * will fix ip_masq and iph seq stuff @@ -1349,44 +1416,16 @@ * Transport's payload partial csum */ - if (!csum_ok) { + if (ms->app) { csum = csum_partial(h.raw + doff, size - doff, 0); + skb->csum = csum; + IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", + masq_proto_name(iph->protocol), + size, + csum); + ip_masq_check_full_update(iph, &h, size, doff, csum); } - skb->csum = csum; - - IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", - masq_proto_name(iph->protocol), - size, - csum); - - /* - * Protocol csum - */ - switch (iph->protocol) { - case IPPROTO_TCP: - h.th->check = 0; - h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", - masq_proto_name(iph->protocol), - h.th->check, - (char*) & (h.th->check) - (char*) h.raw); - break; - case IPPROTO_UDP: - h.uh->check = 0; - h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - if (h.uh->check == 0) - h.uh->check = 0xFFFF; - IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", - masq_proto_name(iph->protocol), - h.uh->check, - (char*) &(h.uh->check)- (char*) h.raw); - break; - } ip_send_check(iph); IP_MASQ_DEBUG(2, "O-routed from %08X:%04X with masq.addr %08X\n", @@ -1511,6 +1550,8 @@ iph->daddr, icmp_hv_req(icmph)); if (ms == NULL) { + if (!maddr && (ip_masq_select_addr(skb,&maddr,0) < 0)) + return -1; ms = ip_masq_new(iph->protocol, maddr, 0, iph->saddr, icmp_id(icmph), @@ -1520,6 +1561,10 @@ return (-1); IP_MASQ_DEBUG(1, "Created new icmp entry\n"); } + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + ip_masq_put(ms); + return -1; + } /* Rewrite source address */ /* @@ -1611,6 +1656,11 @@ if (ms == NULL) return 0; + if (!maddr && 
(ip_masq_select_addr(skb,&maddr,ms) < 0)) { + __ip_masq_put(ms); + return -1; + } + /* Now we do real damage to this packet...! */ /* First change the source IP address, and recalc checksum */ iph->saddr = ms->maddr; @@ -1689,6 +1739,11 @@ if (ms == NULL) return 0; + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + __ip_masq_put(ms); + return -1; + } + /* Now we do real damage to this packet...! */ /* First change the source IP address, and recalc checksum */ iph->saddr = ms->maddr; @@ -1996,7 +2051,6 @@ unsigned short size; int doff = 0; int csum = 0; - int csum_ok = 0; __u32 maddr; /* @@ -2051,37 +2105,6 @@ #endif && atomic_read(&mport_count) == 0 ) return 0; - - /* Check that the checksum is OK */ - if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0)) - /* No UDP checksum */ - break; -#ifdef CONFIG_IP_MASQ_DEBUG - if (ip_masq_get_debug_level() > 3) { - skb->ip_summed = CHECKSUM_NONE; - } -#endif - - switch (skb->ip_summed) - { - case CHECKSUM_NONE: - csum = csum_partial(h.raw + doff, size - doff, 0); - csum_ok++; - skb->csum = csum_partial(h.raw , doff, csum); - - case CHECKSUM_HW: - if (csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, skb->csum)) - { - IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n", - masq_proto_name(iph->protocol), - NIPQUAD(iph->saddr), - size); - return -1; - } - default: - /* CHECKSUM_UNNECESSARY */ - } break; default: return 0; @@ -2120,6 +2143,12 @@ if (ms != NULL) { + if (ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + /* * got reply, so clear flag */ @@ -2139,6 +2168,12 @@ } else { if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /* && ms->protocol == IPPROTO_TCP ) { */ + if (!ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -2154,6 +2189,12 @@ } if (ms->flags & IP_MASQ_F_NO_DADDR ) { /* && ms->protocol == IPPROTO_TCP) { 
*/ + if (!ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -2172,15 +2213,17 @@ ip_masq_put(ms); return -1; } - iph->daddr = ms->saddr; - h.portp[1] = ms->sport; - /* - * Invalidate csum saving if tunnel has masq helper - */ + if (!ms->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { + /* Only port and addr are changed, make fast csum update */ + ip_masq_check_inc_update(&h, iph->daddr, ms->saddr, + h.portp[1], ms->sport, iph->protocol); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } - if (ms->app) - csum_ok = 0; + iph->daddr = ms->saddr; + h.portp[1] = ms->sport; /* * Attempt ip_masq_app call. @@ -2199,37 +2242,10 @@ size = ntohs(iph->tot_len) - (iph->ihl * 4); } - /* - * Yug! adjust UDP/TCP checksums - */ - - /* - * Transport's payload partial csum - */ - - if (!csum_ok) { + if (ms->app) { csum = csum_partial(h.raw + doff, size - doff, 0); - } - skb->csum = csum; - - /* - * Protocol csum - */ - switch (iph->protocol) { - case IPPROTO_TCP: - h.th->check = 0; - h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - break; - case IPPROTO_UDP: - h.uh->check = 0; - h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - if (h.uh->check == 0) - h.uh->check = 0xFFFF; - break; + skb->csum = csum; + ip_masq_check_full_update(iph, &h, size, doff, csum); } ip_send_check(iph); @@ -2530,11 +2546,58 @@ } #endif /* CONFIG_PROC_FS */ /* - * Wrapper over inet_select_addr() + * Determine maddr and optionally reroute the packet */ -u32 ip_masq_select_addr(struct device *dev, u32 dst, int scope) +int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr, struct ip_masq *ms) { - return inet_select_addr(dev, dst, scope); + struct rtable *rt; + struct rtable *skb_rt = (struct rtable*)skb->dst; + struct device *skb_dev = 
skb_rt->u.dst.dev; + struct iphdr *iph = skb->nh.iph; + + if (ms && !(ms->flags & IP_MASQ_F_NO_REPLY)) + *maddr = ms->maddr; /* IP_MASQ_F_NO_REPLY is clear: start from the entry's current masq address */ + + /* + * For now we call ip_route_output almost each time, i.e. we + * are not sure when the route cache entries expire + * probably after a route change (bad for multipath). + * The route lookup is avoided if: + * - we already know maddr and + * - the route to the destination is not gatewayed (still link + * can fail, so may be this is disabled) + * i.e. usually only for local networks which is not so good. + * We don't have a way to determine whether the skb_rt uses + * multipath route. In any case, these optimizations are + * still questionable when route changes take place. + */ + if ((ms && !(ms->flags & IP_MASQ_F_NOREROUTE)) || !*maddr) { + if (ip_route_output(&rt, iph->daddr, *maddr, + RT_TOS(iph->tos)|RTO_CONN, + (!*maddr && skb_dev)?skb_dev->ifindex:0)) + return -1; + if (RTN_UNICAST != rt->rt_type && RTN_NAT != rt->rt_type) { + ip_rt_put(rt); + return -1; + } + *maddr = rt->rt_src; + if (rt->rt_gateway != skb_rt->rt_gateway || + skb_dev != rt->u.dst.dev) { + dst_release(skb->dst); + skb->dst = &rt->u.dst; /* rt stays attached to the skb: unlike the else branch, no ip_rt_put(rt) here */ + if (ms) + ms->flags &= ~IP_MASQ_F_NOREROUTE; + } else { + /* Sorry, for now we always use ip_route_output */ + /* + if (ms && !(ms->flags & IP_MASQ_F_NO_REPLY) && + skb_rt->rt_gateway == skb_rt->rt_dst) + ms->flags |= IP_MASQ_F_NOREROUTE; + */ + ip_rt_put(rt); + } + } + return 0; } /* --- v2.2.20/linux/net/ipv4/ip_forward.c Sat Oct 21 12:11:59 2000 +++ linux/net/ipv4/ip_forward.c Sun Feb 3 16:44:50 2002 @@ -182,9 +182,20 @@ return -1; } - if (fw_res) + if (fw_res) { + rt = (struct rtable*)skb->dst; + dev2 = rt->u.dst.dev; + mtu = rt->u.dst.pmtu; + if ((skb = skb_cow(skb, dev2->hard_header_len)) == NULL) + return -1; + iph = skb->nh.iph; + opt = &(IPCB(skb)->opt); + if (opt->is_strictroute && + rt->rt_dst != rt->rt_gateway) + goto sr_failed; /* ICMP matched - skip firewall */ goto skip_call_fw_firewall; + } #ifdef 
CONFIG_IP_MASQUERADE_ICMP } #endif @@ -230,8 +241,16 @@ /* * Masquerader may have changed skb */ + rt = (struct rtable*)skb->dst; + dev2 = rt->u.dst.dev; + mtu = rt->u.dst.pmtu; + if ((skb = skb_cow(skb, dev2->hard_header_len)) == NULL) + return -1; iph = skb->nh.iph; opt = &(IPCB(skb)->opt); + if (opt->is_strictroute && + rt->rt_dst != rt->rt_gateway) + goto sr_failed; } } #endif