--- v2.2.20-1.0.8/linux/include/net/ip_masq.h Sat Nov 10 00:19:32 2001 +++ linux/include/net/ip_masq.h Sat Nov 10 00:18:12 2001 @@ -79,6 +79,7 @@ #define IP_MASQ_F_NO_SPORT 0x0008 /* no sport set yet */ #define IP_MASQ_F_DLOOSE 0x0010 /* loose dest binding */ +#define IP_MASQ_F_NOREROUTE 0x0020 /* rerouting is not needed */ #define IP_MASQ_F_NO_REPLY 0x0080 /* no reply yet from outside */ #define IP_MASQ_F_HASHED 0x0100 /* hashed entry */ @@ -202,7 +203,7 @@ extern struct list_head ip_masq_d_table[IP_MASQ_TAB_SIZE]; extern const char * ip_masq_state_name(int state); extern struct ip_masq_hook *ip_masq_user_hook; -extern int ip_masq_select_addr(struct sk_buff *skb,__u32 *maddr); +extern int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr, struct ip_masq *ms); /* * * IP_MASQ_APP: IP application masquerading definitions --- v2.2.20-1.0.8/linux/net/ipv4/ip_masq.c Sat Nov 10 00:19:32 2001 +++ linux/net/ipv4/ip_masq.c Sat Nov 10 01:20:10 2001 @@ -55,6 +55,8 @@ * Julian Anastasov : step to mSR after SYN in INPUT_ONLY table * Julian Anastasov : fixed huge expire bug for IPVS after bad checksum * Wensong Zhang : added server status checking for IPVS + * Julian Anastasov : connection rerouting + * Julian Anastasov : incremental checksum updates * */ @@ -1414,6 +1416,108 @@ return ret; } +static int ip_masq_check_tcpudp(struct sk_buff *skb, + struct iphdr *iph, + union ip_masq_tphdr *h, + int size, int doff) +{ + int csum; + + if (h->uh->check == 0 && iph->protocol == IPPROTO_UDP) + return 0; + +#ifdef CONFIG_IP_MASQ_DEBUG + if (ip_masq_get_debug_level() > 3) { + skb->ip_summed = CHECKSUM_NONE; + } +#endif + /* Check that the checksum is OK */ + switch (skb->ip_summed) + { + case CHECKSUM_NONE: + csum = csum_partial(h->raw + doff, size - doff, 0); + skb->csum = csum_partial(h->raw, doff, csum); + case CHECKSUM_HW: + if (csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, skb->csum)) + { + IP_MASQ_DEBUG(0, "Wrong %s checksum in %u.%u.%u.%u->%u.%u.%u.%u 
(size=%d)!\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + size); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; + default: + /* CHECKSUM_UNNECESSARY */ + } + return 0; +} + +static inline u16 ip_masq_check_diff(u32 old, u32 new, u16 oldsum) +{ + u32 diff[2] = { old, new }; + + return csum_fold(csum_partial((char *) diff, sizeof(diff), + oldsum ^ 0xFFFF)); +} + +/* Incremental checksum update */ + +static inline void ip_masq_check_inc_update(union ip_masq_tphdr *h, + u32 oldip, u32 newip, u16 oldport, u16 newport, u8 protocol) +{ + u16 *checkp; + + if (protocol == IPPROTO_TCP) + checkp = &h->th->check; + else + checkp = &h->uh->check; + *checkp = ip_masq_check_diff(~oldip, newip, + ip_masq_check_diff(oldport ^ 0xFFFF, newport, *checkp)); + if (!*checkp && protocol == IPPROTO_UDP) + *checkp = 0xFFFF; +} + +/* Full checksum update */ +static inline void ip_masq_check_full_update(struct iphdr *iph, + union ip_masq_tphdr *h, + int size, int doff, int csum) +{ + switch (iph->protocol) { + case IPPROTO_TCP: + h->th->check = 0; + h->th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h->raw , doff, csum)); + IP_MASQ_DEBUG(3, "%s %u.%u.%u.%u->%u.%u.%u.%u csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + h->th->check, + (char*) & (h->th->check) - (char*) h->raw); + + break; + case IPPROTO_UDP: + h->uh->check = 0; + h->uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, + size, iph->protocol, + csum_partial(h->raw , doff, csum)); + if (h->uh->check == 0) + h->uh->check = 0xFFFF; + IP_MASQ_DEBUG(3, "%s %u.%u.%u.%u->%u.%u.%u.%u csum=%d (+%d)\n", + masq_proto_name(iph->protocol), + NIPQUAD(iph->saddr), + NIPQUAD(iph->daddr), + h->uh->check, + (char*) &(h->uh->check)- (char*) h->raw); + break; + } +} + + int ip_fw_masquerade(struct sk_buff **skb_p, __u32 maddr) { struct sk_buff *skb = *skb_p; @@ -1425,11 +1529,9 @@ /* * doff holds 
transport protocol data offset * csum holds its checksum - * csum_ok says if csum is valid */ int doff = 0; int csum = 0; - int csum_ok = 0; /* * We can only masquerade protocols with ports... and hack some ICMPs @@ -1438,7 +1540,6 @@ h.raw = (char*) iph + iph->ihl * 4; size = ntohs(iph->tot_len) - (iph->ihl * 4); - doff = proto_doff(iph->protocol, h.raw, size); if (doff <= 0) { /* @@ -1448,65 +1549,8 @@ return -1; } -#ifndef CONFIG_IP_MASQUERADE_VS - /* Lets determine our maddr now, shall we? */ - if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) { - return -1; - } -#endif - - switch (iph->protocol) { - case IPPROTO_ICMP: -#ifdef CONFIG_IP_MASQUERADE_VS - if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) { - return -1; - } -#endif + if (iph->protocol == IPPROTO_ICMP) return(ip_fw_masq_icmp(skb_p, maddr)); - case IPPROTO_UDP: - if (h.uh->check == 0) - /* No UDP checksum */ - break; - case IPPROTO_TCP: - /* Make sure packet is in the masq range */ - IP_MASQ_DEBUG(3, "O-pkt: %s size=%d\n", - masq_proto_name(iph->protocol), - size); - -#ifdef CONFIG_IP_MASQ_DEBUG - if (ip_masq_get_debug_level() > 3) { - skb->ip_summed = CHECKSUM_NONE; - } -#endif - /* Check that the checksum is OK */ - switch (skb->ip_summed) - { - case CHECKSUM_NONE: - { - csum = csum_partial(h.raw + doff, size - doff, 0); - IP_MASQ_DEBUG(3, "O-pkt: %s I-datacsum=%d\n", - masq_proto_name(iph->protocol), - csum); - - skb->csum = csum_partial(h.raw , doff, csum); - } - case CHECKSUM_HW: - if (csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, skb->csum)) - { - IP_MASQ_DEBUG(0, "Outgoing failed %s checksum from %d.%d.%d.%d (size=%d)!\n", - masq_proto_name(iph->protocol), - NIPQUAD(iph->saddr), - size); - return -1; - } - default: - /* CHECKSUM_UNNECESSARY */ - } - break; - default: - return -1; - } /* * Now hunt the list to see if we have an old entry */ @@ -1520,17 +1564,16 @@ ms = ip_masq_out_get_iph(iph); if (ms!=NULL) { -#ifdef CONFIG_IP_MASQUERADE_VS - if (!maddr && 
(ip_masq_select_addr(skb,&maddr) < 0)) { - /* - * Drop this packet but don't - * start the timer from the beginning - */ - __ip_masq_put(ms); - add_sltimer(&ms->timer); + + if (ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + ip_masq_put(ms); return -1; } -#endif /* * If sysctl !=0 and no pkt has been received yet @@ -1539,6 +1582,11 @@ */ if ( sysctl_ip_dynaddr && ms->flags & IP_MASQ_F_NO_REPLY && maddr != ms->maddr) { + if (!ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + if (sysctl_ip_dynaddr > 1) { IP_MASQ_INFO( "ip_fw_masquerade(): change masq.addr from %d.%d.%d.%d to %d.%d.%d.%d\n", NIPQUAD(ms->maddr),NIPQUAD(maddr)); @@ -1561,6 +1609,11 @@ if ( ms->flags & IP_MASQ_F_NO_SPORT && ms->protocol == IPPROTO_TCP ) { + if (!ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -1592,6 +1645,9 @@ iph->saddr, h.portp[0]); read_unlock(&__ip_vs_lock); if (dest) { + if (ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) + return -1; + /* * Notify the real server: there is * no existing entry if it is not RST packet @@ -1602,11 +1658,10 @@ ICMP_PORT_UNREACH, 0); return -1; } +#endif - if (!maddr && (ip_masq_select_addr(skb,&maddr) < 0)) { + if (!maddr && (ip_masq_select_addr(skb,&maddr,0) < 0)) return -1; - } -#endif /* * Nope, not found, create a new entry for it @@ -1624,6 +1679,10 @@ return -1; if (!ms->app && skb->fwmark) ip_masq_bind_app_fwmark(ms, skb->fwmark); + if (ms->app && ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } } /* @@ -1640,19 +1699,20 @@ size = skb->len - (h.raw - skb->nh.raw); + if (!ms->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { + /* Only port and addr are changed, make fast csum update */ + ip_masq_check_inc_update(&h, 
iph->saddr, ms->maddr, + h.portp[0], ms->mport, iph->protocol); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + /* * Set iph addr and port from ip_masq obj. */ iph->saddr = ms->maddr; h.portp[0] = ms->mport; - /* - * Invalidate csum saving if tunnel has masq helper - */ - - if (ms->app) - csum_ok = 0; - /* * Attempt ip_masq_app call. * will fix ip_masq and iph seq stuff @@ -1677,44 +1737,16 @@ * Transport's payload partial csum */ - if (!csum_ok) { + if (ms->app) { csum = csum_partial(h.raw + doff, size - doff, 0); + skb->csum = csum; + IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", + masq_proto_name(iph->protocol), + size, + csum); + ip_masq_check_full_update(iph, &h, size, doff, csum); } - skb->csum = csum; - - IP_MASQ_DEBUG(3, "O-pkt: %s size=%d O-datacsum=%d\n", - masq_proto_name(iph->protocol), - size, - csum); - - /* - * Protocol csum - */ - switch (iph->protocol) { - case IPPROTO_TCP: - h.th->check = 0; - h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", - masq_proto_name(iph->protocol), - h.th->check, - (char*) & (h.th->check) - (char*) h.raw); - break; - case IPPROTO_UDP: - h.uh->check = 0; - h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - if (h.uh->check == 0) - h.uh->check = 0xFFFF; - IP_MASQ_DEBUG(3, "O-pkt: %s O-csum=%d (+%d)\n", - masq_proto_name(iph->protocol), - h.uh->check, - (char*) &(h.uh->check)- (char*) h.raw); - break; - } ip_send_check(iph); IP_MASQ_DEBUG(2, "O-routed from %08X:%04X with masq.addr %08X\n", @@ -1857,6 +1889,8 @@ iph->daddr, icmp_hv_req(icmph)); if (ms == NULL) { + if (!maddr && (ip_masq_select_addr(skb,&maddr,0) < 0)) + return -1; ms = ip_masq_new(iph->protocol, maddr, 0, iph->saddr, icmp_id(icmph), @@ -1866,6 +1900,10 @@ return (-1); IP_MASQ_DEBUG(1, "Created new icmp entry\n"); } + if (!maddr && 
(ip_masq_select_addr(skb,&maddr,ms) < 0)) { + ip_masq_put(ms); + return -1; + } /* Rewrite source address */ /* @@ -1957,6 +1995,11 @@ if (ms == NULL) return 0; + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + __ip_masq_put(ms); + return -1; + } + /* Now we do real damage to this packet...! */ /* First change the source IP address, and recalc checksum */ iph->saddr = ms->maddr; @@ -2042,6 +2085,11 @@ if (ms == NULL) return 0; + if (!maddr && (ip_masq_select_addr(skb,&maddr,ms) < 0)) { + __ip_masq_put(ms); + return -1; + } + #ifdef CONFIG_IP_MASQUERADE_VS if (IP_MASQ_VS_FWD(ms) != 0) { IP_VS_INFO("shouldn't get here, because tun/dr is on the half connection\n"); @@ -2462,7 +2510,6 @@ unsigned short size; int doff = 0; int csum = 0; - int csum_ok = 0; __u32 maddr; #ifdef CONFIG_IP_MASQUERADE_VS struct ip_vs_service *svc = NULL; @@ -2528,37 +2575,6 @@ #endif && atomic_read(&mport_count) == 0 ) return 0; - - /* Check that the checksum is OK */ - if ((iph->protocol == IPPROTO_UDP) && (h.uh->check == 0)) - /* No UDP checksum */ - break; -#ifdef CONFIG_IP_MASQ_DEBUG - if (ip_masq_get_debug_level() > 3) { - skb->ip_summed = CHECKSUM_NONE; - } -#endif - - switch (skb->ip_summed) - { - case CHECKSUM_NONE: - csum = csum_partial(h.raw + doff, size - doff, 0); - csum_ok++; - skb->csum = csum_partial(h.raw , doff, csum); - - case CHECKSUM_HW: - if (csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, skb->csum)) - { - IP_MASQ_DEBUG(0, "Incoming failed %s checksum from %d.%d.%d.%d (size=%d)!\n", - masq_proto_name(iph->protocol), - NIPQUAD(iph->saddr), - size); - return -1; - } - default: - /* CHECKSUM_UNNECESSARY */ - } break; default: return 0; @@ -2624,8 +2640,11 @@ * masquerading entry. 
*/ ms = ip_vs_schedule(svc, iph); - if (!ms) + if (!ms) { + if (ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) + return -1; return ip_vs_leave(svc, skb); + } ip_vs_conn_stats(ms, svc); } #endif /* CONFIG_IP_MASQUERADE_VS */ @@ -2633,6 +2652,12 @@ if (ms != NULL) { + if (ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + /* * got reply, so clear flag */ @@ -2652,6 +2677,12 @@ } else { if ( ms->flags & IP_MASQ_F_NO_DPORT ) { /* && ms->protocol == IPPROTO_TCP ) { */ + if (!ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -2667,6 +2698,12 @@ } if (ms->flags & IP_MASQ_F_NO_DADDR ) { /* && ms->protocol == IPPROTO_TCP) { */ + if (!ms->app && + ip_masq_check_tcpudp(skb, iph, &h, size, doff) < 0) { + ip_masq_put(ms); + return -1; + } + write_lock(&__ip_masq_lock); ip_masq_unhash(ms); @@ -2715,16 +2752,17 @@ return -1; } + if (!ms->app && (iph->protocol != IPPROTO_UDP || h.uh->check != 0)) { + /* Only port and addr are changed, make fast csum update */ + ip_masq_check_inc_update(&h, iph->daddr, ms->saddr, + h.portp[1], ms->sport, iph->protocol); + if (skb->ip_summed == CHECKSUM_HW) + skb->ip_summed = CHECKSUM_NONE; + } + iph->daddr = ms->saddr; h.portp[1] = ms->sport; - /* - * Invalidate csum saving if tunnel has masq helper - */ - - if (ms->app) - csum_ok = 0; - /* * Attempt ip_masq_app call. * will fix ip_masq and iph ack_seq stuff @@ -2742,37 +2780,10 @@ size = ntohs(iph->tot_len) - (iph->ihl * 4); } - /* - * Yug! 
adjust UDP/TCP checksums - */ - - /* - * Transport's payload partial csum - */ - - if (!csum_ok) { + if (ms->app) { csum = csum_partial(h.raw + doff, size - doff, 0); - } - skb->csum = csum; - - /* - * Protocol csum - */ - switch (iph->protocol) { - case IPPROTO_TCP: - h.th->check = 0; - h.th->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - break; - case IPPROTO_UDP: - h.uh->check = 0; - h.uh->check=csum_tcpudp_magic(iph->saddr, iph->daddr, - size, iph->protocol, - csum_partial(h.raw , doff, csum)); - if (h.uh->check == 0) - h.uh->check = 0xFFFF; - break; + skb->csum = csum; + ip_masq_check_full_update(iph, &h, size, doff, csum); } ip_send_check(iph); @@ -2790,7 +2801,8 @@ * is found or created. Furthermore, send DEST_UNREACH icmp * packet to clients if it is not RST or it is not TCP. */ - if (!h.th->rst || iph->protocol != IPPROTO_TCP) { + if ((!h.th->rst || iph->protocol != IPPROTO_TCP) && + !ip_masq_check_tcpudp(skb, iph, &h, size, doff)) { icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); } return -1; @@ -3146,23 +3158,60 @@ #endif /* CONFIG_PROC_FS */ /* - * Determine maddr from skb + * Determine maddr and optionally reroute the packet + * Returns 0 on success, -1 if the route lookup fails or the route is not unicast */ -int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr) +int ip_masq_select_addr(struct sk_buff *skb, __u32 *maddr, struct ip_masq *ms) { - struct rtable *rt; - struct rtable *skb_rt = (struct rtable*)skb->dst; - struct device *skb_dev = skb_rt->u.dst.dev; - struct iphdr *iph = skb->nh.iph; + struct rtable *rt; + struct rtable *skb_rt = (struct rtable*)skb->dst; + struct device *skb_dev = skb_rt->u.dst.dev; + struct iphdr *iph = skb->nh.iph; - if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos)|RTO_CONN, skb_dev?skb_dev->ifindex:0)) { - return -1; - } else { - /* Route lookup succeeded */ + if (ms && !(ms->flags & IP_MASQ_F_NO_REPLY)) + *maddr = ms->maddr; + + /* + * For now we call ip_route_output almost each time, i.e.
we + are not sure when the route cache entries expire + * probably after a route change (bad for multipath). + * The route lookup is avoided if: + * - we already know maddr and + * - the route to the destination is not gatewayed (still link + * can fail, so may be this is disabled) + * i.e. usually only for local networks which is not so good. + * We don't have a way to determine whether the skb_rt uses + * multipath route. In any case, these optimizations are + * still questionable when route changes take place. + */ + if ((ms && !(ms->flags & IP_MASQ_F_NOREROUTE)) || !*maddr) { + if (ip_route_output(&rt, iph->daddr, *maddr, + RT_TOS(iph->tos)|RTO_CONN, + (!*maddr && skb_dev)?skb_dev->ifindex:0)) + return -1; + if (RTN_UNICAST != rt->rt_type) { + /* Lookup succeeded, so we hold a route reference: drop it */ + ip_rt_put(rt); + return -1; + } *maddr = rt->rt_src; - ip_rt_put(rt); - return 0; + if (rt->rt_gateway != skb_rt->rt_gateway || + skb_dev != rt->u.dst.dev) { + dst_release(skb->dst); + skb->dst = &rt->u.dst; + if (ms) + ms->flags &= ~IP_MASQ_F_NOREROUTE; + } else { + /* Sorry, for now we always use ip_route_output */ + /* + if (ms && !(ms->flags & IP_MASQ_F_NO_REPLY) && + skb_rt->rt_gateway == skb_rt->rt_dst) + ms->flags |= IP_MASQ_F_NOREROUTE; + */ + ip_rt_put(rt); + } } + return 0; } /* --- v2.2.20-1.0.8/linux/net/ipv4/ip_forward.c Sat Nov 10 00:19:32 2001 +++ linux/net/ipv4/ip_forward.c Sat Nov 10 00:51:01 2001 @@ -193,9 +193,20 @@ return -1; } - if (fw_res) + if (fw_res) { + rt = (struct rtable*)skb->dst; + dev2 = rt->u.dst.dev; + mtu = rt->u.dst.pmtu; + if ((skb = skb_cow(skb, dev2->hard_header_len)) == NULL) + return -1; + iph = skb->nh.iph; + opt = &(IPCB(skb)->opt); + if (opt->is_strictroute && + rt->rt_dst != rt->rt_gateway) + goto sr_failed; /* ICMP matched - skip firewall */ goto skip_call_fw_firewall; + } #ifdef CONFIG_IP_MASQUERADE_ICMP } #endif @@ -241,8 +252,16 @@ /* * Masquerader may have changed skb */ + rt = (struct rtable*)skb->dst; + dev2 = rt->u.dst.dev; + mtu = rt->u.dst.pmtu; + if ((skb = skb_cow(skb, dev2->hard_header_len)) == NULL)
+ return -1; iph = skb->nh.iph; opt = &(IPCB(skb)->opt); + if (opt->is_strictroute && + rt->rt_dst != rt->rt_gateway) + goto sr_failed; } } #endif