📄 ipv4.c
字号:
/* * net/dccp/ipv4.c * * An implementation of the DCCP protocol * Arnaldo Carvalho de Melo <acme@conectiva.com.br> * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */#include <linux/dccp.h>#include <linux/icmp.h>#include <linux/module.h>#include <linux/skbuff.h>#include <linux/random.h>#include <net/icmp.h>#include <net/inet_common.h>#include <net/inet_hashtables.h>#include <net/inet_sock.h>#include <net/protocol.h>#include <net/sock.h>#include <net/timewait_sock.h>#include <net/tcp_states.h>#include <net/xfrm.h>#include "ackvec.h"#include "ccid.h"#include "dccp.h"#include "feat.h"/* * This is the global socket data structure used for responding to * the Out-of-the-blue (OOTB) packets. A control sock will be created * for this socket at the initialization time. */static struct socket *dccp_v4_ctl_socket;static int dccp_v4_get_port(struct sock *sk, const unsigned short snum){ return inet_csk_get_port(&dccp_hashinfo, sk, snum, inet_csk_bind_conflict);}int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len){ struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; __be32 daddr, nexthop; int tmp; int err; dp->dccps_role = DCCP_ROLE_CLIENT; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; if (usin->sin_family != AF_INET) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; if (inet->opt != NULL && inet->opt->srr) { if (daddr == 0) return -EINVAL; nexthop = inet->opt->faddr; } tmp = ip_route_connect(&rt, nexthop, inet->saddr, RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, IPPROTO_DCCP, inet->sport, usin->sin_port, sk, 1); if (tmp < 0) return tmp; if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } if (inet->opt == NULL || !inet->opt->srr) daddr = rt->rt_dst; if (inet->saddr == 0) inet->saddr = rt->rt_src; inet->rcv_saddr = inet->saddr; inet->dport = usin->sin_port; inet->daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; if (inet->opt != NULL) inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket * lock select source port, enter ourselves into the hash tables and * complete initialization after this. */ dccp_set_state(sk, DCCP_REQUESTING); err = inet_hash_connect(&dccp_death_row, sk); if (err != 0) goto failure; err = ip_route_newports(&rt, IPPROTO_DCCP, inet->sport, inet->dport, sk); if (err != 0) goto failure; /* OK, now commit destination to socket. */ sk_setup_caps(sk, &rt->u.dst); dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr, inet->sport, inet->dport); inet->id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; if (err != 0) goto failure;out: return err;failure: /* * This unhashes the socket and releases the local port, if necessary. */ dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; inet->dport = 0; goto out;}EXPORT_SYMBOL_GPL(dccp_v4_connect);/* * This routine does path mtu discovery as defined in RFC1191. */static inline void dccp_do_pmtu_discovery(struct sock *sk, const struct iphdr *iph, u32 mtu){ struct dst_entry *dst; const struct inet_sock *inet = inet_sk(sk); const struct dccp_sock *dp = dccp_sk(sk); /* We are not interested in DCCP_LISTEN and request_socks (RESPONSEs * send out by Linux are always < 576bytes so they should go through * unfragmented). */ if (sk->sk_state == DCCP_LISTEN) return; /* We don't check in the destentry if pmtu discovery is forbidden * on this route. We just assume that no packet_to_big packets * are send back when pmtu discovery is not active. * There is a small race when the user changes this flag in the * route, but I think that's acceptable. */ if ((dst = __sk_dst_check(sk, 0)) == NULL) return; dst->ops->update_pmtu(dst, mtu); /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst)) sk->sk_err_soft = EMSGSIZE; mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); /* * From RFC 4340, sec. 14.1: * * DCCP-Sync packets are the best choice for upward * probing, since DCCP-Sync probes do not risk application * data loss. */ dccp_send_sync(sk, dp->dccps_gsr, DCCP_PKT_SYNC); } /* else let the usual retransmit timer handle it */}/* * This routine is called by the ICMP module when it gets some sort of error * condition. If err < 0 then the socket should be closed and the error * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code. * After adjustment header points to the first 8 bytes of the tcp header. We * need to find the appropriate port. * * The locking strategy used here is very "optimistic". When someone else * accesses the socket the ICMP is just dropped and for some paths there is no * check at all. A more general error queue to queue errors for later handling * is probably better. */static void dccp_v4_err(struct sk_buff *skb, u32 info){ const struct iphdr *iph = (struct iphdr *)skb->data; const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2)); struct dccp_sock *dp; struct inet_sock *inet; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; if (skb->len < (iph->ihl << 2) + 8) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { inet_twsk_put(inet_twsk(sk)); return; } bh_lock_sock(sk); /* If too many ICMPs get dropped on busy * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); seq = dccp_hdr_seq(dh); if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && !between48(seq, dp->dccps_swl, dp->dccps_swh)) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } switch (type) { case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ goto out; case ICMP_PARAMETERPROB: err = EPROTO; break; case ICMP_DEST_UNREACH: if (code > NR_ICMP_UNREACH) goto out; if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */ if (!sock_owned_by_user(sk)) dccp_do_pmtu_discovery(sk, iph, info); goto out; } err = icmp_err_convert[code].errno; break; case ICMP_TIME_EXCEEDED: err = EHOSTUNREACH; break; default: goto out; } switch (sk->sk_state) { struct request_sock *req , **prev; case DCCP_LISTEN: if (sock_owned_by_user(sk)) goto out; req = inet_csk_search_req(sk, &prev, dh->dccph_dport, iph->daddr, iph->saddr); if (!req) goto out; /* * ICMPs are not backlogged, hence we cannot get an established * socket here. */ BUG_TRAP(!req->sk); if (seq != dccp_rsk(req)->dreq_iss) { NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* * Still in RESPOND, just remove it silently. * There is no good way to pass the error to the newly * created socket, and POSIX does not want network * errors returned from accept(). */ inet_csk_reqsk_queue_drop(sk, req, prev); goto out; case DCCP_REQUESTING: case DCCP_RESPOND: if (!sock_owned_by_user(sk)) { DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); sk->sk_err = err; sk->sk_error_report(sk); dccp_done(sk); } else sk->sk_err_soft = err; goto out; } /* If we've already connected we will keep trying * until we time out, or the user gives up. * * rfc1122 4.2.3.9 allows to consider as hard errors * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too, * but it is obsoleted by pmtu discovery). * * Note, that in modern internet, where routing is unreliable * and in each dark corner broken firewalls sit, sending random * errors ordered by their masters even this two messages finally lose * their original sense (even Linux sends invalid PORT_UNREACHs) * * Now we are in compliance with RFCs. * --ANK (980905) */ inet = inet_sk(sk); if (!sock_owned_by_user(sk) && inet->recverr) { sk->sk_err = err; sk->sk_error_report(sk); } else /* Only an error on timeout */ sk->sk_err_soft = err;out: bh_unlock_sock(sk); sock_put(sk);}static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, __be32 src, __be32 dst){ return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);}void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb){ const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); dccp_csum_outgoing(skb); dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr);}EXPORT_SYMBOL_GPL(dccp_v4_send_check);static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb){ return secure_dccp_sequence_number(ip_hdr(skb)->daddr, ip_hdr(skb)->saddr, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport);}/* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. * * This is the equivalent of TCP's tcp_v4_syn_recv_sock */struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst){ struct inet_request_sock *ireq; struct inet_sock *newinet; struct sock *newsk; if (sk_acceptq_is_full(sk)) goto exit_overflow; if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) goto exit; newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) goto exit; sk_setup_caps(newsk, dst); newinet = inet_sk(newsk); ireq = inet_rsk(req); newinet->daddr = ireq->rmt_addr; newinet->rcv_saddr = ireq->loc_addr; newinet->saddr = ireq->loc_addr; newinet->opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->id = jiffies; dccp_sync_mss(newsk, dst_mtu(dst)); __inet_hash(&dccp_hashinfo, newsk, 0); __inet_inherit_port(&dccp_hashinfo, sk, newsk); return newsk;exit_overflow: NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);exit: NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); dst_release(dst); return NULL;}EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock);static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb){ const struct dccp_hdr *dh = dccp_hdr(skb); const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ struct request_sock *req = inet_csk_search_req(sk, &prev, dh->dccph_sport, iph->saddr, iph->daddr); if (req != NULL) return dccp_check_req(sk, skb, req, prev); nsk = inet_lookup_established(&dccp_hashinfo, iph->saddr, dh->dccph_sport, iph->daddr, dh->dccph_dport, inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); return nsk; } inet_twsk_put(inet_twsk(nsk)); return NULL; } return sk;}static struct dst_entry* dccp_v4_route_skb(struct sock *sk, struct sk_buff *skb){ struct rtable *rt; struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, .nl_u = { .ip4_u = { .daddr = ip_hdr(skb)->saddr, .saddr = ip_hdr(skb)->daddr, .tos = RT_CONN_FLAGS(sk) } }, .proto = sk->sk_protocol, .uli_u = { .ports = { .sport = dccp_hdr(skb)->dccph_dport, .dport = dccp_hdr(skb)->dccph_sport } } }; security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; } return &rt->u.dst;}static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, struct dst_entry *dst){ int err = -1; struct sk_buff *skb; /* First, grab a route. */ if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) goto out; skb = dccp_make_response(sk, dst, req); if (skb != NULL) { const struct inet_request_sock *ireq = inet_rsk(req); struct dccp_hdr *dh = dccp_hdr(skb); dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr, ireq->rmt_addr); memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, ireq->rmt_addr, ireq->opt); err = net_xmit_eval(err); }out: dst_release(dst); return err;}static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb){ int err; const struct iphdr *rxiph; struct sk_buff *skb; struct dst_entry *dst; /* Never send a reset in response to a reset. */ if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) return; dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -