📄 icmp.c

📁 linux 内核源代码
💻 C
📖 第 1 页 / 共 2 页
字号:
12 下一页
/* *	NET3:	Implementation of the ICMP protocol layer. * *		Alan Cox, <alan@redhat.com> * *	Version: $Id: icmp.c,v 1.85 2002/02/01 22:01:03 davem Exp $ * *	This program is free software; you can redistribute it and/or *	modify it under the terms of the GNU General Public License *	as published by the Free Software Foundation; either version *	2 of the License, or (at your option) any later version. * *	Some of the function names and the icmp unreach table for this *	module were derived from [icmp.c 1.0.11 06/02/93] by *	Ross Biro, Fred N. van Kempen, Mark Evans, Alan Cox, Gerhard Koerting. *	Other than that this module is a complete rewrite. * *	Fixes: *	Clemens Fruhwirth	:	introduce global icmp rate limiting *					with icmp type masking ability instead *					of broken per type icmp timeouts. *		Mike Shaver	:	RFC1122 checks. *		Alan Cox	:	Multicast ping reply as self. *		Alan Cox	:	Fix atomicity lockup in ip_build_xmit *					call. *		Alan Cox	:	Added 216,128 byte paths to the MTU *					code. *		Martin Mares	:	RFC1812 checks. *		Martin Mares	:	Can be configured to follow redirects *					if acting as a router _without_ a *					routing protocol (RFC 1812). *		Martin Mares	:	Echo requests may be configured to *					be ignored (RFC 1812). *		Martin Mares	:	Limitation of ICMP error message *					transmit rate (RFC 1812). *		Martin Mares	:	TOS and Precedence set correctly *					(RFC 1812). *		Martin Mares	:	Now copying as much data from the *					original packet as we can without *					exceeding 576 bytes (RFC 1812). *	Willy Konynenberg	:	Transparent proxying support. *		Keith Owens	:	RFC1191 correction for 4.2BSD based *					path MTU bug. *		Thomas Quinot	:	ICMP Dest Unreach codes up to 15 are *					valid (RFC 1812). *		Andi Kleen	:	Check all packet lengths properly *					and moved all kfree_skb() up to *					icmp_rcv. *		Andi Kleen	:	Move the rate limit bookkeeping *					into the dest entry and use a token *					bucket filter (thanks to ANK). 
Make
 *					the rates sysctl configurable.
 *		Yu Tianli	:	Fixed two ugly bugs in icmp_send
 *					- IP option length was accounted wrongly
 *					- ICMP header length was not accounted
 *					  at all.
 *              Tristan Greaves :       Added sysctl option to ignore bogus
 *              			broadcast responses from broken routers.
 *
 * To Fix:
 *
 *	- Should use skb_pull() instead of all the manual checking.
 *	  This would also greatly simplify some upper layer error handlers. --AK
 *
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/socket.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/inetdevice.h>
#include <linux/netdevice.h>
#include <linux/string.h>
#include <linux/netfilter_ipv4.h>
#include <net/snmp.h>
#include <net/ip.h>
#include <net/route.h>
#include <net/protocol.h>
#include <net/icmp.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/raw.h>
#include <linux/skbuff.h>
#include <net/sock.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/init.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <net/checksum.h>

/*
 *	Build xmit assembly blocks
 *
 *	Per-message state used while an outgoing ICMP message is assembled.
 *	A pointer to this structure is handed to ip_append_data() and comes
 *	back as the opaque "from" argument of icmp_glue_bits().
 */
struct icmp_bxm {
	/* Packet whose bytes icmp_glue_bits() copies into the message. */
	struct sk_buff *skb;
	/* Byte offset into skb at which that copy starts. */
	int offset;
	/* Payload length; icmp_push_reply() adds head_len to get the total. */
	int data_len;

	/* Header area; head_len bytes of it are copied over the ICMP header. */
	struct {
		struct icmphdr icmph;
		/* NOTE(review): presumably the timestamp-reply words - confirm */
		__be32	       times[3];
	} data;
	/* Number of bytes of "data" placed at the front of the message. */
	int head_len;
	/* Reply options, filled in by ip_options_echo(). */
	struct ip_options replyopts;
	/* NOTE(review): presumably backing storage for replyopts - confirm */
	unsigned char  optbuf[40];
};

/*
 *	Statistics
 */
DEFINE_SNMP_STAT(struct icmp_mib, icmp_statistics) __read_mostly;
DEFINE_SNMP_STAT(struct icmpmsg_mib, icmpmsg_statistics) __read_mostly;

/* An array of errno for error messages from dest unreach. */
/* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'.
*/

/*
 * One entry per ICMP destination-unreachable code, in code order (the
 * comment on each entry names the code it belongs to).  The three codes
 * the RFC calls transient (NET_UNREACH, HOST_UNREACH, SR_FAILED) carry
 * .fatal = 0.
 * NOTE(review): .fatal presumably tells the upper layers whether the
 * error is hard; the consumers are not visible here - confirm.
 */
struct icmp_err icmp_err_convert[] = {
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNREACH */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNREACH */
		.fatal = 0,
	},
	{
		.errno = ENOPROTOOPT	/* ICMP_PROT_UNREACH */,
		.fatal = 1,
	},
	{
		.errno = ECONNREFUSED,	/* ICMP_PORT_UNREACH */
		.fatal = 1,
	},
	{
		.errno = EMSGSIZE,	/* ICMP_FRAG_NEEDED */
		.fatal = 0,
	},
	{
		.errno = EOPNOTSUPP,	/* ICMP_SR_FAILED */
		.fatal = 0,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNKNOWN */
		.fatal = 1,
	},
	{
		.errno = EHOSTDOWN,	/* ICMP_HOST_UNKNOWN */
		.fatal = 1,
	},
	{
		.errno = ENONET,	/* ICMP_HOST_ISOLATED */
		.fatal = 1,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_ANO	*/
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_ANO */
		.fatal = 1,
	},
	{
		.errno = ENETUNREACH,	/* ICMP_NET_UNR_TOS */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_HOST_UNR_TOS */
		.fatal = 0,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PKT_FILTERED */
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PREC_VIOLATION */
		.fatal = 1,
	},
	{
		.errno = EHOSTUNREACH,	/* ICMP_PREC_CUTOFF */
		.fatal = 1,
	},
};

/* Control parameters for ECHO replies. */
int sysctl_icmp_echo_ignore_all __read_mostly;
int sysctl_icmp_echo_ignore_broadcasts __read_mostly = 1;

/* Control parameter - ignore bogus broadcast responses? */
int sysctl_icmp_ignore_bogus_error_responses __read_mostly = 1;

/*
 * 	Configurable global rate limit.
 *
 *	ratelimit defines tokens/packet consumed for dst->rate_token bucket
 *	ratemask defines which icmp types are ratelimited by setting
 * 	its bit position.
 *
 *	default:
 *	dest unreachable (3), source quench (4),
 *	time exceeded (11), parameter problem (12)
 *
 *	The default mask 0x1818 has exactly bits 3, 4, 11 and 12 set.
 *	icmpv4_xrlim_allow() tests (1 << type) against the mask and, on a
 *	match, passes the ratelimit value to xrlim_allow() as its timeout.
 */
int sysctl_icmp_ratelimit __read_mostly = 1 * HZ;
int sysctl_icmp_ratemask __read_mostly = 0x1818;

int sysctl_icmp_errors_use_inbound_ifaddr __read_mostly;

/*
 *	ICMP control array. This specifies what to do with each ICMP.
*/struct icmp_control {	void (*handler)(struct sk_buff *skb);	short   error;		/* This ICMP is classed as an error message */};static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1];/* *	The ICMP socket(s). This is the most convenient way to flow control *	our ICMP output as well as maintain a clean interface throughout *	all layers. All Socketless IP sends will soon be gone. * *	On SMP we have one ICMP socket per-cpu. */static DEFINE_PER_CPU(struct socket *, __icmp_socket) = NULL;#define icmp_socket	__get_cpu_var(__icmp_socket)static __inline__ int icmp_xmit_lock(void){	local_bh_disable();	if (unlikely(!spin_trylock(&icmp_socket->sk->sk_lock.slock))) {		/* This can happen if the output path signals a		 * dst_link_failure() for an outgoing ICMP packet.		 */		local_bh_enable();		return 1;	}	return 0;}static void icmp_xmit_unlock(void){	spin_unlock_bh(&icmp_socket->sk->sk_lock.slock);}/* *	Send an ICMP frame. *//* *	Check transmit rate limitation for given message. *	The rate information is held in the destination cache now. *	This function is generic and could be used for other purposes *	too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. * *	Note that the same dst_entry fields are modified by functions in *	route.c too, but these work for packet destinations while xrlim_allow *	works for icmp destinations. This means the rate limiting information *	for one "ip object" is shared - and these ICMPs are twice limited: *	by source and by destination. * *	RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate *			  SHOULD allow setting of rate limits * * 	Shared between ICMPv4 and ICMPv6. 
*/#define XRLIM_BURST_FACTOR 6int xrlim_allow(struct dst_entry *dst, int timeout){	unsigned long now;	int rc = 0;	now = jiffies;	dst->rate_tokens += now - dst->rate_last;	dst->rate_last = now;	if (dst->rate_tokens > XRLIM_BURST_FACTOR * timeout)		dst->rate_tokens = XRLIM_BURST_FACTOR * timeout;	if (dst->rate_tokens >= timeout) {		dst->rate_tokens -= timeout;		rc = 1;	}	return rc;}static inline int icmpv4_xrlim_allow(struct rtable *rt, int type, int code){	struct dst_entry *dst = &rt->u.dst;	int rc = 1;	if (type > NR_ICMP_TYPES)		goto out;	/* Don't limit PMTU discovery. */	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED)		goto out;	/* No rate limit on loopback */	if (dst->dev && (dst->dev->flags&IFF_LOOPBACK))		goto out;	/* Limit if icmp type is enabled in ratemask. */	if ((1 << type) & sysctl_icmp_ratemask)		rc = xrlim_allow(dst, sysctl_icmp_ratelimit);out:	return rc;}/* *	Maintain the counters used in the SNMP statistics for outgoing ICMP */void icmp_out_count(unsigned char type){	ICMPMSGOUT_INC_STATS(type);	ICMP_INC_STATS(ICMP_MIB_OUTMSGS);}/* *	Checksum each fragment, and on the first include the headers and final *	checksum. 
*/static int icmp_glue_bits(void *from, char *to, int offset, int len, int odd,			  struct sk_buff *skb){	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;	__wsum csum;	csum = skb_copy_and_csum_bits(icmp_param->skb,				      icmp_param->offset + offset,				      to, len, 0);	skb->csum = csum_block_add(skb->csum, csum, odd);	if (icmp_pointers[icmp_param->data.icmph.type].error)		nf_ct_attach(skb, icmp_param->skb);	return 0;}static void icmp_push_reply(struct icmp_bxm *icmp_param,			    struct ipcm_cookie *ipc, struct rtable *rt){	struct sk_buff *skb;	if (ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,			   icmp_param->data_len+icmp_param->head_len,			   icmp_param->head_len,			   ipc, rt, MSG_DONTWAIT) < 0)		ip_flush_pending_frames(icmp_socket->sk);	else if ((skb = skb_peek(&icmp_socket->sk->sk_write_queue)) != NULL) {		struct icmphdr *icmph = icmp_hdr(skb);		__wsum csum = 0;		struct sk_buff *skb1;		skb_queue_walk(&icmp_socket->sk->sk_write_queue, skb1) {			csum = csum_add(csum, skb1->csum);		}		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,						 (char *)icmph,						 icmp_param->head_len, csum);		icmph->checksum = csum_fold(csum);		skb->ip_summed = CHECKSUM_NONE;		ip_push_pending_frames(icmp_socket->sk);	}}/* *	Driving logic for building and sending ICMP messages. 
*/

/*
 *	icmp_reply - answer the received packet @skb with the message
 *	described by @icmp_param.
 *
 *	Nothing is sent (and no error is reported to the caller) when the
 *	IP options cannot be echoed, when the transmit lock is busy, when
 *	no output route is found, or when the rate limiter refuses.
 */
static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb)
{
	struct sock *sk = icmp_socket->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipcm_cookie ipc;
	struct rtable *rt = (struct rtable *)skb->dst;
	__be32 daddr;

	/* Build the reply options from the incoming packet. */
	if (ip_options_echo(&icmp_param->replyopts, skb))
		return;

	/* Non-zero means the lock was not obtained; give up silently. */
	if (icmp_xmit_lock())
		return;

	/* The checksum is filled in later by icmp_push_reply(). */
	icmp_param->data.icmph.checksum = 0;

	/* Reply with the TOS of the packet being answered. */
	inet->tos = ip_hdr(skb)->tos;
	daddr = ipc.addr = rt->rt_src;
	ipc.opt = NULL;
	if (icmp_param->replyopts.optlen) {
		ipc.opt = &icmp_param->replyopts;
		/* With source routing, route towards the first hop instead. */
		if (ipc.opt->srr)
			daddr = icmp_param->replyopts.faddr;
	}
	{
		struct flowi fl = { .nl_u = { .ip4_u =
					      { .daddr = daddr,
						.saddr = rt->rt_spec_dst,
						.tos = RT_TOS(ip_hdr(skb)->tos) } },
				    .proto = IPPROTO_ICMP };
		security_skb_classify_flow(skb, &fl);
		/*
		 * rt is passed by address here.
		 * NOTE(review): presumably it now refers to the output route
		 * and that is what ip_rt_put() releases below - confirm.
		 */
		if (ip_route_output_key(&rt, &fl))
			goto out_unlock;
	}
	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type,
			       icmp_param->data.icmph.code))
		icmp_push_reply(icmp_param, &ipc, rt);
	ip_rt_put(rt);
out_unlock:
	icmp_xmit_unlock();
}

/*
 *	Send an ICMP message in response to a situation
 *
 *	RFC 1122: 3.2.2	MUST send at least the IP header and 8 bytes of header.
 *		  MAY send more (we do).
 *			MUST NOT change this header information.
 *			MUST NOT reply to a multicast/broadcast IP address.
 *			MUST NOT reply to a multicast/broadcast MAC address.
 *			MUST reply to only the first fragment.
 */
void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
{
	struct iphdr *iph;
	int room;
	struct icmp_bxm icmp_param;
	struct rtable *rt = (struct rtable *)skb_in->dst;
	struct ipcm_cookie ipc;
	__be32 saddr;
	u8  tos;

	if (!rt)
		goto out;

	/*
	 *	Find the original header. It is expected to be valid, of course.
	 *	Check this, icmp_send is called from the most obscure devices
	 *	sometimes.
*/	iph = ip_hdr(skb_in);	if ((u8 *)iph < skb_in->head ||	    (skb_in->network_header + sizeof(*iph)) > skb_in->tail)		goto out;	/*	 *	No replies to physical multicast/broadcast	 */	if (skb_in->pkt_type != PACKET_HOST)		goto out;	/*	 *	Now check at the protocol level	 */	if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))		goto out;	/*	 *	Only reply to fragment 0. We byte re-order the constant	 *	mask for efficiency.	 */	if (iph->frag_off & htons(IP_OFFSET))		goto out;	/*	 *	If we send an ICMP error to an ICMP error a mess would result..	 */	if (icmp_pointers[type].error) {		/*		 *	We are an error, check if we are replying to an		 *	ICMP error		 */		if (iph->protocol == IPPROTO_ICMP) {			u8 _inner_type, *itp;			itp = skb_header_pointer(skb_in,						 skb_network_header(skb_in) +						 (iph->ihl << 2) +						 offsetof(struct icmphdr,							  type) -						 skb_in->data,						 sizeof(_inner_type),						 &_inner_type);			if (itp == NULL)				goto out;			/*			 *	Assume any unknown ICMP type is an error. This			 *	isn't specified by the RFC, but think about it..			 */			if (*itp > NR_ICMP_TYPES ||			    icmp_pointers[*itp].error)				goto out;		}	}	if (icmp_xmit_lock())		return;	/*	 *	Construct source address and options.	 */	saddr = iph->daddr;	if (!(rt->rt_flags & RTCF_LOCAL)) {		struct net_device *dev = NULL;		if (rt->fl.iif && sysctl_icmp_errors_use_inbound_ifaddr)			dev = dev_get_by_index(&init_net, rt->fl.iif);		if (dev) {			saddr = inet_select_addr(dev, 0, RT_SCOPE_LINK);			dev_put(dev);		} else			saddr = 0;	}	tos = icmp_pointers[type].error ? ((iph->tos & IPTOS_TOS_MASK) |					   IPTOS_PREC_INTERNETCONTROL) :					  iph->tos;	if (ip_options_echo(&icmp_param.replyopts, skb_in))		goto out_unlock;	/*	 *	Prepare data for ICMP header.	 
*/	icmp_param.data.icmph.type	 = type;	icmp_param.data.icmph.code	 = code;	icmp_param.data.icmph.un.gateway = info;	icmp_param.data.icmph.checksum	 = 0;	icmp_param.skb	  = skb_in;	icmp_param.offset = skb_network_offset(skb_in);	inet_sk(icmp_socket->sk)->tos = tos;	ipc.addr = iph->saddr;	ipc.opt = &icmp_param.replyopts;	{		struct flowi fl = {			.nl_u = {				.ip4_u = {					.daddr = icmp_param.replyopts.srr ?						icmp_param.replyopts.faddr :						iph->saddr,					.saddr = saddr,
12 下一页
💿 文件大小 57701 K
👤 上传用户 huanzhudev
📂 所属分类网络
🏷️ 相关标签

#linux #内核 #源代码
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -