页次: 1
前面介绍了用来管理存放网络数据包的sk_buff,以及描述通信协议的socket和sock结构体,现在终于轮到怎么和远程的计算机通信了!从常识上讲,通信之前必须要建立连接,比如有线的键盘给电脑发送信号,需要先让键盘通过usb接口连接到电脑,否则电脑怎么接受键盘的电信号了?同理:我要想使用鼠标,比如先把鼠标插入电脑的usb接口,移动鼠标后鼠标才会给电脑发送电信号,这两个都需要先建立物理连接!那么两台相距十万八千里的主机互相通信,这个连接该怎么建立了?物理上的连接当然是通过交换机、路由器以及电缆、光纤这些设备完成的,逻辑上的连接又是怎么建立的了?本文以tcp协议为例说明!
1、tcp协议在业界使用了这么多年,已经非常成熟,三次握手的原理我就不再赘述!3次握手的流程如下图所示:
从上图可以看出,client调用connect、server调用listen函数就完成了3次握手,app的开发人员完全不需要关心这3次握手是怎么完成的!第一次时client给server发消息,表示我想和你通信,并给了一个数字M;第二次是server给client回复,表示我同意和你通信,回复的内容包括了M+1,表示这次回复是你上次发送的SYN包,同时也附上自己的N!第三次是client给server发消息,附上M+1,表示回复的是server的SYN N的包!至此双方来回一共3次通信后连接建立完毕!接下来我们挨个拆解看看每一步具体都干了啥!
(1)client给server发送SYN M数据包,用wireshark抓包可以看到SYN数据包的内容,如下:
linux内核构造并发送SYN包的函数叫tcp_v4_connect,代码如下:代码不算多,重要部分加了中文注释
/* This will initiate an outgoing connection. */
int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
struct inet_sock *inet = inet_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
__be16 orig_sport, orig_dport;
__be32 daddr, nexthop;
struct flowi4 *fl4;
struct rtable *rt;
int err;
struct ip_options_rcu *inet_opt;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
/*AF_INET是用户调用socket函数创建套接字时传入的参数,
这里校验地址长度和协议簇*/
if (usin->sin_family != AF_INET)
return -EAFNOSUPPORT;
/*将下一跳地址和目的地址的临时变量都暂时设为用户提交的地址*/
nexthop = daddr = usin->sin_addr.s_addr;
inet_opt = rcu_dereference_protected(inet->inet_opt,
lockdep_sock_is_held(sk));
/* 如果使用了来源地址路由,选择一个合适的下一跳地址。*/
if (inet_opt && inet_opt->opt.srr) {
if (!daddr)
return -EINVAL;
nexthop = inet_opt->opt.faddr;
}
/*获取数据包的ip层路由信息*/
orig_sport = inet->inet_sport;
orig_dport = usin->sin_port;
fl4 = &inet->cork.fl.u.ip4;
/*一台主机可能有多个ip地址,用哪个ip地址发送数据包了?
根据nexthop参数(也就是connect传递下来的服务器ip)查路由表,
命中的路由表项中包含有本地ip地址*/
rt = ip_route_connect(fl4, nexthop, inet->inet_saddr,
RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
IPPROTO_TCP,
orig_sport, orig_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
if (err == -ENETUNREACH)
IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES);
return err;
}
/* 进行路由查找,并校验返回的路由的类型,
TCP是不被允许使用多播和广播的。*/
if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
ip_rt_put(rt);
return -ENETUNREACH;
}
if (!inet_opt || !inet_opt->opt.srr)
daddr = fl4->daddr;
/*如果用户之前没有bind源,inet->inet_saddr将会是0.
此处判断如果saddr是0,就把查路由返回的fl4->saddr赋值给inet->inet_saddr。
inet->inet_saddr的值将来会作为syn报文的源ip;
1、如果在这里更改源ip,用随机数填充后发送给服务器,
服务器会不会不停地发送ack数据包导致达到DDOS的效果了?
2、如果把这里的源ip改成受害服务器的地址,然后随机发给第三方服务器,第三方服务器收到
SYN数据包后分分给受害服务器发送ack数据包,是不是达到了反射DDOS的效果了?
*/
if (!inet->inet_saddr)
inet->inet_saddr = fl4->saddr;
sk_rcv_saddr_set(sk, inet->inet_saddr);
if (tp->rx_opt.ts_recent_stamp && inet->inet_daddr != daddr) {
/* Reset inherited state */
tp->rx_opt.ts_recent = 0;
tp->rx_opt.ts_recent_stamp = 0;
if (likely(!tp->repair))
tp->write_seq = 0;
}
if (tcp_death_row.sysctl_tw_recycle &&
!tp->rx_opt.ts_recent_stamp && fl4->daddr == daddr)
tcp_fetch_timewait_stamp(sk, &rt->dst);
inet->inet_dport = usin->sin_port;
sk_daddr_set(sk, daddr);
inet_csk(sk)->icsk_ext_hdr_len = 0;
if (inet_opt)
inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen;
tp->rx_opt.mss_clamp = TCP_MSS_DEFAULT;
/* Socket identity is still unknown (sport may be zero).
* However we set state to SYN-SENT and not releasing socket
* lock select source port, enter ourselves into the hash tables and
* complete initialization after this.
将TCP的状态设为TCP_SYN_SENT,动态绑定一个本地端口
*/
tcp_set_state(sk, TCP_SYN_SENT);
err = inet_hash_connect(&tcp_death_row, sk);
if (err)
goto failure;
sk_set_txhash(sk);
/*最终调用的是fib_table_lookup函数从trie树中查找最长匹配的ip地址*/
rt = ip_route_newports(fl4, rt, orig_sport, orig_dport,
inet->inet_sport, inet->inet_dport, sk);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
rt = NULL;
goto failure;
}
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
inet->inet_daddr,
inet->inet_sport,
usin->sin_port);
inet->inet_id = tp->write_seq ^ jiffies;
/*生成SYN数据包并发送*/
err = tcp_connect(sk);
rt = NULL;
if (err)
goto failure;
return 0;
failure:
/*
* This unhashes the socket and releases the local port,
* if necessary.
*/
tcp_set_state(sk, TCP_CLOSE);
ip_rt_put(rt);
sk->sk_route_caps = 0;
inet->inet_dport = 0;
return err;
}
注意:这两行代码是用来构造源ip地址的,可以通过更改这里达到反射DDOS攻击!
if (!inet->inet_saddr)
inet->inet_saddr = fl4->saddr;
这个函数前面都是各种前置条件检查、容错等,真正构造syn数据包并发送的函数是tcp_connect函数中的tcp_send_syn_data和tcp_transmit_skb函数!由于两者是调用关系,这里重点解析transmit函数,如下:
/* This routine actually transmits TCP packets queued in by
* tcp_do_sendmsg(). This is used by both the initial
* transmission and possible later retransmissions.
* All SKB's seen here are completely headerless. It is our
* job to build the TCP header, and pass the packet down to
* IP so it can do the same plus pass the packet off to the
* device.
*
* We are working here with either a clone of the original
* SKB, or a fresh unique copy made by the retransmit engine.
复制或者拷贝skb,构造skb中的tcp首部,并将调用网络层的发送函数发送skb;
在发送前,首先需要克隆或者复制skb,因为在成功发送到网络设备之后,skb会释放,
而tcp层不能真正的释放,是需要等到对该数据段的ack才可以释放;然后构造tcp首部和选项;
最后调用网络层提供的发送回调函数发送skb,ip层的回调函数为ip_queue_xmit
*/
static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it,
gfp_t gfp_mask)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet;
struct tcp_sock *tp;
struct tcp_skb_cb *tcb;
struct tcp_out_options opts;
unsigned int tcp_options_size, tcp_header_size;
struct tcp_md5sig_key *md5;
struct tcphdr *th;
int err;
BUG_ON(!skb || !tcp_skb_pcount(skb));
tp = tcp_sk(sk);
//如果还有其他进程使用skb,就需要复制skb
if (clone_it) {
skb_mstamp_get(&skb->skb_mstamp);
TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq
- tp->snd_una;
tcp_rate_skb_sent(sk, skb);
if (unlikely(skb_cloned(skb)))
skb = pskb_copy(skb, gfp_mask);
else
skb = skb_clone(skb, gfp_mask);
if (unlikely(!skb))
return -ENOBUFS;
}
inet = inet_sk(sk);
tcb = TCP_SKB_CB(skb);
memset(&opts, 0, sizeof(opts));
//是否是SYN请求数据包
if (unlikely(tcb->tcp_flags & TCPHDR_SYN))
//构建TCP选项包括时间戳、窗口大小、选择回答SACK
tcp_options_size = tcp_syn_options(sk, skb, &opts, &md5);
else//构建常规TCP选项
tcp_options_size = tcp_established_options(sk, skb, &opts,
&md5);
//tCP头部长度包括选择长度+ TCP头部
tcp_header_size = tcp_options_size + sizeof(struct tcphdr);
/* if no packet is in qdisc/device queue, then allow XPS to select
* another queue. We can be called from tcp_tsq_handler()
* which holds one reference to sk_wmem_alloc.
*
* TODO: Ideally, in-flight pure ACK packets should not matter here.
* One way to get this would be to set skb->truesize = 2 on them.
*/
skb->ooo_okay = sk_wmem_alloc_get(sk) < SKB_TRUESIZE(1);
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
skb_orphan(skb);
skb->sk = sk;
skb->destructor = skb_is_tcp_pure_ack(skb) ? __sock_wfree : tcp_wfree;
skb_set_hash_from_sk(skb, sk);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
/* Build TCP header and checksum it.
前面做了大量的准备工作,这里终于开始构造tcp包头了
*/
th = (struct tcphdr *)skb->data;
th->source = inet->inet_sport;
th->dest = inet->inet_dport;
th->seq = htonl(tcb->seq);
th->ack_seq = htonl(tp->rcv_nxt);
*(((__be16 *)th) + 6) = htons(((tcp_header_size >> 2) << 12) |
tcb->tcp_flags);
th->check = 0;
th->urg_ptr = 0;
/* The urg_mode check is necessary during a below snd_una win probe */
//SYN包不需要计算窗口
if (unlikely(tcp_urg_mode(tp) && before(tcb->seq, tp->snd_up))) {
if (before(tp->snd_up, tcb->seq + 0x10000)) {
th->urg_ptr = htons(tp->snd_up - tcb->seq);
th->urg = 1;
} else if (after(tcb->seq + 0xFFFF, tp->snd_nxt)) {
th->urg_ptr = htons(0xFFFF);
th->urg = 1;
}
}
tcp_options_write((__be32 *)(th + 1), tp, &opts);
skb_shinfo(skb)->gso_type = sk->sk_gso_type;
if (likely(!(tcb->tcp_flags & TCPHDR_SYN))) {
th->window = htons(tcp_select_window(sk));
tcp_ecn_send(sk, skb, th, tcp_header_size);
} else {
/* RFC1323: The window in SYN & SYN/ACK segments
* is never scaled.
*/
th->window = htons(min(tp->rcv_wnd, 65535U));
}
#ifdef CONFIG_TCP_MD5SIG
/* Calculate the MD5 hash, as we have all we need now */
if (md5) {
sk_nocaps_add(sk, NETIF_F_GSO_MASK);
tp->af_specific->calc_md5_hash(opts.hash_location,
md5, sk, skb);
}
#endif
icsk->icsk_af_ops->send_check(sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
tcp_event_ack_sent(sk, tcp_skb_pcount(skb));//清楚定时器
/* 有数据要发送 */
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
tp->data_segs_out += tcp_skb_pcount(skb);
}
/* 统计分段数 */
if (after(tcb->end_seq, tp->snd_nxt) || tcb->seq == tcb->end_seq)
TCP_ADD_STATS(sock_net(sk), TCP_MIB_OUTSEGS,
tcp_skb_pcount(skb));
tp->segs_out += tcp_skb_pcount(skb);
/* OK, its time to fill skb_shinfo(skb)->gso_{segs|size} */
/* skb中分段数统计 */
skb_shinfo(skb)->gso_segs = tcp_skb_pcount(skb);
skb_shinfo(skb)->gso_size = tcp_skb_mss(skb);
/* Our usage of tstamp should remain private */
skb->tstamp.tv64 = 0;
/* Cleanup our debris for IP stacks */
/* 清空tcb,ip层要使用 */
memset(skb->cb, 0, max(sizeof(struct inet_skb_parm),
sizeof(struct inet6_skb_parm)));
//数据包给ip层继续添加ip地址;函数指针实际指向ip_queue_ximit,这也是实际调用的ip层函数
err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl);
if (likely(err <= 0))
return err;
/* 拥塞控制 */
tcp_enter_cwr(sk);
return net_xmit_eval(err);
}
不难发现,最终还是把skb穿给ip层的ip_queue_ximit继续构造ip数据包,代码如下:
/* Note: skb->sk can be different from sk, in case of tunnels */
int ip_queue_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl)
{
struct inet_sock *inet = inet_sk(sk);
struct net *net = sock_net(sk);
struct ip_options_rcu *inet_opt;
struct flowi4 *fl4;
struct rtable *rt;
struct iphdr *iph;
int res;
/* Skip all of this if the packet is already routed,
* f.e. by something like SCTP.
*/
rcu_read_lock();
inet_opt = rcu_dereference(inet->inet_opt);
fl4 = &fl->u.ip4;
/* 获取skb中的路由缓存 */
rt = skb_rtable(skb);
if (rt)
goto packet_routed;
/* Make sure we can route this packet. */
/* 检查控制块中的路由缓存 */
rt = (struct rtable *)__sk_dst_check(sk, 0);
/* 缓存过期 */
if (!rt) {
__be32 daddr;
/* Use correct destination address if we have options. */
//终于看到了目的ip地址
daddr = inet->inet_daddr;
if (inet_opt && inet_opt->opt.srr)
daddr = inet_opt->opt.faddr;
/* If this fails, retransmit mechanism of transport layer will
* keep trying until route appears or the connection times
* itself out. 重新查找路由缓存
*/
rt = ip_route_output_ports(net, fl4, sk,
daddr, inet->inet_saddr,
inet->inet_dport,
inet->inet_sport,
sk->sk_protocol,
RT_CONN_FLAGS(sk),
sk->sk_bound_dev_if);
if (IS_ERR(rt))
goto no_route;
/* 设置控制块的路由缓存 */
sk_setup_caps(sk, &rt->dst);
}
/* 将路由设置到skb中 */
skb_dst_set_noref(skb, &rt->dst);
packet_routed:
if (inet_opt && inet_opt->opt.is_strictroute && rt->rt_uses_gateway)
goto no_route;
/* OK, we know where to send it, allocate and build IP header. */
/*找到目标后开始在tcp包的基础上构造ip包*/
/*在skb中加上ip的头*/
skb_push(skb, sizeof(struct iphdr) + (inet_opt ? inet_opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
*((__be16 *)iph) = htons((4 << 12) | (5 << 8) | (inet->tos & 0xff));
if (ip_dont_fragment(sk, &rt->dst) && !skb->ignore_df)
iph->frag_off = htons(IP_DF);
else
iph->frag_off = 0;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->protocol = sk->sk_protocol;
ip_copy_addrs(iph, fl4);
/* Transport layer set skb->h.foo itself. */
/* 构造ip选项 */
if (inet_opt && inet_opt->opt.optlen) {
iph->ihl += inet_opt->opt.optlen >> 2;
ip_options_build(skb, &inet_opt->opt, inet->inet_daddr, rt, 0);
}
/* 设置id */
ip_select_ident_segs(net, skb, sk,
skb_shinfo(skb)->gso_segs ?: 1);
/* TODO : should we use skb->sk here instead of sk ? */
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
//发送ip包
res = ip_local_out(net, sk, skb);
rcu_read_unlock();
return res;
no_route://无路由处理
rcu_read_unlock();
IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES);
kfree_skb(skb);
return -EHOSTUNREACH;
}
ip层调用的是ip_local_out,继续往下面最终调用的是这个函数通过网卡把数据发到network:
/* Output packet to network from transport. */
static inline int dst_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
return skb_dst(skb)->output(net, sk, skb);
}
纵观整个过程:核心都在一层一层地构通过添加包头造数据包!
(2)server的ack数据包:
server在收到SYN数据包后,需要回复ACK数据包,数据包地内容如上图所示;server构造包的过程和client没任何区别,本质上都是一层一层地添加包头(增加skb字符串的数据和长度)!核心函数如下:
/*
* Send a SYN-ACK after having received a SYN.
* This still operates on a request_sock only, not on a big
* socket.
*/
static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst,
struct flowi *fl,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type)
{
const struct inet_request_sock *ireq = inet_rsk(req);
struct flowi4 fl4;
int err = -1;
struct sk_buff *skb;
/* First, grab a route. */
if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
return -1;
/*构造syn-sck的包,并返回skb*/
skb = tcp_make_synack(sk, dst, req, foc, synack_type);
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
/*添加ip包头并发送*/
err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr,
ireq->ir_rmt_addr,
ireq->opt);
err = net_xmit_eval(err);
}
return err;
}
其中构造和发送数据包的函数分别是tcp_make_synack和ip_build_and_send_pkt,第一个函数要分配skb并填充tcp头部,这里就是DDOS攻击点之一:
/**
* tcp_make_synack - Prepare a SYN-ACK.
* sk: listener socket
* dst: dst entry attached to the SYNACK
* req: request_sock pointer
*
* Allocate one skb and build a SYNACK packet.
* @dst is consumed : Caller should not use it again.
生成SYN-ACK数据包
*/
struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
struct request_sock *req,
struct tcp_fastopen_cookie *foc,
enum tcp_synack_type synack_type)
{
struct inet_request_sock *ireq = inet_rsk(req);
const struct tcp_sock *tp = tcp_sk(sk);
struct tcp_md5sig_key *md5 = NULL;
struct tcp_out_options opts;
struct sk_buff *skb;
int tcp_header_size;
struct tcphdr *th;
u16 user_mss;
int mss;
/*分配skb,这是DDOS攻击生效的原因之一:
如果一个client不停的更改源ip给server发送syn包,server还以为有很多
client想和自己通信,然后不停地分配skb为
接下来的通信做准备,可能导致内存耗尽
*/
skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC);
if (unlikely(!skb)) {
dst_release(dst);
return NULL;
}
/* Reserve space for headers. */
skb_reserve(skb, MAX_TCP_HEADER);
switch (synack_type) {
case TCP_SYNACK_NORMAL:
skb_set_owner_w(skb, req_to_sk(req));
break;
case TCP_SYNACK_COOKIE:
/* Under synflood, we do not attach skb to a socket,
* to avoid false sharing.
*/
break;
case TCP_SYNACK_FASTOPEN:
/* sk is a const pointer, because we want to express multiple
* cpu might call us concurrently.
* sk->sk_wmem_alloc in an atomic, we can promote to rw.
*/
skb_set_owner_w(skb, (struct sock *)sk);
break;
}
skb_dst_set(skb, dst);
mss = dst_metric_advmss(dst);
user_mss = READ_ONCE(tp->rx_opt.user_mss);
if (user_mss && user_mss < mss)
mss = user_mss;
memset(&opts, 0, sizeof(opts));
#ifdef CONFIG_SYN_COOKIES
if (unlikely(req->cookie_ts))
skb->skb_mstamp.stamp_jiffies = cookie_init_timestamp(req);
else
#endif
skb_mstamp_get(&skb->skb_mstamp);
#ifdef CONFIG_TCP_MD5SIG
rcu_read_lock();
md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
#endif
skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) +
sizeof(*th);
//开始填充tcp头部了
skb_push(skb, tcp_header_size);
skb_reset_transport_header(skb);
th = (struct tcphdr *)skb->data;
memset(th, 0, sizeof(struct tcphdr));
th->syn = 1;
th->ack = 1;
tcp_ecn_make_synack(req, th);
th->source = htons(ireq->ir_num);
th->dest = ireq->ir_rmt_port;
/* Setting of flags are superfluous here for callers (and ECE is
* not even correctly set)
*/
tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn,
TCPHDR_SYN | TCPHDR_ACK);
th->seq = htonl(TCP_SKB_CB(skb)->seq);
/* XXX data is queued and acked as is. No buffer/window check */
th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt);
/* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */
th->window = htons(min(req->rsk_rcv_wnd, 65535U));
tcp_options_write((__be32 *)(th + 1), NULL, &opts);
th->doff = (tcp_header_size >> 2);
__TCP_INC_STATS(sock_net(sk), TCP_MIB_OUTSEGS);
#ifdef CONFIG_TCP_MD5SIG
/* Okay, we have all we need - do the md5 hash if needed */
if (md5)
tcp_rsk(req)->af_specific->calc_md5_hash(opts.hash_location,
md5, req_to_sk(req), skb);
rcu_read_unlock();
#endif
/* Do not fool tcpdump (if any), clean our debris */
skb->tstamp.tv64 = 0;
return skb;
}
EXPORT_SYMBOL(tcp_make_synack);
最后在skb添加ip头,调用ip_local_out把数据包发送出去,代码逻辑很简单:
/*
* Add an ip header to a skbuff and send it out.
*
*/
int ip_build_and_send_pkt(struct sk_buff *skb, const struct sock *sk,
__be32 saddr, __be32 daddr, struct ip_options_rcu *opt)
{
struct inet_sock *inet = inet_sk(sk);
struct rtable *rt = skb_rtable(skb);
struct net *net = sock_net(sk);
struct iphdr *iph;
/* Build the IP header. 构造ip头*/
skb_push(skb, sizeof(struct iphdr) + (opt ? opt->opt.optlen : 0));
skb_reset_network_header(skb);
iph = ip_hdr(skb);
iph->version = 4;
iph->ihl = 5;
iph->tos = inet->tos;
iph->ttl = ip_select_ttl(inet, &rt->dst);
iph->daddr = (opt && opt->opt.srr ? opt->opt.faddr : daddr);
iph->saddr = saddr;
iph->protocol = sk->sk_protocol;
if (ip_dont_fragment(sk, &rt->dst)) {
iph->frag_off = htons(IP_DF);
iph->id = 0;
} else {
iph->frag_off = 0;
__ip_select_ident(net, iph, 1);
}
if (opt && opt->opt.optlen) {
iph->ihl += opt->opt.optlen>>2;
ip_options_build(skb, &opt->opt, daddr, rt, 0);
}
skb->priority = sk->sk_priority;
skb->mark = sk->sk_mark;
/* Send it out. */
return ip_local_out(net, skb->sk, skb);
}
server发送SYN-ACK就完毕了!
(3)client的ack数据包
这个ack是client给server发送的,本质还是个字符串,构造出这个字符串发送出去就行了,所以最核心的还是调用tcp_transmit_skb函数,整个函数代码如下:
/* This routine sends an ack and also updates the window. */
void tcp_send_ack(struct sock *sk)
{
struct sk_buff *buff;
/* If we have been reset, we may not send again. */
/* 如果当前的套接字已经被关闭了,那么直接返回。 */
if (sk->sk_state == TCP_CLOSE)
return;
/* 拥塞避免 */
tcp_ca_event(sk, CA_EVENT_NON_DELAYED_ACK);
/* We are not putting this on the write queue, so
* tcp_transmit_skb() will set the ownership to this
* sock.
*/
buff = alloc_skb(MAX_TCP_HEADER,
sk_gfp_mask(sk, GFP_ATOMIC | __GFP_NOWARN));
if (unlikely(!buff)) {
inet_csk_schedule_ack(sk);
inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK,
TCP_DELACK_MAX, TCP_RTO_MAX);
return;
}
/* Reserve space for headers and prepare control bits. */
/* 初始化 ACK 包 */
skb_reserve(buff, MAX_TCP_HEADER);
tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK);
/* We do not want pure acks influencing TCP Small Queues or fq/pacing
* too much.
* SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784
* We also avoid tcp_wfree() overhead (cache line miss accessing
* tp->tsq_flags) by using regular sock_wfree()
*/
skb_set_tcp_pure_ack(buff);
/* Send it off, this clears delayed acks for us. */
/* 添加时间戳并发送 ACK 包 */
skb_mstamp_get(&buff->skb_mstamp);
//还是从这里构造和发送ack包,老演员了!
tcp_transmit_skb(sk, buff, 0, (__force gfp_t)0);
}
EXPORT_SYMBOL_GPL(tcp_send_ack);
总结:
1、网卡只负责简单粗暴地收发数据(说白了就是字符串),协议什么的需要操作系统考虑,网卡这种硬件是不care的!
2、socket、sock、sk_buff、tcphdr等结构体存在的最终目的都是为了构造协议不同层级的数据包(说白了就是不同的字符串,为了方便理解和维护、避免眉毛胡子一把抓的毛病,把字符串的不同位置抽象成了不同的属性或标识);所以不同操作系统肯定有不同的结构体和方法来生成和解析数据包,只要保证发出去的字符串符合协议规定的格式就行了!
3、逻辑层面所谓的建立连接:双方通过SYN和ACK确定要互相通信后,会分配skb来存储收发的数据!DDOS攻击的一种就是想办法让server不停地分配skb来接受即将到来的数据!然而server的内存是有限的,分配了大量的skb最终会导致内存被耗尽!
参考:
1、https://network.51cto.com/article/648928.html?mobile tcp三次握手之connect
2、https://www.leviathan.vip/2018/08/09/Linux%E5%86%85%E6%A0%B8%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90-TCP%E5%8D%8F%E8%AE%AE-1/ tcp协议分析
3、http://45.76.5.96/opensource/tcp/tcp.pdf linux tcp源码分析
在线
弄挺好的,页面代码都进来了,有说有图有对应最好帮人理解了
离线
页次: 1