Quick Notes: SYN flooding Warnings on the Linux 2.6.32 Kernel
Preface
A newly provisioned server runs kernel 2.6.32. The existing TCP Server was moved straight onto the new Linux box, and running dmesg there shows a large number of SYN flooding warnings:
possible SYN flooding on port 8080. Sending cookies.
The parameters that worked on the old 2.6.18 kernel no longer help on 2.6.32: simply raising "net.ipv4.tcp_max_syn_backlog" has no effect.
What to do? The only option was to read the 2.6.32 source once more; the notes below are the result.
The final summary states the conclusions directly, so if you are in a hurry you can jump straight there.
Analysis of the backlog value in Linux kernel 2.6.32
net/socket.c:
SYSCALL_DEFINE2(listen, int, fd, int, backlog)
{
    struct socket *sock;
    int err, fput_needed;
    int somaxconn;

    sock = sockfd_lookup_light(fd, &err, &fput_needed);
    if (sock) {
        somaxconn = sock_net(sock->sk)->core.sysctl_somaxconn;
        if ((unsigned)backlog > somaxconn)
            backlog = somaxconn;

        err = security_socket_listen(sock, backlog);
        if (!err)
            err = sock->ops->listen(sock, backlog);

        fput_light(sock->file, fput_needed);
    }
    return err;
}
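As the syscall above shows, whatever backlog the application requests is silently capped at net.core.somaxconn. As a minimal user-space sketch (my own illustration, not kernel code), an application could read that cap from /proc and log what the kernel will actually use:

#include <stdio.h>

/* Read the current net.core.somaxconn value; returns -1 on failure. */
static int read_somaxconn(void)
{
    FILE *f = fopen("/proc/sys/net/core/somaxconn", "r");
    int val = -1;

    if (f && fscanf(f, "%d", &val) != 1)
        val = -1;
    if (f)
        fclose(f);
    return val;
}

int main(void)
{
    int somaxconn = read_somaxconn();
    int requested = 8102;   /* the backlog we intend to pass to listen() */
    int effective = (somaxconn > 0 && requested > somaxconn) ? somaxconn : requested;

    printf("requested=%d somaxconn=%d effective=%d\n",
           requested, somaxconn, effective);
    return 0;
}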
net/ipv4/af_inet.c:
/*
* Move a socket into listening state.
*/
int inet_listen(struct socket *sock, int backlog)
{
    struct sock *sk = sock->sk;
    unsigned char old_state;
    int err;

    lock_sock(sk);

    err = -EINVAL;
    if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
        goto out;

    old_state = sk->sk_state;
    if (!((1 << old_state) & (TCPF_CLOSE | TCPF_LISTEN)))
        goto out;

    /* Really, if the socket is already in listen state
     * we can only allow the backlog to be adjusted.
     */
    if (old_state != TCP_LISTEN) {
        err = inet_csk_listen_start(sk, backlog);
        if (err)
            goto out;
    }
    sk->sk_max_ack_backlog = backlog;
    err = 0;

out:
    release_sock(sk);
    return err;
}
inet_listen calls inet_csk_listen_start, where the backlog argument takes on a new identity: it becomes the read-only parameter nr_table_entries.
net/ipv4/inet_connection_sock.c:
int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
{
    struct inet_sock *inet = inet_sk(sk);
    struct inet_connection_sock *icsk = inet_csk(sk);
    int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);

    if (rc != 0)
        return rc;

    sk->sk_max_ack_backlog = 0;
    sk->sk_ack_backlog = 0;
    inet_csk_delack_init(sk);

    /* There is race window here: we announce ourselves listening,
     * but this transition is still not validated by get_port().
     * It is OK, because this socket enters to hash table only
     * after validation is complete.
     */
    sk->sk_state = TCP_LISTEN;
    if (!sk->sk_prot->get_port(sk, inet->num)) {
        inet->sport = htons(inet->num);

        sk_dst_reset(sk);
        sk->sk_prot->hash(sk);

        return 0;
    }

    sk->sk_state = TCP_CLOSE;
    __reqsk_queue_destroy(&icsk->icsk_accept_queue);
    return -EADDRINUSE;
}
The code below deals with connections in the TCP SYN_RECV state: connections still in the handshake phase, also known as half-open connections, waiting for the peer to complete the third step of the handshake.
/*
* Maximum number of SYN_RECV sockets in queue per LISTEN socket.
* One SYN_RECV socket costs about 80bytes on a 32bit machine.
* It would be better to replace it with a global counter for all sockets
* but then some measure against one socket starving all other sockets
* would be needed.
*
* It was 128 by default. Experiments with real servers show, that
* it is absolutely not enough even at 100conn/sec. 256 cures most
* of problems. This value is adjusted to 128 for very small machines
* (<=32Mb of memory) and to 1024 on normal or better ones (>=256Mb).
* Note : Dont forget somaxconn that may limit backlog too.
*/
int reqsk_queue_alloc(struct request_sock_queue *queue,
                      unsigned int nr_table_entries)
{
    size_t lopt_size = sizeof(struct listen_sock);
    struct listen_sock *lopt;

    nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
    nr_table_entries = max_t(u32, nr_table_entries, 8);
    nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
    lopt_size += nr_table_entries * sizeof(struct request_sock *);
    if (lopt_size > PAGE_SIZE)
        lopt = __vmalloc(lopt_size,
                         GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO,
                         PAGE_KERNEL);
    else
        lopt = kzalloc(lopt_size, GFP_KERNEL);
    if (lopt == NULL)
        return -ENOMEM;

    for (lopt->max_qlen_log = 3;
         (1 << lopt->max_qlen_log) < nr_table_entries;
         lopt->max_qlen_log++);

    get_random_bytes(&lopt->hash_rnd, sizeof(lopt->hash_rnd));
    rwlock_init(&queue->syn_wait_lock);
    queue->rskq_accept_head = NULL;
    lopt->nr_table_entries = nr_table_entries;

    write_lock_bh(&queue->syn_wait_lock);
    queue->listen_opt = lopt;
    write_unlock_bh(&queue->syn_wait_lock);

    return 0;
}
The key variable is nr_table_entries: inside reqsk_queue_alloc it becomes an unsigned parameter that can be adjusted again, but only within fixed bounds.
For example, suppose the actual kernel parameter is:
net.ipv4.tcp_max_syn_backlog = 65535
and the backlog passed in (already no larger than net.core.somaxconn = 65535) is 8102. Then:
// take the minimum of the listen() backlog and sysctl_max_syn_backlog: the result is 8102
nr_table_entries = min_t(u32, nr_table_entries, sysctl_max_syn_backlog);
// take the maximum of nr_table_entries and 8: the result is still 8102
nr_table_entries = max_t(u32, nr_table_entries, 8);
// round nr_table_entries + 1 up to the next power of two: roundup_pow_of_two(8103) = 8192
nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);
The result: nr_table_entries = 8192, and the for loop above yields max_qlen_log = 13.
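To make the arithmetic concrete, here is a small user-space sketch that mimics the three kernel statements; min_u32, max_u32 and roundup_pow_of_two below are stand-ins written for illustration, not the kernel's own helpers:

#include <stdio.h>

/* Illustrative stand-ins for the kernel's min_t/max_t/roundup_pow_of_two. */
static unsigned int min_u32(unsigned int a, unsigned int b) { return a < b ? a : b; }
static unsigned int max_u32(unsigned int a, unsigned int b) { return a > b ? a : b; }

static unsigned int roundup_pow_of_two(unsigned int n)
{
    unsigned int p = 1;
    while (p < n)
        p <<= 1;
    return p;
}

int main(void)
{
    unsigned int sysctl_max_syn_backlog = 65535;   /* net.ipv4.tcp_max_syn_backlog */
    unsigned int nr_table_entries = 8102;          /* backlog after the somaxconn cap */
    unsigned int max_qlen_log;

    nr_table_entries = min_u32(nr_table_entries, sysctl_max_syn_backlog);
    nr_table_entries = max_u32(nr_table_entries, 8);
    nr_table_entries = roundup_pow_of_two(nr_table_entries + 1);

    for (max_qlen_log = 3; (1u << max_qlen_log) < nr_table_entries; max_qlen_log++)
        ;

    /* Prints: nr_table_entries=8192 max_qlen_log=13 */
    printf("nr_table_entries=%u max_qlen_log=%u\n", nr_table_entries, max_qlen_log);
    return 0;
}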
For comparison, how max_qlen_log is computed in the 2.6.18 kernel:
for (lopt->max_qlen_log = 6;
     (1 << lopt->max_qlen_log) < sysctl_max_syn_backlog;
     lopt->max_qlen_log++);
- Clearly, sysctl_max_syn_backlog takes part in the calculation, so a large sysctl_max_syn_backlog leads to a correspondingly large max_qlen_log
- If sysctl_max_syn_backlog = 65535, then max_qlen_log = 16 (see the sketch below)
- So on the 2.6.18 kernel the half-open queue length is 2^16 = 65536
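A quick sketch of the 2.6.18-style loop, again purely illustrative, assuming sysctl_max_syn_backlog = 65535:

#include <stdio.h>

int main(void)
{
    unsigned int sysctl_max_syn_backlog = 65535;   /* net.ipv4.tcp_max_syn_backlog */
    unsigned int max_qlen_log;

    /* 2.6.18 starts at 6 and only looks at sysctl_max_syn_backlog,
     * ignoring the backlog passed to listen(). */
    for (max_qlen_log = 6; (1u << max_qlen_log) < sysctl_max_syn_backlog; max_qlen_log++)
        ;

    /* Prints: max_qlen_log=16 (queue length 65536) */
    printf("max_qlen_log=%u (queue length %u)\n", max_qlen_log, 1u << max_qlen_log);
    return 0;
}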
The listen_sock structure records the number of half-open entries it has to manage as nr_table_entries, which in this example is 8192.
/** struct listen_sock - listen state
 *
 * @max_qlen_log - log_2 of maximal queued SYNs/REQUESTs
 */
struct listen_sock {
    u8                  max_qlen_log;
    /* 3 bytes hole, try to use */
    int                 qlen;
    int                 qlen_young;
    int                 clock_hand;
    u32                 hash_rnd;
    u32                 nr_table_entries;
    struct request_sock *syn_table[0];
};
From the description above, 2^max_qlen_log is the maximum half-open queue length, i.e. the upper bound on qlen (here 2^13 = 8192).
Now back to the function that reports the SYN flooding:
net/ipv4/tcp_ipv4.c:
#ifdef CONFIG_SYN_COOKIES
static void syn_flood_warning(struct sk_buff *skb)
{
    static unsigned long warntime;

    if (time_after(jiffies, (warntime + HZ * 60))) {
        warntime = jiffies;
        printk(KERN_INFO
               "possible SYN flooding on port %d. Sending cookies.\n",
               ntohs(tcp_hdr(skb)->dest));
    }
}
#endif
The call site, with some code trimmed for brevity:
int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
    ......
#ifdef CONFIG_SYN_COOKIES
    int want_cookie = 0;
#else
#define want_cookie 0 /* Argh, why doesn't gcc optimize this :( */
#endif
    ......
    /* TW buckets are converted to open requests without
     * limitations, they conserve resources and peer is
     * evidently real one.
     */
    // is the half-open (SYN) queue full? (isn is 0 for a fresh SYN, so !isn holds)
    if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
#ifdef CONFIG_SYN_COOKIES
        if (sysctl_tcp_syncookies) {
            want_cookie = 1;
        } else
#endif
        goto drop;
    }

    /* Accept backlog is full. If we have already queued enough
     * of warm entries in syn queue, drop request. It is better than
     * clogging syn queue with openreqs with exponentially increasing
     * timeout.
     */
    if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
        goto drop;

    req = inet_reqsk_alloc(&tcp_request_sock_ops);
    if (!req)
        goto drop;
    ......
    if (!want_cookie)
        TCP_ECN_create_request(req, tcp_hdr(skb));

    if (want_cookie) {
#ifdef CONFIG_SYN_COOKIES
        syn_flood_warning(skb);
        req->cookie_ts = tmp_opt.tstamp_ok;
#endif
        isn = cookie_v4_init_sequence(sk, skb, &req->mss);
    } else if (!isn) {
        ......
    }
    ......
}
The function that decides whether the half-open queue is full is the crucial one; look at how it does the calculation:
include/net/inet_connection_sock.h:
static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
{
    return reqsk_queue_is_full(&inet_csk(sk)->icsk_accept_queue);
}
include/net/request_sock.h:
static inline int reqsk_queue_is_full(const struct request_sock_queue *queue)
{
    // shift qlen right by max_qlen_log bits
    return queue->listen_opt->qlen >> queue->listen_opt->max_qlen_log;
}
A return value of 1 naturally means the half-open queue is full.
The above only analyzes the condition that marks the half-open queue as full. The point is that the backlog value passed in by the application matters a great deal: if it is too small, this expression very easily becomes 1.
For example, if somaxconn = 128, sysctl_max_syn_backlog = 4096 and the application passes backlog = 511, then the backlog is first capped at 128, so nr_table_entries ends up as 256 and max_qlen_log = 8. Once the half-open queue holds 256 entries, 256 >> 8 = 1 and the queue counts as full.
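A tiny sketch of the same shift-based check with these numbers (the values are the hypothetical ones from the example above):

#include <stdio.h>

/* Mirrors the spirit of reqsk_queue_is_full(): non-zero means "full". */
static int syn_queue_is_full(int qlen, unsigned char max_qlen_log)
{
    return qlen >> max_qlen_log;
}

int main(void)
{
    unsigned char max_qlen_log = 8;   /* from nr_table_entries = 256 */

    /* Prints: qlen=255 full=0, qlen=256 full=1 */
    printf("qlen=255 full=%d, qlen=256 full=%d\n",
           syn_queue_is_full(255, max_qlen_log),
           syn_queue_is_full(256, max_qlen_log));
    return 0;
}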
How to set backlog therefore depends on the concrete application: you have to pass a sensible value when it calls listen.
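For a plain C server that simply means giving listen() an explicit second argument. A minimal sketch; port 8080 and backlog 1024 are arbitrary choices here:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netinet/in.h>
#include <sys/socket.h>
#include <unistd.h>

int main(void)
{
    int fd = socket(AF_INET, SOCK_STREAM, 0);
    struct sockaddr_in addr;
    int backlog = 1024;   /* still capped at net.core.somaxconn by the kernel */

    if (fd < 0) {
        perror("socket");
        return EXIT_FAILURE;
    }

    memset(&addr, 0, sizeof(addr));
    addr.sin_family = AF_INET;
    addr.sin_addr.s_addr = htonl(INADDR_ANY);
    addr.sin_port = htons(8080);

    if (bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
        listen(fd, backlog) < 0) {
        perror("bind/listen");
        close(fd);
        return EXIT_FAILURE;
    }

    printf("listening on 8080 with backlog %d\n", backlog);
    /* accept() loop omitted */
    close(fd);
    return 0;
}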
Handling backlog in Netty
Our TCP Server uses Netty 3.7, a fairly old version. When it comes to the backlog, if we do not set a value ourselves, JDK 1.6 defaults it to 50.
The evidence, from java.net.ServerSocket:
public void bind(SocketAddress endpoint, int backlog) throws IOException {
    if (isClosed())
        throw new SocketException("Socket is closed");
    if (!oldImpl && isBound())
        throw new SocketException("Already bound");
    if (endpoint == null)
        endpoint = new InetSocketAddress(0);
    if (!(endpoint instanceof InetSocketAddress))
        throw new IllegalArgumentException("Unsupported address type");
    InetSocketAddress epoint = (InetSocketAddress) endpoint;
    if (epoint.isUnresolved())
        throw new SocketException("Unresolved address");
    if (backlog < 1)
        backlog = 50;
    try {
        SecurityManager security = System.getSecurityManager();
        if (security != null)
            security.checkListen(epoint.getPort());
        getImpl().bind(epoint.getAddress(), epoint.getPort());
        getImpl().listen(backlog);
        bound = true;
    } catch(SecurityException e) {
        bound = false;
        throw e;
    } catch(IOException e) {
        bound = false;
        throw e;
    }
}
The place in Netty where backlog is handled:
org/jboss/netty/channel/socket/DefaultServerSocketChannelConfig.java:
@Override
public boolean setOption(String key, Object value) {
    if (super.setOption(key, value)) {
        return true;
    }

    if ("receiveBufferSize".equals(key)) {
        setReceiveBufferSize(ConversionUtil.toInt(value));
    } else if ("reuseAddress".equals(key)) {
        setReuseAddress(ConversionUtil.toBoolean(value));
    } else if ("backlog".equals(key)) {
        setBacklog(ConversionUtil.toInt(value));
    } else {
        return false;
    }
    return true;
}
Since we have to specify the backlog value ourselves, we can do it like this:
bootstrap.setOption("backlog", 8102); // setting it on the large side is fine: the kernel compares it with net.core.somaxconn and takes the smaller value
Compared with Netty 4.0 this is somewhat less convenient; see: http://www.aygfsteel.com/yongboy/archive/2014/07/30/416373.html
Summary
On Linux kernel 2.6.32, as long as you are not actually under a SYN flooding attack, it is reasonable to raise:
sysctl -w net.core.somaxconn=32768
sysctl -w net.ipv4.tcp_max_syn_backlog=65535
sysctl -p
Also, never forget to adjust the backlog value passed to the TCP Server's listen call; leaving it unset or too small can equally well trigger the SYN flooding warnings. Start with something like 1024, watch it for a while, and raise it gradually as the real load requires.
No matter how you set it, the effective backlog is bounded by:
backlog <= net.core.somaxconn
and the half-open queue length works out to:
half-open queue length = roundup_pow_of_two(min(backlog, net.ipv4.tcp_max_syn_backlog) + 1), which is at most about 2 * min(backlog, net.ipv4.tcp_max_syn_backlog)
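Putting the pieces together, here is a small sketch (my own summary written for illustration, not kernel code) of how the effective half-open queue length can be estimated from the three knobs:

#include <stdio.h>

static unsigned int roundup_pow_of_two(unsigned int n)
{
    unsigned int p = 1;
    while (p < n)
        p <<= 1;
    return p;
}

/* Approximates the 2.6.32 computation: the backlog is first capped at somaxconn,
 * then clamped by tcp_max_syn_backlog, floored at 8, and rounded up to a power of two. */
static unsigned int syn_queue_len(unsigned int backlog,
                                  unsigned int somaxconn,
                                  unsigned int tcp_max_syn_backlog)
{
    unsigned int n = backlog > somaxconn ? somaxconn : backlog;

    if (n > tcp_max_syn_backlog)
        n = tcp_max_syn_backlog;
    if (n < 8)
        n = 8;
    return roundup_pow_of_two(n + 1);
}

int main(void)
{
    /* Prints 256 (the somaxconn=128 example) and 8192 (the backlog=8102 example). */
    printf("%u\n", syn_queue_len(511, 128, 4096));
    printf("%u\n", syn_queue_len(8102, 65535, 65535));
    return 0;
}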
One more note: when SYN flooding occurs, a large number of connections stuck in TCP SYN_RECV indicates that the half-open queue is full. You can check with:
ss -ant | awk 'NR>1 {++s[$1]} END {for(k in s) print k,s[k]}'
Thanks to 書坤 from the ops team for this handy command.
posted on 2014-08-20 20:43 by nieyong, filed under: Socket