linux/net
Alexei Starovoitov 622582786c net: filter: x86: internal BPF JIT
Maps all internal BPF instructions into x86_64 instructions.
This patch replaces original BPF x64 JIT with internal BPF x64 JIT.
sysctl net.core.bpf_jit_enable is reused as on/off switch.

Performance:

1. old BPF JIT and internal BPF JIT generate equivalent x86_64 code.
  No performance difference is observed for filters that were JIT-able before

Example assembler code for BPF filter "tcpdump port 22"

original BPF -> old JIT:            original BPF -> internal BPF -> new JIT:
   0:   push   %rbp                      0:     push   %rbp
   1:   mov    %rsp,%rbp                 1:     mov    %rsp,%rbp
   4:   sub    $0x60,%rsp                4:     sub    $0x228,%rsp
   8:   mov    %rbx,-0x8(%rbp)           b:     mov    %rbx,-0x228(%rbp) // prologue
                                        12:     mov    %r13,-0x220(%rbp)
                                        19:     mov    %r14,-0x218(%rbp)
                                        20:     mov    %r15,-0x210(%rbp)
                                        27:     xor    %eax,%eax         // clear A
   c:   xor    %ebx,%ebx                29:     xor    %r13,%r13         // clear X
   e:   mov    0x68(%rdi),%r9d          2c:     mov    0x68(%rdi),%r9d
  12:   sub    0x6c(%rdi),%r9d          30:     sub    0x6c(%rdi),%r9d
  16:   mov    0xd8(%rdi),%r8           34:     mov    0xd8(%rdi),%r10
                                        3b:     mov    %rdi,%rbx
  1d:   mov    $0xc,%esi                3e:     mov    $0xc,%esi
  22:   callq  0xffffffffe1021e15       43:     callq  0xffffffffe102bd75
  27:   cmp    $0x86dd,%eax             48:     cmp    $0x86dd,%rax
  2c:   jne    0x0000000000000069       4f:     jne    0x000000000000009a
  2e:   mov    $0x14,%esi               51:     mov    $0x14,%esi
  33:   callq  0xffffffffe1021e31       56:     callq  0xffffffffe102bd91
  38:   cmp    $0x84,%eax               5b:     cmp    $0x84,%rax
  3d:   je     0x0000000000000049       62:     je     0x0000000000000074
  3f:   cmp    $0x6,%eax                64:     cmp    $0x6,%rax
  42:   je     0x0000000000000049       68:     je     0x0000000000000074
  44:   cmp    $0x11,%eax               6a:     cmp    $0x11,%rax
  47:   jne    0x00000000000000c6       6e:     jne    0x0000000000000117
  49:   mov    $0x36,%esi               74:     mov    $0x36,%esi
  4e:   callq  0xffffffffe1021e15       79:     callq  0xffffffffe102bd75
  53:   cmp    $0x16,%eax               7e:     cmp    $0x16,%rax
  56:   je     0x00000000000000bf       82:     je     0x0000000000000110
  58:   mov    $0x38,%esi               88:     mov    $0x38,%esi
  5d:   callq  0xffffffffe1021e15       8d:     callq  0xffffffffe102bd75
  62:   cmp    $0x16,%eax               92:     cmp    $0x16,%rax
  65:   je     0x00000000000000bf       96:     je     0x0000000000000110
  67:   jmp    0x00000000000000c6       98:     jmp    0x0000000000000117
  69:   cmp    $0x800,%eax              9a:     cmp    $0x800,%rax
  6e:   jne    0x00000000000000c6       a1:     jne    0x0000000000000117
  70:   mov    $0x17,%esi               a3:     mov    $0x17,%esi
  75:   callq  0xffffffffe1021e31       a8:     callq  0xffffffffe102bd91
  7a:   cmp    $0x84,%eax               ad:     cmp    $0x84,%rax
  7f:   je     0x000000000000008b       b4:     je     0x00000000000000c2
  81:   cmp    $0x6,%eax                b6:     cmp    $0x6,%rax
  84:   je     0x000000000000008b       ba:     je     0x00000000000000c2
  86:   cmp    $0x11,%eax               bc:     cmp    $0x11,%rax
  89:   jne    0x00000000000000c6       c0:     jne    0x0000000000000117
  8b:   mov    $0x14,%esi               c2:     mov    $0x14,%esi
  90:   callq  0xffffffffe1021e15       c7:     callq  0xffffffffe102bd75
  95:   test   $0x1fff,%ax              cc:     test   $0x1fff,%rax
  99:   jne    0x00000000000000c6       d3:     jne    0x0000000000000117
                                        d5:     mov    %rax,%r14
  9b:   mov    $0xe,%esi                d8:     mov    $0xe,%esi
  a0:   callq  0xffffffffe1021e44       dd:     callq  0xffffffffe102bd91 // MSH
                                        e2:     and    $0xf,%eax
                                        e5:     shl    $0x2,%eax
                                        e8:     mov    %rax,%r13
                                        eb:     mov    %r14,%rax
                                        ee:     mov    %r13,%rsi
  a5:   lea    0xe(%rbx),%esi           f1:     add    $0xe,%esi
  a8:   callq  0xffffffffe1021e0d       f4:     callq  0xffffffffe102bd6d
  ad:   cmp    $0x16,%eax               f9:     cmp    $0x16,%rax
  b0:   je     0x00000000000000bf       fd:     je     0x0000000000000110
                                        ff:     mov    %r13,%rsi
  b2:   lea    0x10(%rbx),%esi         102:     add    $0x10,%esi
  b5:   callq  0xffffffffe1021e0d      105:     callq  0xffffffffe102bd6d
  ba:   cmp    $0x16,%eax              10a:     cmp    $0x16,%rax
  bd:   jne    0x00000000000000c6      10e:     jne    0x0000000000000117
  bf:   mov    $0xffff,%eax            110:     mov    $0xffff,%eax
  c4:   jmp    0x00000000000000c8      115:     jmp    0x000000000000011c
  c6:   xor    %eax,%eax               117:     mov    $0x0,%eax
  c8:   mov    -0x8(%rbp),%rbx         11c:     mov    -0x228(%rbp),%rbx // epilogue
  cc:   leaveq                         123:     mov    -0x220(%rbp),%r13
  cd:   retq                           12a:     mov    -0x218(%rbp),%r14
                                       131:     mov    -0x210(%rbp),%r15
                                       138:     leaveq
                                       139:     retq

On fully cached SKBs both JITed functions take 12 nsec to execute.
BPF interpreter executes the program in 30 nsec.

The difference in generated assembler is due to the following:

Old BPF imlements LDX_MSH instruction via sk_load_byte_msh() helper function
inside bpf_jit.S.
New JIT removes the helper and does it explicitly, so ldx_msh cost
is the same for both JITs, but generated code looks longer.

New JIT has 4 registers to save, so prologue/epilogue are larger,
but the cost is within noise on x64.

Old JIT checks whether first insn clears A and if not emits 'xor %eax,%eax'.
New JIT clears %rax unconditionally.

2. old BPF JIT doesn't support ANC_NLATTR, ANC_PAY_OFFSET, ANC_RANDOM
  extensions. New JIT supports all BPF extensions.
  Performance of such filters improves 2-4 times depending on a filter.
  The longer the filter the higher performance gain.
  Synthetic benchmarks with many ancillary loads see 20x speedup
  which seems to be the maximum gain from JIT

Notes:

. net.core.bpf_jit_enable=2 + tools/net/bpf_jit_disasm is still functional
  and can be used to see generated assembler

. there are two jit_compile() functions and code flow for classic filters is:
  sk_attach_filter() - load classic BPF
  bpf_jit_compile() - try to JIT from classic BPF
  sk_convert_filter() - convert classic to internal
  bpf_int_jit_compile() - JIT from internal BPF

  seccomp and tracing filters will just call bpf_int_jit_compile()

Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
2014-05-15 16:31:30 -04:00
..
9p A bunch of updates and cleanup within the transport layer, 2014-04-11 14:14:57 -07:00
802 neigh: use NEIGH_VAR_INIT in ndo_neigh_setup functions. 2014-01-16 11:31:58 -08:00
8021q vlan: rename __vlan_find_dev_deep() to __vlan_find_dev_deep_rcu() 2014-05-12 14:39:13 -04:00
appletalk appletalk: fix checkpatch error with indent 2014-02-14 16:18:32 -05:00
atm net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
ax25 net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
batman-adv net: get rid of SET_ETHTOOL_OPS 2014-05-13 17:43:20 -04:00
bluetooth Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-05-12 13:19:14 -04:00
bridge net: get rid of SET_ETHTOOL_OPS 2014-05-13 17:43:20 -04:00
caif net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
can net: Use netlink_ns_capable to verify the permisions of netlink messages 2014-04-24 13:44:54 -04:00
ceph Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/sage/ceph-client 2014-05-05 15:17:02 -07:00
core net: filter: x86: internal BPF JIT 2014-05-15 16:31:30 -04:00
dcb net: Use netlink_ns_capable to verify the permisions of netlink messages 2014-04-24 13:44:54 -04:00
dccp dccp: make the request_retries minimum is 1 2014-05-14 15:34:16 -04:00
decnet net: Use netlink_ns_capable to verify the permisions of netlink messages 2014-04-24 13:44:54 -04:00
dns_resolver net/*: Fix FSF address in file headers 2013-12-06 12:37:57 -05:00
dsa net: get rid of SET_ETHTOOL_OPS 2014-05-13 17:43:20 -04:00
ethernet net: eth_type_trans() should use skb_header_pointer() 2014-01-16 15:30:31 -08:00
hsr hsr: replace del_timer by del_timer_sync 2014-03-27 15:28:06 -04:00
ieee802154 ieee802154: fix dgram socket sendmsg() 2014-05-15 15:51:43 -04:00
ipv4 net: Use a more standard macro for INET_ADDR_COOKIE 2014-05-14 16:07:23 -04:00
ipv6 snmp: fix some left over of snmp stats 2014-05-14 15:33:47 -04:00
ipx ipx: implement shutdown() 2014-02-12 19:26:32 -05:00
irda net: add build-time checks for msg->msg_name size 2014-01-18 23:04:16 -08:00
iucv net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
key net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
l2tp net: rename local_df to ignore_df 2014-05-12 14:03:41 -04:00
lapb net/lapb: re-send packets on timeout 2013-09-23 16:52:45 -04:00
llc llc: remove noisy WARN from llc_mac_hdr_init 2014-01-28 18:01:32 -08:00
mac80211 Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-05-12 13:19:14 -04:00
mac802154 mac802154: make mac802154_wpan_open static 2014-05-15 15:51:43 -04:00
mpls ipip: add GSO/TSO support 2013-10-19 19:36:19 -04:00
netfilter net: rename local_df to ignore_df 2014-05-12 14:03:41 -04:00
netlabel netlabel: Fix FSF address in file headers 2013-12-06 12:37:56 -05:00
netlink Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-05-12 13:19:14 -04:00
netrom net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
nfc net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
openvswitch net: get rid of SET_ETHTOOL_OPS 2014-05-13 17:43:20 -04:00
packet net: Use netlink_ns_capable to verify the permisions of netlink messages 2014-04-24 13:44:54 -04:00
phonet net: Use netlink_ns_capable to verify the permisions of netlink messages 2014-04-24 13:44:54 -04:00
rds rds: remove the unneed NULL checking 2014-05-09 15:59:45 -04:00
rfkill net: rfkill: move poll work to power efficient workqueue 2014-02-04 21:58:16 +01:00
rose net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
rxrpc net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
sched sch_hhf: fix comparison of qlen and limit 2014-05-12 14:55:21 -04:00
sctp snmp: fix some left over of snmp stats 2014-05-14 15:33:47 -04:00
sunrpc Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-04-12 17:31:22 -07:00
tipc tipc: merge port message reception into socket reception function 2014-05-14 15:19:48 -04:00
unix net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
vmw_vsock vsock: Make transport the proto owner 2014-05-05 13:13:50 -04:00
wimax wimax: remove dead code 2013-11-21 13:09:42 -05:00
wireless Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-05-12 13:19:14 -04:00
x25 net: Fix use after free by removing length arg from sk_data_ready callbacks. 2014-04-11 16:15:36 -04:00
xfrm Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net 2014-05-12 13:19:14 -04:00
compat.c net/compat: convert to COMPAT_SYSCALL_DEFINE with changing parameter types 2014-03-06 16:30:45 +01:00
Kconfig Merge branch 'for-3.15' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup 2014-04-03 13:05:42 -07:00
Makefile net: move 6lowpan compression code to separate module 2014-01-15 15:36:38 -08:00
nonet.c
socket.c net: use SYSCALL_DEFINEx for sys_recv 2014-04-16 15:15:05 -04:00
sysctl_net.c net: Update the sysctl permissions handler to test effective uid/gid 2013-10-07 15:57:56 -04:00